2127 lines
65 KiB
Elixir
2127 lines
65 KiB
Elixir
defmodule NimbleParsec do
|
|
# The module documentation is extracted from the README at compile time, so
# register the file as an external resource: editing the README then triggers
# a recompilation of this module.
@external_resource "README.md"

# Use the README segment that comes right after the first "<!-- MDOC !-->" marker.
@moduledoc "README.md"
           |> File.read!()
           |> String.split("<!-- MDOC !-->")
           |> Enum.fetch!(1)
|
|
|
|
# Private guard telling whether a term can be a combinator. Combinators are
# represented as lists in this module, so the check is `is_list/1`. It works
# both inside guards and in regular code.
defguardp is_combinator(combinator) when is_list(combinator)
|
|
|
|
@doc """
|
|
Defines a parser (and a combinator) with the given `name` and `opts`.
|
|
|
|
The parser is a function that receives two arguments, the binary
|
|
to be parsed and a set of options. You can consult the documentation
|
|
of the generated parser function for more information.
|
|
|
|
This function will also define a combinator that can be used as
|
|
`parsec(name)` when building other parsers. See `parsec/2` for
|
|
more information on invoking compiled combinators.
|
|
|
|
## Beware!
|
|
|
|
`defparsec/3` is executed during compilation. This means you can't
|
|
invoke a function defined in the same module. The following will error
|
|
because the `date` function has not yet been defined:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
def date do
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
end
|
|
|
|
defparsec :date, date()
|
|
end
|
|
|
|
This can be solved in different ways. You may simply
|
|
compose a long parser using variables. For example:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
date =
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|
|
defparsec :date, date
|
|
end
|
|
|
|
Alternatively, you may define a `Helpers` module with many
|
|
convenience combinators, and then invoke them in your parser
|
|
module:
|
|
|
|
defmodule MyParser.Helpers do
|
|
import NimbleParsec
|
|
|
|
def date do
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
end
|
|
end
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
import MyParser.Helpers
|
|
|
|
defparsec :date, date()
|
|
end
|
|
|
|
The approach of using helper modules is the favorite way
|
|
of composing parsers in `NimbleParsec`.
|
|
|
|
## Options
|
|
|
|
* `:inline` - when true, inlines clauses that work as redirection for
|
|
other clauses. Setting this may improve runtime performance at the
|
|
cost of increased compilation time and bytecode size
|
|
|
|
* `:debug` - when true, writes generated clauses to `:stderr` for debugging
|
|
|
|
* `:export_combinator` - make the underlying combinator function public
|
|
so it can be used as part of `parsec/1` from other modules
|
|
|
|
* `:export_metadata` - export metadata necessary to use this parser
|
|
combinator to generate inputs
|
|
|
|
"""
|
|
defmacro defparsec(name, combinator, opts \\ []) do
  # The visibility of the combinator function is decided in the expanded code:
  # `opts` here is the variable bound by the code emitted by `compile/5`.
  combinator_kind = quote(do: if(opts[:export_combinator], do: :def, else: :defp))

  compile(:def, combinator_kind, name, combinator, opts)
end
|
|
|
|
@doc """
|
|
Defines a private parser (and a combinator) with the given `name` and `opts`.
|
|
|
|
The same as `defparsec/3` but the parsing function is private.
|
|
"""
|
|
# Both the entry-point parsing function and the combinator are private.
defmacro defparsecp(name, combinator, opts \\ []),
  do: compile(:defp, :defp, name, combinator, opts)
|
|
|
|
@doc """
|
|
Defines a combinator with the given `name` and `opts`.
|
|
|
|
It is similar to `defparsec/3` except it does not define
|
|
an entry-point parsing function, just the combinator function
|
|
to be used with `parsec/2`.
|
|
"""
|
|
# No entry-point parsing function (`nil`); the combinator itself is public.
defmacro defcombinator(name, combinator, opts \\ []),
  do: compile(nil, :def, name, combinator, opts)
|
|
|
|
@doc """
|
|
Defines a combinator with the given `name` and `opts`.
|
|
|
|
It is similar to `defparsecp/3` except it does not define
|
|
an entry-point parsing function, just the combinator function
|
|
to be used with `parsec/2`.
|
|
"""
|
|
# No entry-point parsing function (`nil`); the combinator itself is private.
defmacro defcombinatorp(name, combinator, opts \\ []),
  do: compile(nil, :defp, name, combinator, opts)
|
|
|
|
# Builds the quoted code shared by all `def*parsec*`/`def*combinator*` macros.
#
# `parser_kind` is `:def`, `:defp` or `nil` (no entry-point function) and
# `combinator_kind` is `:def`, `:defp` or a quoted expression evaluating to one
# of them in the caller.
defp compile(parser_kind, combinator_kind, name, combinator, opts) do
  # Bind the options and the combinator visibility as variables in the caller,
  # so that the quoted code below can read them.
  bindings =
    quote do
      opts = unquote(opts)
      combinator_kind = unquote(combinator_kind)
    end

  definitions =
    quote bind_quoted: [parser_kind: parser_kind, name: name, combinator: combinator] do
      {defs, inline} = NimbleParsec.Compiler.compile(name, combinator, opts)

      NimbleParsec.Recorder.record(
        __MODULE__,
        parser_kind,
        combinator_kind,
        name,
        defs,
        inline,
        opts
      )

      # Expose the combinator (reversed and escaped) when metadata is requested.
      if opts[:export_metadata] do
        def __nimble_parsec__(unquote(name)),
          do: unquote(combinator |> Enum.reverse() |> Macro.escape())
      end

      if inline != [] do
        @compile {:inline, inline}
      end

      # Emit every compiled clause with the requested visibility.
      if combinator_kind == :def do
        for {fun, params, guards, body} <- defs do
          def unquote(fun)(unquote_splicing(params)) when unquote(guards), do: unquote(body)
        end
      else
        for {fun, params, guards, body} <- defs do
          defp unquote(fun)(unquote_splicing(params)) when unquote(guards), do: unquote(body)
        end
      end
    end

  quote do
    unquote(bindings)
    unquote(compile_parser(name, parser_kind))
    unquote(definitions)
  end
end
|
|
|
|
# Returns the quoted definition of the entry-point parsing function, or `:ok`
# when no entry point was requested.
defp compile_parser(_name, nil), do: :ok

defp compile_parser(name, :def) do
  quote bind_quoted: [name: name] do
    {doc, spec, {fun, params, guards, body}} = NimbleParsec.Compiler.entry_point(name)
    # Only set the generated @doc when the module has no pending @doc value.
    Module.get_attribute(__MODULE__, :doc) || @doc doc
    @spec unquote(spec)
    def unquote(fun)(unquote_splicing(params)) when unquote(guards), do: unquote(body)
  end
end

defp compile_parser(name, :defp) do
  quote bind_quoted: [name: name] do
    # The generated doc is ignored for private entry points.
    {_doc, spec, {fun, params, guards, body}} = NimbleParsec.Compiler.entry_point(name)
    @spec unquote(spec)
    defp unquote(fun)(unquote_splicing(params)) when unquote(guards), do: unquote(body)
  end
end
|
|
|
|
# A combinator as seen by users: an opaque list of combinator steps.
@opaque t :: [combinator]
# Modifier attached to a binary segment step.
@type bin_modifier :: :integer | :utf8 | :utf16 | :utf32
# Codepoint ranges accepted by the char/string combinators.
@type range :: inclusive_range | exclusive_range
# A supported `min..max` range or a single supported codepoint.
@type inclusive_range :: Range.t() | char
# A not supported `min..max` range or a single not supported codepoint.
@type exclusive_range :: {:not, Range.t()} | {:not, char}
# Length constraints given as options.
@type min_and_max :: {:min, non_neg_integer} | {:max, pos_integer}
# A remote call, a local call or an atom naming a local function.
@type call :: mfargs | fargs | atom
@type mfargs :: {module, atom, args :: [term]}
@type fargs :: {atom, args :: [term]}
# Number of repetitions used by `generate/1`: a fixed count, a range to pick
# from, or `nil` when no `:gen_times` was given.
@type gen_times :: Range.t() | non_neg_integer | nil
# Weights used by `generate/1` to balance choices, or `nil` for an even pick.
@type gen_weights :: [pos_integer] | nil
@type opts :: Keyword.t()

# Steps to add a new combinator:
#
#   1. Update the combinator type below
#   2. Update the compiler with combinator
#   3. Update the compiler with label step
#
@typep combinator :: bound_combinator | maybe_bound_combinator | unbound_combinator

@typep bound_combinator ::
         {:bin_segment, [inclusive_range], [exclusive_range], bin_modifier}
         | {:string, binary}
         | {:bytes, pos_integer}
         | :eos

@typep maybe_bound_combinator ::
         {:label, t, binary}
         | {:traverse, t, :pre | :post | :constant, [mfargs]}

@typep unbound_combinator ::
         {:choice, [t], gen_weights}
         | {:eventually, t}
         | {:lookahead, t, :positive | :negative}
         | {:parsec, atom | {module, atom}}
         | {:repeat, t, mfargs, gen_times}
         | {:times, t, pos_integer}
|
|
|
|
@doc ~S"""
|
|
Generate a random binary from the given parsec.
|
|
|
|
Let's see an example:
|
|
|
|
import NimbleParsec
|
|
generate(choice([string("foo"), string("bar")]))
|
|
|
|
The command above will return either "foo" or "bar". `generate/1`
|
|
is often used with pre-defined parsecs. In this case, the
|
|
`:export_metadata` flag must be set:
|
|
|
|
defmodule SomeModule do
|
|
import NimbleParsec
|
|
defparsec :parse,
|
|
choice([string("foo"), string("bar")]),
|
|
export_metadata: true
|
|
end
|
|
|
|
# Reference the parsec and generate from it
|
|
NimbleParsec.parsec({SomeModule, :parse})
|
|
|> NimbleParsec.generate()
|
|
|> IO.puts()
|
|
|
|
`generate/1` can often run forever for recursive algorithms.
|
|
Read the notes below and make use of the `:gen_weights` and `:gen_times`
options to certain parsecs to control the recursion depth.
|
|
|
|
## Notes
|
|
|
|
Overall, there is no guarantee over the generated output, except
|
|
that it will generate a binary that is parseable by the parsec
|
|
itself, but even this guarantee may be broken by parsers that have
|
|
custom validations. Keep in mind the following:
|
|
|
|
* `generate/1` is not compatible with NimbleParsecs dumped via
|
|
`mix nimble_parsec.compile`;
|
|
|
|
* `parsec/2` requires the referenced parsec to set `export_metadata: true`
|
|
on its definition;
|
|
|
|
* `choice/2` will be generated evenly. You can pass `:gen_weights`
|
|
as a list of positive integer weights to balance your choices.
|
|
This is particularly important for recursive algorithms;
|
|
|
|
* `repeat/2` and `repeat_while/3` will repeat between 0 and 3 times unless
|
|
a `:gen_times` option is given to these operations. `times/3` without a `:max`
|
|
will also additionally repeat between 0 and 3 times unless `:gen_times` is given.
|
|
The `:gen_times` option can either be an integer as the number of times to
|
|
repeat or a range where a random value in the range will be picked;
|
|
|
|
* `eventually/2` always generates the eventually parsec immediately;
|
|
|
|
* `lookahead/2` and `lookahead_not/2` are simply discarded;
|
|
|
|
* Validations done in any of the traverse definitions are not taken into account
|
|
by the generator. Therefore, if a parsec does validations, the generator may
|
|
generate binaries invalid to said parsec;
|
|
|
|
"""
|
|
def generate(parsecs) do
  # Put the steps in parse order, then generate with no module bound yet
  # (`nil`) and an empty accumulator.
  iodata = generate(Enum.reverse(parsecs), nil, [])
  IO.iodata_to_binary(iodata)
end
|
|
|
|
# Walks the steps (already in parse order) and accumulates the generated
# iodata in reverse order. `mod` is the module used to resolve local
# `parsec(name)` references; it is `nil` until a remote parsec is entered.

defp generate([{:parsec, fun} | _parsecs], nil, _acc) when is_atom(fun) do
  raise "cannot generate parsec(#{inspect(fun)}), use a remote parsec instead"
end

# A local parsec inside a remote one resolves against the current module.
defp generate([{:parsec, fun} | parsecs], mod, acc) when is_atom(fun) do
  generate([{:parsec, {mod, fun}} | parsecs], mod, acc)
end

# The referenced parsec is generated with its own module; the remaining
# steps keep using the outer module.
defp generate([{:parsec, {mod, fun}} | outer_parsecs], outer_mod, acc) do
  gen = generate(gen_export(mod, fun), mod, [])
  generate(outer_parsecs, outer_mod, [gen | acc])
end

defp generate([{:string, string} | parsecs], mod, acc) do
  generate(parsecs, mod, [string | acc])
end

defp generate([{:bin_segment, inclusive, exclusive, modifier} | parsecs], mod, acc) do
  gen = gen_bin_segment(inclusive, exclusive)

  # Integer segments are kept as a byte; others are encoded per the modifier.
  gen =
    if modifier == :integer,
      do: gen,
      else: :unicode.characters_to_binary([gen], :unicode, modifier)

  generate(parsecs, mod, [gen | acc])
end

defp generate([:eos | parsecs], mod, acc) do
  if parsecs == [] do
    generate([], mod, acc)
  else
    raise ArgumentError, "found :eos not at the end of parsecs"
  end
end

# Traversals and labels do not affect the generated input, only their contents do.
defp generate([{:traverse, t, _, _} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

defp generate([{:label, t, _} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

defp generate([{:choice, choices, weights} | parsecs], mod, acc) do
  pick = if weights, do: weighted_random(choices, weights), else: list_random(choices)
  gen = generate(pick, mod, [])
  generate(parsecs, mod, [gen | acc])
end

# Lookaheads are simply discarded.
defp generate([{:lookahead, _, _} | parsecs], mod, acc) do
  generate(parsecs, mod, acc)
end

# The eventually parsec is generated immediately, with nothing before it.
# NOTE(review): assumes `t` is stored in parse order, like in `:label` and
# `:traverse` - confirm against `eventually/2`.
defp generate([{:eventually, t} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

defp generate([{:repeat, t, _, gen} | parsecs], mod, acc) do
  generate(parsecs, mod, gen_times(t, int_random(gen), mod, acc))
end

defp generate([{:times, t, max} | parsecs], mod, acc) do
  generate(parsecs, mod, gen_times(t, Enum.random(0..max), mod, acc))
end

defp generate([{:bytes, count} | parsecs], mod, acc) do
  bytes = bytes_random(count)
  generate(parsecs, mod, [bytes | acc])
end

defp generate([], _mod, acc), do: Enum.reverse(acc)
|
|
|
|
# Fetches the combinator metadata exported by `mod` for the parsec `fun`.
#
# Raises a `RuntimeError` when the module is not available or when it does not
# export metadata for `fun`. Any other exception is left untouched so that
# unrelated failures are not reported as a missing `:export_metadata`.
defp gen_export(mod, fun) do
  if not Code.ensure_loaded?(mod) do
    raise "cannot handle parsec(#{inspect({mod, fun})}) because #{inspect(mod)} is not available"
  end

  try do
    mod.__nimble_parsec__(fun)
  rescue
    # UndefinedFunctionError: the module exports no metadata at all.
    # FunctionClauseError: metadata is exported, but not for `fun`.
    _ in [UndefinedFunctionError, FunctionClauseError] ->
      raise "cannot handle parsec(#{inspect({mod, fun})}) because #{inspect(mod)} " <>
              "did not set :export_metadata when defining #{fun}"
  end
end
|
|
|
|
# Generates `t` exactly `n` times, prepending each result to `acc`.
#
# `n` must be a non-negative integer. The guard makes an invalid count (for
# example a negative value picked from a `:gen_times` range) fail with a
# `FunctionClauseError` instead of recursing forever.
defp gen_times(_t, 0, _mod, acc), do: acc

defp gen_times(t, n, mod, acc) when is_integer(n) and n > 0 do
  gen = generate(t, mod, [])
  gen_times(t, n - 1, mod, [gen | acc])
end
|
|
|
|
# Picks a random value from the inclusive ranges (any byte when none is given)
# and retries until the value is not rejected by an exclusive range.
defp gen_bin_segment(inclusive, exclusive) do
  candidates = if inclusive == [], do: [0..255], else: inclusive
  value = int_random(list_random(candidates))
  rejected? = Enum.any?(exclusive, fn exclusion -> exclude_bin_segment?(exclusion, value) end)

  if rejected?, do: gen_bin_segment(inclusive, exclusive), else: value
end
|
|
|
|
# Tells whether the generated `value` is rejected by the given exclusion,
# which is either an excluded range or a single excluded codepoint.
defp exclude_bin_segment?({:not, _.._//_ = excluded}, value) do
  Enum.member?(excluded, value)
end

defp exclude_bin_segment?({:not, excluded}, value) when is_integer(excluded) do
  value == excluded
end
|
|
|
|
# Resolves a count: `nil` picks a value between 0 and 3, a range picks a
# random value within it, and an integer is returned as is.
defp int_random(nil) do
  Enum.random(0..3)
end

defp int_random(_.._//_ = bounds) do
  Enum.random(bounds)
end

defp int_random(fixed) when is_integer(fixed) do
  fixed
end
|
|
|
|
# Enum.random uses reservoir sampling but our lists are short, so we use length + fetch!
|
|
defp list_random(list) when is_list(list) do
  # :rand.uniform/1 is 1-based, Enum.fetch!/2 is 0-based.
  index = :rand.uniform(length(list)) - 1
  Enum.fetch!(list, index)
end
|
|
|
|
# Picks an element of `list` with a probability proportional to its weight.
defp weighted_random(list, weights) do
  total = Enum.sum(weights)
  weighted_random(list, weights, :rand.uniform(total))
end

# Walks elements and weights in lockstep, consuming `chosen` until it falls
# within the weight of the current element.
defp weighted_random([elem | rest], [weight | weights], chosen) do
  if chosen <= weight do
    elem
  else
    weighted_random(rest, weights, chosen - weight)
  end
end
|
|
|
|
# Returns `count` random bytes.
defp bytes_random(count) when is_integer(count),
  do: :crypto.strong_rand_bytes(count)
|
|
|
|
@doc ~S"""
|
|
Returns an empty combinator.
|
|
|
|
An empty combinator cannot be compiled on its own.
|
|
"""
|
|
@spec empty() :: t
|
|
# The empty combinator is the empty list.
def empty(), do: []
|
|
|
|
@doc """
|
|
Invokes an already compiled combinator with name `name` in the
|
|
same module.
|
|
|
|
Every parser defined via `defparsec/3` or `defparsecp/3` can be
|
|
used as combinator. However, the `defparsec/3` and `defparsecp/3`
|
|
functions also define an entry-point parsing function, as implied
|
|
by their names. If you want to define a combinator with the sole
|
|
purpose of using it in other combinators, use `defcombinatorp/3` instead.
|
|
|
|
## Use cases
|
|
|
|
`parsec/2` is useful to implement recursive definitions.
|
|
|
|
Note, while `parsec/2` can be used to compose smaller combinators,
|
|
the preferred mechanism for doing composition is via regular functions
|
|
and not via `parsec/2`. Let's see a practical example. Imagine
|
|
that you have this module:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
date =
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|
|
time =
|
|
integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> optional(string("Z"))
|
|
|
|
defparsec :datetime, date |> ignore(string("T")) |> concat(time), debug: true
|
|
end
|
|
|
|
Now imagine that you want to break `date` and `time` apart
|
|
into helper functions, as you use them in other occasions.
|
|
Generally speaking, you should **NOT** do this:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defcombinatorp :date,
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|
|
defcombinatorp :time,
|
|
integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> optional(string("Z"))
|
|
|
|
defparsec :datetime,
|
|
parsec(:date) |> ignore(string("T")) |> concat(parsec(:time))
|
|
end
|
|
|
|
The reason why the above is not recommended is because each
|
|
`parsec/2` combinator ends-up adding a stacktrace entry during
|
|
parsing, which affects the ability of `NimbleParsec` to optimize
|
|
code. If the goal is to compose combinators, you can do so
|
|
with modules and functions:
|
|
|
|
defmodule MyParser.Helpers do
|
|
import NimbleParsec
|
|
|
|
def date do
|
|
integer(4)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
|> ignore(string("-"))
|
|
|> integer(2)
|
|
end
|
|
|
|
def time do
|
|
integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> ignore(string(":"))
|
|
|> integer(2)
|
|
|> optional(string("Z"))
|
|
end
|
|
end
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
import MyParser.Helpers
|
|
|
|
defparsec :datetime,
|
|
date() |> ignore(string("T")) |> concat(time())
|
|
end
|
|
|
|
The implementation above will be able to compile to the most
|
|
efficient format as possible without forcing new stacktrace
|
|
entries.
|
|
|
|
The only situation where you should use `parsec/2` for composition
|
|
is when a large parser is used over and over again in a way
|
|
compilation times are high. In this sense, you can use `parsec/2`
|
|
to improve compilation time at the cost of runtime performance.
|
|
By using `parsec/2`, the tree size built at compile time will be
|
|
reduced although runtime performance is degraded as `parsec`
|
|
introduces a stacktrace entry.
|
|
|
|
## Remote combinators
|
|
|
|
You can also reference combinators in other modules by passing
|
|
a tuple with the module name and a function to `parsec/2` as follows:
|
|
|
|
defmodule RemoteCombinatorModule do
|
|
defcombinator :upcase_unicode, utf8_char([...long, list, of, unicode, chars...])
|
|
end
|
|
|
|
defmodule LocalModule do
|
|
# Parsec that depends on `:upcase_unicode`
|
|
defparsec :parsec_name,
|
|
...
|
|
|> ascii_char([?a..?z])
|
|
|> parsec({RemoteCombinatorModule, :upcase_unicode})
|
|
end
|
|
|
|
Remote combinators are useful when breaking the compilation of
|
|
large modules apart in order to use Elixir's ability to compile
|
|
modules in parallel.
|
|
|
|
## Examples
|
|
|
|
A good example of using `parsec` is with recursive parsers.
|
|
A limited but recursive XML parser could be written as follows:
|
|
|
|
defmodule SimpleXML do
|
|
import NimbleParsec
|
|
|
|
tag = ascii_string([?a..?z, ?A..?Z], min: 1)
|
|
text = ascii_string([not: ?<], min: 1)
|
|
|
|
opening_tag =
|
|
ignore(string("<"))
|
|
|> concat(tag)
|
|
|> ignore(string(">"))
|
|
|
|
closing_tag =
|
|
ignore(string("</"))
|
|
|> concat(tag)
|
|
|> ignore(string(">"))
|
|
|
|
defparsec :xml,
|
|
opening_tag
|
|
|> repeat(lookahead_not(string("</")) |> choice([parsec(:xml), text]))
|
|
|> concat(closing_tag)
|
|
|> wrap()
|
|
end
|
|
|
|
SimpleXML.xml("<foo>bar</foo>")
|
|
#=> {:ok, [["foo", "bar", "foo"]], "", %{}, {1, 0}, 14}
|
|
|
|
In the example above, `defparsec/3` has defined the entry-point
|
|
parsing function as well as a combinator which we have invoked
|
|
with `parsec(:xml)`.
|
|
|
|
In many cases, however, you want to define recursive combinators
|
|
without the entry-point parsing function. We can do this by
|
|
replacing `defparsec/3` by `defcombinatorp`:
|
|
|
|
defcombinatorp :xml,
|
|
opening_tag
|
|
|> repeat(lookahead_not(string("</")) |> choice([parsec(:xml), text]))
|
|
|> concat(closing_tag)
|
|
|> wrap()
|
|
|
|
When using `defcombinatorp`, you can no longer invoke
|
|
`SimpleXML.xml(xml)` as there is no associated parsing function.
|
|
You can only access the combinator above via `parsec/2`.
|
|
"""
|
|
@spec parsec(name :: atom) :: t
|
|
@spec parsec(t, name :: atom) :: t
|
|
@spec parsec({module, function_name :: atom}) :: t
|
|
@spec parsec(t, {module, function_name :: atom}) :: t
|
|
def parsec(combinator \\ empty(), name)

# Local reference: the combinator lives in the same module.
def parsec(combinator, name) when is_combinator(combinator) and is_atom(name),
  do: [{:parsec, name} | combinator]

# Remote reference: the combinator lives in another module.
def parsec(combinator, {mod, fun} = remote)
    when is_combinator(combinator) and is_atom(mod) and is_atom(fun),
    do: [{:parsec, remote} | combinator]
|
|
|
|
@doc ~S"""
|
|
Defines a single ASCII codepoint in the given ranges.
|
|
|
|
`ranges` is a list containing one of:
|
|
|
|
* a `min..max` range expressing supported codepoints
|
|
* a `codepoint` integer expressing a supported codepoint
|
|
* `{:not, min..max}` expressing not supported codepoints
|
|
* `{:not, codepoint}` expressing a not supported codepoint
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :digit_and_lowercase,
|
|
empty()
|
|
|> ascii_char([?0..?9])
|
|
|> ascii_char([?a..?z])
|
|
end
|
|
|
|
MyParser.digit_and_lowercase("1a")
|
|
#=> {:ok, [?1, ?a], "", %{}, {1, 0}, 2}
|
|
|
|
MyParser.digit_and_lowercase("a1")
|
|
#=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range 'a' to 'z'", "a1", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
@spec ascii_char([range]) :: t
|
|
@spec ascii_char(t, [range]) :: t
|
|
def ascii_char(combinator \\ empty(), ranges)
    when is_combinator(combinator) and is_list(ranges) do
  # Separate the supported ranges from the `{:not, _}` ones.
  {supported, unsupported} = split_ranges!(ranges, "ascii_char")

  combinator
  |> bin_segment(supported, unsupported, :integer)
end
|
|
|
|
@doc ~S"""
|
|
Defines a single UTF-8 codepoint in the given ranges.
|
|
|
|
`ranges` is a list containing one of:
|
|
|
|
* a `min..max` range expressing supported codepoints
|
|
* a `codepoint` integer expressing a supported codepoint
|
|
* `{:not, min..max}` expressing not supported codepoints
|
|
* `{:not, codepoint}` expressing a not supported codepoint
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :digit_and_utf8,
|
|
empty()
|
|
|> utf8_char([?0..?9])
|
|
|> utf8_char([])
|
|
end
|
|
|
|
MyParser.digit_and_utf8("1é")
|
|
#=> {:ok, [?1, ?é], "", %{}, {1, 0}, 3}
|
|
|
|
MyParser.digit_and_utf8("a1")
|
|
#=> {:error, "expected utf8 codepoint in the range '0' to '9', followed by utf8 codepoint", "a1", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
@spec utf8_char([range]) :: t
|
|
@spec utf8_char(t, [range]) :: t
|
|
def utf8_char(combinator \\ empty(), ranges)
    when is_combinator(combinator) and is_list(ranges) do
  # Separate the supported ranges from the `{:not, _}` ones.
  {supported, unsupported} = split_ranges!(ranges, "utf8_char")

  combinator
  |> bin_segment(supported, unsupported, :utf8)
end
|
|
|
|
@doc ~S"""
|
|
Adds a label to the combinator to be used in error reports.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :digit_and_lowercase,
|
|
empty()
|
|
|> ascii_char([?0..?9])
|
|
|> ascii_char([?a..?z])
|
|
|> label("digit followed by lowercase letter")
|
|
end
|
|
|
|
MyParser.digit_and_lowercase("1a")
|
|
#=> {:ok, [?1, ?a], "", %{}, {1, 0}, 2}
|
|
|
|
MyParser.digit_and_lowercase("a1")
|
|
#=> {:error, "expected digit followed by lowercase letter", "a1", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
@spec label(t, String.t()) :: t
|
|
@spec label(t, t, String.t()) :: t
|
|
def label(combinator \\ empty(), to_label, label)
    when is_combinator(combinator) and is_combinator(to_label) and is_binary(label) do
  non_empty!(to_label, "label")

  # The labelled steps are stored reversed, i.e. in parse order.
  labelled = {:label, Enum.reverse(to_label), label}
  [labelled | combinator]
end
|
|
|
|
@doc ~S"""
|
|
Defines an integer combinator with exact length or `min` and `max`
|
|
length.
|
|
|
|
If you want an integer of unknown size, use `integer(min: 1)`.
|
|
|
|
This combinator does not parse the sign and is always on base 10.
|
|
|
|
## Examples
|
|
|
|
With exact length:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :two_digits_integer, integer(2)
|
|
end
|
|
|
|
MyParser.two_digits_integer("123")
|
|
#=> {:ok, [12], "3", %{}, {1, 0}, 2}
|
|
|
|
MyParser.two_digits_integer("1a3")
|
|
#=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range '0' to '9'", "1a3", %{}, {1, 0}, 0}
|
|
|
|
With min and max:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :two_digits_integer, integer(min: 2, max: 4)
|
|
end
|
|
|
|
MyParser.two_digits_integer("123")
|
|
#=> {:ok, [123], "", %{}, {1, 0}, 3}
|
|
|
|
MyParser.two_digits_integer("1a3")
|
|
#=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range '0' to '9'", "1a3", %{}, {1, 0}, 0}
|
|
|
|
If the size of the integer has a min and max close to each other, such as
|
|
from 2 to 4 or from 1 to 2, using choice may emit more efficient code:
|
|
|
|
choice([integer(4), integer(3), integer(2)])
|
|
|
|
Note you should start from bigger to smaller.
|
|
"""
|
|
@spec integer(pos_integer | [min_and_max]) :: t
|
|
@spec integer(t, pos_integer | [min_and_max]) :: t
|
|
def integer(combinator \\ empty(), count_or_opts)

# Exact number of digits.
def integer(combinator, count)
    when is_combinator(combinator) and is_integer(count) do
  validate_min_and_max!(count, 1)
  digit = ascii_char([?0..?9])

  combinator
  |> min_max_compile_runtime_chars(
    digit,
    count,
    :__compile_integer__,
    :__runtime_integer__,
    []
  )
end

# Number of digits bounded by `:min` and/or `:max`.
def integer(combinator, opts)
    when is_combinator(combinator) and is_list(opts) do
  # Read the minimum and maximum value to ensure the presence of at least one character
  {min_val, max_val} = validate_min_and_max!(opts, 1)
  bounded_opts = Keyword.put(Keyword.put(opts, :min, min_val), :max, max_val)
  digit = ascii_char([?0..?9])

  combinator
  |> min_max_compile_runtime_chars(
    digit,
    bounded_opts,
    :__compile_integer__,
    :__runtime_integer__,
    []
  )
end
|
|
|
|
@doc ~S"""
|
|
Defines an ASCII string combinator with an exact length or `min` and `max`
|
|
length.
|
|
|
|
The `ranges` specify the allowed characters in the ASCII string.
|
|
See `ascii_char/2` for more information.
|
|
|
|
If you want a string of unknown size, use `ascii_string(ranges, min: 1)`.
|
|
If you want a literal string, use `string/2`.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :two_lowercase_letters, ascii_string([?a..?z], 2)
|
|
end
|
|
|
|
MyParser.two_lowercase_letters("abc")
|
|
#=> {:ok, ["ab"], "c", %{}, {1, 0}, 2}
|
|
|
|
"""
|
|
@spec ascii_string([range], pos_integer | [min_and_max]) :: t
|
|
@spec ascii_string(t, [range], pos_integer | [min_and_max]) :: t
|
|
def ascii_string(combinator \\ empty(), range, count_or_opts)
    when is_combinator(combinator) and is_list(range) and
           (is_integer(count_or_opts) or is_list(count_or_opts)) do
  # A string is a run of single ASCII chars in the given ranges.
  char = ascii_char(range)
  modifiers = [quote(do: integer)]

  combinator
  |> min_max_compile_runtime_chars(
    char,
    count_or_opts,
    :__compile_string__,
    :__runtime_string__,
    modifiers
  )
end
|
|
|
|
@doc ~S"""
|
|
Defines a UTF8 string combinator with an exact length or `min` and `max`
|
|
codepoint length.
|
|
|
|
The `ranges` specify the allowed characters in the UTF8 string.
|
|
See `utf8_char/2` for more information.
|
|
|
|
If you want a string of unknown size, use `utf8_string(ranges, min: 1)`.
|
|
If you want a literal string, use `string/2`.
|
|
|
|
Note that the combinator matches on codepoints, not graphemes. Therefore
|
|
results may vary depending on whether the input is in `nfc` or `nfd`
|
|
normalized form.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :two_letters, utf8_string([], 2)
|
|
end
|
|
|
|
MyParser.two_letters("áé")
|
|
#=> {:ok, ["áé"], "", %{}, {1, 0}, 4}
|
|
|
|
"""
|
|
@spec utf8_string([range], pos_integer | [min_and_max]) :: t
|
|
@spec utf8_string(t, [range], pos_integer | [min_and_max]) :: t
|
|
def utf8_string(combinator \\ empty(), range, count_or_opts)
    when is_combinator(combinator) and is_list(range) and
           (is_integer(count_or_opts) or is_list(count_or_opts)) do
  # A string is a run of single UTF-8 codepoints in the given ranges.
  char = utf8_char(range)
  modifiers = [quote(do: utf8)]

  combinator
  |> min_max_compile_runtime_chars(
    char,
    count_or_opts,
    :__compile_string__,
    :__runtime_string__,
    modifiers
  )
end
|
|
|
|
@doc ~S"""
|
|
Defines an end of string combinator.
|
|
|
|
The end of string does not produce a token and can be parsed multiple times.
|
|
This function is useful to avoid having to check for an empty remainder after
|
|
a successful parse.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :letter_pairs, utf8_string([], 2) |> repeat() |> eos()
|
|
end
|
|
|
|
MyParser.letter_pairs("hi")
|
|
#=> {:ok, ["hi"], "", %{}, {1, 0}, 2}
|
|
|
|
MyParser.letter_pairs("hello")
|
|
#=> {:error, "expected end of string", "o", %{}, {1, 0}, 4}
|
|
"""
|
|
@spec eos :: t
|
|
@spec eos(t) :: t
|
|
# The guard matches the other combinators in this module: a non-combinator
# argument now raises a `FunctionClauseError` instead of silently building an
# improper list.
def eos(combinator \\ empty()) when is_combinator(combinator) do
  [:eos | combinator]
end
|
|
|
|
@doc ~S"""
|
|
Concatenates two combinators.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :digit_upper_lower_plus,
|
|
concat(
|
|
concat(ascii_char([?0..?9]), ascii_char([?A..?Z])),
|
|
concat(ascii_char([?a..?z]), ascii_char([?+..?+]))
|
|
)
|
|
end
|
|
|
|
MyParser.digit_upper_lower_plus("1Az+")
|
|
#=> {:ok, [?1, ?A, ?z, ?+], "", %{}, {1, 0}, 4}
|
|
|
|
"""
|
|
@spec concat(t, t) :: t
|
|
# Combinators are built in reverse, so the steps of `right` go in front.
def concat(left, right) when is_combinator(left) and is_combinator(right),
  do: right ++ left
|
|
|
|
@doc """
|
|
Duplicates the combinator `to_duplicate` `n` times.
|
|
"""
|
|
@spec duplicate(t, non_neg_integer) :: t
|
|
@spec duplicate(t, t, non_neg_integer) :: t
|
|
def duplicate(combinator \\ empty(), to_duplicate, n)

# Zero repetitions leave the combinator untouched.
def duplicate(combinator, to_duplicate, 0)
    when is_combinator(combinator) and is_combinator(to_duplicate),
    do: combinator

def duplicate(combinator, to_duplicate, n)
    when is_combinator(combinator) and is_combinator(to_duplicate) and is_integer(n) and n >= 1 do
  # Prepend one copy of `to_duplicate` per repetition.
  to_duplicate
  |> List.duplicate(n)
  |> Enum.reduce(combinator, fn copy, acc -> copy ++ acc end)
end
|
|
|
|
@doc """
|
|
Puts the result of the given combinator as the first element
|
|
of a tuple with the `byte_offset` as second element.
|
|
|
|
`byte_offset` is a non-negative integer.
|
|
"""
|
|
@spec byte_offset(t) :: t
|
|
@spec byte_offset(t, t) :: t
|
|
def byte_offset(combinator \\ empty(), to_wrap)
    when is_combinator(combinator) and is_combinator(to_wrap) do
  # Implemented as a post-traversal calling `__byte_offset__` in this module.
  traversal = {__MODULE__, :__byte_offset__, []}
  quoted_post_traverse(combinator, to_wrap, traversal)
end
|
|
|
|
@doc """
|
|
Puts the result of the given combinator as the first element
|
|
of a tuple with the `line` as second element.
|
|
|
|
`line` is a tuple where the first element is the current line
|
|
and the second element is the byte offset immediately after
|
|
the newline.
|
|
"""
|
|
@spec line(t) :: t
|
|
@spec line(t, t) :: t
|
|
def line(combinator \\ empty(), to_wrap)
    when is_combinator(combinator) and is_combinator(to_wrap) do
  # Implemented as a post-traversal calling `__line__` in this module.
  traversal = {__MODULE__, :__line__, []}
  quoted_post_traverse(combinator, to_wrap, traversal)
end
|
|
|
|
@doc ~S"""
|
|
Traverses the combinator results with the remote or local function `call`.
|
|
|
|
`call` is either a `{module, function, args}` representing
|
|
a remote call, a `{function, args}` representing a local call
|
|
or an atom `function` representing `{function, []}`.
|
|
|
|
The function given in `call` will receive 5 additional arguments.
|
|
The rest of the parsed binary, the parser results to be post_traversed,
|
|
the parser context, the current line and the current offset will
|
|
be prepended to the given `args`. The `args` will be injected at
|
|
the compile site and therefore must be escapable via `Macro.escape/1`.
|
|
|
|
The line and offset will represent the location after the combinators.
|
|
To retrieve the position before the combinators, use `pre_traverse/3`.
|
|
|
|
The `call` must return a tuple `{rest, acc, context}` with the list of
results to be added to the accumulator as second element and a context
as third element. It may also return `{:error, reason}` to stop
|
|
processing. Notice the received results are in reverse order and
|
|
must be returned in reverse order too.
|
|
|
|
The number of elements returned does not need to be
|
|
the same as the number of elements given.
|
|
|
|
This is a low-level function for changing the parsed result.
|
|
On top of this function, other functions are built, such as
|
|
`map/3` if you want to map over each individual element and
|
|
not worry about ordering, `reduce/3` to reduce all elements
|
|
into a single one, `replace/3` if you want to replace the
|
|
parsed result by a single value and `ignore/2` if you want to
|
|
ignore the parsed result.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :letters_to_chars,
|
|
ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> post_traverse({:join_and_wrap, ["-"]})
|
|
|
|
defp join_and_wrap(rest, args, context, _line, _offset, joiner) do
|
|
{rest, args |> Enum.join(joiner) |> List.wrap(), context}
|
|
end
|
|
end
|
|
|
|
MyParser.letters_to_chars("abc")
|
|
#=> {:ok, ["99-98-97"], "", %{}, {1, 0}, 3}
|
|
|
|
"""
|
|
@spec post_traverse(t, call) :: t
@spec post_traverse(t, t, call) :: t
def post_traverse(combinator \\ empty(), to_post_traverse, call)
    when is_combinator(combinator) and is_combinator(to_post_traverse) do
  # Compiled here only for validation: a malformed `call` raises an
  # ArgumentError right away. The result is intentionally discarded.
  _validated = compile_call!([], call, "post_traverse")

  traverser = {__MODULE__, :__post_traverse__, [call]}
  quoted_post_traverse(combinator, to_post_traverse, traverser)
end
|
|
|
|
@doc """
|
|
The same as `post_traverse/3` but receives the line and offset
|
|
from before the wrapped combinators.
|
|
|
|
`post_traverse/3` should be preferred as it keeps less stack
|
|
information. Use `pre_traverse/3` only if you have to access
|
|
the line and offset from before the given combinators.
|
|
"""
|
|
@spec pre_traverse(t, call) :: t
@spec pre_traverse(t, t, call) :: t
def pre_traverse(combinator \\ empty(), to_pre_traverse, call)
    when is_combinator(combinator) and is_combinator(to_pre_traverse) do
  # Compiled here only for validation: a malformed `call` raises an
  # ArgumentError right away. The result is intentionally discarded.
  _validated = compile_call!([], call, "pre_traverse")

  traverser = {__MODULE__, :__pre_traverse__, [call]}
  quoted_pre_traverse(combinator, to_pre_traverse, traverser)
end
|
|
|
|
@doc ~S"""
|
|
Checks if a combinator is ahead.
|
|
|
|
If it succeeds, it continues as usual, otherwise it aborts the
|
|
closest `choice/2`, `repeat/2`, etc. If there is no closest
|
|
operation to abort, then it errors.
|
|
|
|
Note a lookahead never changes the accumulated output nor the
|
|
context.
|
|
|
|
## Examples
|
|
|
|
For example, imagine you want to parse a language that has the
|
|
keywords "if" and "while" and identifiers made of any letters or
|
|
numbers, where keywords and identifiers can be separated by a
|
|
single white space:
|
|
|
|
defmodule IfWhileLang do
|
|
import NimbleParsec
|
|
|
|
keyword =
|
|
choice([
|
|
string("if") |> replace(:if),
|
|
string("while") |> replace(:while)
|
|
])
|
|
|
|
identifier =
|
|
ascii_string([?a..?z, ?A..?Z, ?0..?9], min: 1)
|
|
|
|
defparsec :expr, repeat(choice([keyword, identifier]) |> optional(string(" ")))
|
|
end
|
|
|
|
The issue with the implementation above is that the following
|
|
will parse:
|
|
|
|
IfWhileLang.expr("iffy")
|
|
{:ok, [:if, "fy"], "", %{}, {1, 0}, 4}
|
|
|
|
However, "iffy" should be treated as a full identifier. We could
|
|
solve this by inverting the order of `keyword` and `identifier`
|
|
in `:expr` but that means "if" itself will be considered an identifier
|
|
and not a keyword. To solve this, we need lookaheads.
|
|
|
|
One option is to check that after the keyword we either have an
|
|
empty space OR the end of the string:
|
|
|
|
keyword =
|
|
choice([
|
|
string("if") |> replace(:if),
|
|
string("while") |> replace(:while)
|
|
])
|
|
|> lookahead(choice([string(" "), eos()]))
|
|
|
|
However, in this case, a negative lookahead may be clearer,
|
|
and we can assert that we don't have any identifier character after
|
|
the keyword:
|
|
|
|
keyword =
|
|
choice([
|
|
string("if") |> replace(:if),
|
|
string("while") |> replace(:while)
|
|
])
|
|
|> lookahead_not(ascii_char([?a..?z, ?A..?Z, ?0..?9]))
|
|
|
|
Now we get the desired result back:
|
|
|
|
IfWhileLang.expr("iffy")
|
|
#=> {:ok, ["iffy"], "", %{}, {1, 0}, 4}
|
|
|
|
IfWhileLang.expr("if fy")
|
|
#=> {:ok, [:if, " ", "fy"], "", %{}, {1, 0}, 5}
|
|
|
|
"""
|
|
@spec lookahead(t) :: t
@spec lookahead(t, t) :: t
def lookahead(combinator \\ empty(), to_lookahead)
    when is_combinator(combinator) and is_combinator(to_lookahead) do
  # Combinators are accumulated in reverse, so the inner one is flipped
  # before being stored in the lookahead entry.
  entry = {:lookahead, Enum.reverse(to_lookahead), :positive}
  [entry | combinator]
end
|
|
|
|
@doc ~S"""
|
|
Checks if a combinator is not ahead.
|
|
|
|
If it succeeds, it aborts the closest `choice/2`, `repeat/2`, etc.
|
|
Otherwise it continues as usual. If there is no closest operation
|
|
to abort, then it errors.
|
|
|
|
Note a lookahead never changes the accumulated output nor the
|
|
context.
|
|
|
|
For an example, see `lookahead/2`.
|
|
"""
|
|
@spec lookahead_not(t) :: t
@spec lookahead_not(t, t) :: t
def lookahead_not(combinator \\ empty(), to_lookahead)
    when is_combinator(combinator) and is_combinator(to_lookahead) do
  # Same entry shape as `lookahead/2`, flagged as `:negative`.
  entry = {:lookahead, Enum.reverse(to_lookahead), :negative}
  [entry | combinator]
end
|
|
|
|
@doc """
|
|
Invokes `call` to emit the AST that post traverses the `to_post_traverse`
|
|
combinator results.
|
|
|
|
This is similar to `post_traverse/3`. In `post_traverse/3`, `call` is
|
|
invoked to process the combinator results. In here, it is invoked to
|
|
emit AST that in its turn will process the combinator results.
|
|
The invoked function must return the same types as `post_traverse/3`.
|
|
|
|
`call` is a `{module, function, args}` and it will receive 5
|
|
additional arguments. The AST representation of the rest of the
|
|
parsed binary, the parser results, context, line and offset will
|
|
be prepended to `args`. `call` is invoked at compile time and is
|
|
useful in combinators that avoid injecting runtime dependencies.
|
|
|
|
The line and offset will represent the location after the combinators.
|
|
To retrieve the position before the combinators, use `quoted_pre_traverse/3`.
|
|
|
|
This function must be used only when you want to emit code that
|
|
has no runtime dependencies in other modules. In most cases,
|
|
using `post_traverse/3` is better, since it doesn't work on ASTs
|
|
and instead works at runtime.
|
|
"""
|
|
@spec quoted_post_traverse(t, mfargs) :: t
@spec quoted_post_traverse(t, t, mfargs) :: t
def quoted_post_traverse(combinator \\ empty(), to_post_traverse, {_, _, _} = call)
    when is_combinator(combinator) and is_combinator(to_post_traverse) do
  # Builds the `:post` traverse entry directly: the wrapped combinator is
  # reversed and the call is stored in a single-element list.
  entry = {:traverse, Enum.reverse(to_post_traverse), :post, [call]}
  [entry | combinator]
end
|
|
|
|
@doc """
|
|
The same as `quoted_post_traverse/3` but receives the line and offset
|
|
from before the wrapped combinators.
|
|
|
|
`quoted_post_traverse/3` should be preferred as it keeps less stack
|
|
information. Use `quoted_pre_traverse/3` only if you have to access
|
|
the line and offset from before the given combinators.
|
|
"""
|
|
@spec quoted_pre_traverse(t, mfargs) :: t
@spec quoted_pre_traverse(t, t, mfargs) :: t
def quoted_pre_traverse(combinator \\ empty(), to_pre_traverse, {_, _, _} = call)
    when is_combinator(combinator) and is_combinator(to_pre_traverse) do
  # Builds the `:pre` traverse entry directly: the wrapped combinator is
  # reversed and the call is stored in a single-element list.
  entry = {:traverse, Enum.reverse(to_pre_traverse), :pre, [call]}
  [entry | combinator]
end
|
|
|
|
@doc ~S"""
|
|
Maps over the combinator results with the remote or local function in `call`.
|
|
|
|
`call` is either a `{module, function, args}` representing
|
|
a remote call, a `{function, args}` representing a local call
|
|
or an atom `function` representing `{function, []}`.
|
|
|
|
Each parser result will be invoked individually for the `call`.
|
|
Each result will be prepended to the given `args`. The `args` will
|
|
be injected at the compile site and therefore must be escapable
|
|
via `Macro.escape/1`.
|
|
|
|
See `post_traverse/3` for a low level version of this function.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :letters_to_string_chars,
|
|
ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> map({Integer, :to_string, []})
|
|
end
|
|
|
|
MyParser.letters_to_string_chars("abc")
|
|
#=> {:ok, ["97", "98", "99"], "", %{}, {1, 0}, 3}
|
|
"""
|
|
@spec map(t, call) :: t
@spec map(t, t, call) :: t
def map(combinator \\ empty(), to_map, call)
    when is_combinator(combinator) and is_combinator(to_map) do
  # Variable that stands for each individual result inside the generated
  # `fn`; it is also the first argument of the compiled call.
  element = Macro.var(:var, __MODULE__)
  mapper = compile_call!([element], call, "map")

  quoted_post_traverse(combinator, to_map, {__MODULE__, :__map__, [element, mapper]})
end
|
|
|
|
@doc ~S"""
|
|
Reduces over the combinator results with the remote or local function in `call`.
|
|
|
|
`call` is either a `{module, function, args}` representing
|
|
a remote call, a `{function, args}` representing a local call
|
|
or an atom `function` representing `{function, []}`.
|
|
|
|
The parser results to be reduced will be prepended to the
|
|
given `args`. The `args` will be injected at the compile site
|
|
and therefore must be escapable via `Macro.escape/1`.
|
|
|
|
See `post_traverse/3` for a low level version of this function.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :letters_to_reduced_chars,
|
|
ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> ascii_char([?a..?z])
|
|
|> reduce({Enum, :join, ["-"]})
|
|
end
|
|
|
|
MyParser.letters_to_reduced_chars("abc")
|
|
#=> {:ok, ["97-98-99"], "", %{}, {1, 0}, 3}
|
|
"""
|
|
@spec reduce(t, call) :: t
@spec reduce(t, t, call) :: t
def reduce(combinator \\ empty(), to_reduce, call)
    when is_combinator(combinator) and is_combinator(to_reduce) do
  # Validation only: raises ArgumentError when `call` has an unknown shape.
  _validated = compile_call!([], call, "reduce")

  reducer = {__MODULE__, :__reduce__, [call]}
  quoted_post_traverse(combinator, to_reduce, reducer)
end
|
|
|
|
@doc """
|
|
Wraps the results of the given combinator in `to_wrap` in a list.
|
|
"""
|
|
@spec wrap(t) :: t
@spec wrap(t, t) :: t
def wrap(combinator \\ empty(), to_wrap)
    when is_combinator(combinator) and is_combinator(to_wrap) do
  # `__wrap__/5` emits the results of `to_wrap` as a single nested list.
  wrapper = {__MODULE__, :__wrap__, []}
  quoted_post_traverse(combinator, to_wrap, wrapper)
end
|
|
|
|
@doc """
|
|
Tags the result of the given combinator in `to_tag` in a tuple with
|
|
`tag` as first element.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :integer, integer(min: 1) |> tag(:integer)
|
|
end
|
|
|
|
MyParser.integer("1234")
|
|
#=> {:ok, [integer: [1234]], "", %{}, {1, 0}, 4}
|
|
|
|
Notice, however, that the integer result is wrapped in a list, because
|
|
the parser is expected to emit multiple tokens. When you are sure that
|
|
only a single token is emitted, you should use `unwrap_and_tag/3`.
|
|
"""
|
|
@spec tag(t, term) :: t
@spec tag(t, t, term) :: t
def tag(combinator \\ empty(), to_tag, tag)
    when is_combinator(combinator) and is_combinator(to_tag) do
  # The tag ends up inside generated code, hence it is escaped first.
  escaped_tag = Macro.escape(tag)
  quoted_post_traverse(combinator, to_tag, {__MODULE__, :__tag__, [escaped_tag]})
end
|
|
|
|
@doc """
|
|
Unwraps and tags the result of the given combinator in `to_tag` in a tuple with
|
|
`tag` as first element.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :integer, integer(min: 1) |> unwrap_and_tag(:integer)
|
|
end
|
|
|
|
MyParser.integer("1234")
|
|
#=> {:ok, [integer: 1234], "", %{}, {1, 0}, 4}
|
|
|
|
|
|
In case the combinator does not emit exactly one token, an error will be raised.
|
|
See `tag/3` for more information.
|
|
"""
|
|
@spec unwrap_and_tag(t, term) :: t
@spec unwrap_and_tag(t, t, term) :: t
def unwrap_and_tag(combinator \\ empty(), to_tag, tag)
    when is_combinator(combinator) and is_combinator(to_tag) do
  # The tag ends up inside generated code, hence it is escaped first.
  escaped_tag = Macro.escape(tag)
  unwrapper = {__MODULE__, :__unwrap_and_tag__, [escaped_tag]}

  quoted_post_traverse(combinator, to_tag, unwrapper)
end
|
|
|
|
@doc """
|
|
Inspects the combinator state given to `to_debug`.
|
|
"""
|
|
@spec debug(t) :: t
@spec debug(t, t) :: t
def debug(combinator \\ empty(), to_debug)
    when is_combinator(combinator) and is_combinator(to_debug) do
  # `__debug__/5` generates the code that prints the parser state.
  printer = {__MODULE__, :__debug__, []}
  quoted_pre_traverse(combinator, to_debug, printer)
end
|
|
|
|
@doc ~S"""
|
|
Defines a string binary value.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :string_t, string("T")
|
|
end
|
|
|
|
MyParser.string_t("T")
|
|
#=> {:ok, ["T"], "", %{}, {1, 0}, 1}
|
|
|
|
MyParser.string_t("not T")
|
|
#=> {:error, "expected a string \"T\"", "not T", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
@spec string(binary) :: t
@spec string(t, binary) :: t
def string(combinator \\ empty(), binary)
    when is_combinator(combinator) and is_binary(binary) do
  # Combinators are built by prepending, so the new entry goes at the head.
  entry = {:string, binary}
  [entry | combinator]
end
|
|
|
|
@doc """
|
|
Ignores the output of combinator given in `to_ignore`.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :ignorable, string("T") |> ignore() |> integer(2)
|
|
end
|
|
|
|
MyParser.ignorable("T12")
|
|
#=> {:ok, [12], "", %{}, {1, 0}, 3}
|
|
|
|
"""
|
|
@spec ignore(t) :: t
@spec ignore(t, t) :: t
def ignore(combinator \\ empty(), to_ignore)
    when is_combinator(combinator) and is_combinator(to_ignore) do
  if to_ignore == empty() do
    # Ignoring an empty combinator adds nothing, so the combinator built
    # so far is returned untouched. Returning `to_ignore` here would drop
    # everything accumulated in `combinator`.
    combinator
  else
    # Replaces whatever `to_ignore` emits by the constant empty list.
    quoted_constant_traverse(combinator, to_ignore, {__MODULE__, :__constant__, [[]]})
  end
end
|
|
|
|
@doc """
|
|
Replaces the output of combinator given in `to_replace` by a single value.
|
|
|
|
The `value` will be injected at the compile site
|
|
and therefore must be escapable via `Macro.escape/1`.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :replaceable, string("T") |> replace("OTHER") |> integer(2)
|
|
end
|
|
|
|
MyParser.replaceable("T12")
|
|
#=> {:ok, ["OTHER", 12], "", %{}, {1, 0}, 3}
|
|
|
|
"""
|
|
@spec replace(t, term) :: t
@spec replace(t, t, term) :: t
def replace(combinator \\ empty(), to_replace, value)
    when is_combinator(combinator) and is_combinator(to_replace) do
  # The escaped value is wrapped in a list: it becomes the whole output
  # emitted in place of the results of `to_replace`.
  constant = [Macro.escape(value)]
  quoted_constant_traverse(combinator, to_replace, {__MODULE__, :__constant__, [constant]})
end
|
|
|
|
@doc """
|
|
Allows the combinator given on `to_repeat` to appear zero or more times.
|
|
|
|
Beware! Since `repeat/2` allows zero entries, it cannot be used inside
|
|
`choice/2`, because it will always succeed and may lead to unused function
|
|
warnings since any further choice won't ever be attempted. For example,
|
|
because `repeat/2` always succeeds, the `string/2` combinator below it
|
|
won't ever run:
|
|
|
|
choice([
|
|
repeat(ascii_char([?a..?z])),
|
|
string("OK")
|
|
])
|
|
|
|
Instead of `repeat/2`, you may want to use `times/3` with the flags `:min`
|
|
and `:max`.
|
|
|
|
Also beware! If you attempt to repeat a combinator that can match nothing,
|
|
like `optional/2`, `repeat/2` will not terminate. For example, consider
|
|
this combinator:
|
|
|
|
repeat(optional(utf8_char([?a])))
|
|
|
|
This combinator will never terminate because `repeat/2` chooses the empty
|
|
option of `optional/2` every time. Since the goal of the parser above is
|
|
to parse 0 or more `?a` characters, it can be represented by
|
|
`repeat(utf8_char([?a]))`, because `repeat/2` allows 0 or more matches.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :repeat_lower, repeat(ascii_char([?a..?z]))
|
|
end
|
|
|
|
MyParser.repeat_lower("abcd")
|
|
#=> {:ok, [?a, ?b, ?c, ?d], "", %{}, {1, 0}, 4}
|
|
|
|
MyParser.repeat_lower("1234")
|
|
#=> {:ok, [], "1234", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
# Defaults are filled left to right, so `repeat/2` is always
# `repeat(combinator, to_repeat)`. There is no `repeat(to_repeat, opts)`
# form, hence no spec for it: options require the 3-arity call.
@spec repeat(t) :: t
@spec repeat(t, t) :: t
@spec repeat(t, t, opts) :: t
def repeat(combinator \\ empty(), to_repeat, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  # Checked here too so the error message names `repeat` rather than
  # `quoted_repeat_while`.
  non_empty!(to_repeat, "repeat")

  # `__cont_context__/4` always answers `{:cont, context}`, so the loop
  # only stops when `to_repeat` stops matching.
  quoted_repeat_while(combinator, to_repeat, {__MODULE__, :__cont_context__, []}, opts)
end
|
|
|
|
@doc """
|
|
Marks that the given combinator should appear eventually.
|
|
|
|
Any other data before the combinator appears is discarded.
|
|
If the combinator never appears, then it is an error.
|
|
|
|
**Note:** this can be potentially a very expensive operation
|
|
as it executes the given combinator byte by byte until finding
|
|
an eventual match or ultimately failing. For example, if you
|
|
are looking for an integer, it is preferable to discard
|
|
everything that is not an integer
|
|
|
|
ignore(ascii_string([not: ?0..?9]))
|
|
|
|
rather than eventually look for an integer
|
|
|
|
eventually(ascii_string([?0..?9]))
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
hour = integer(min: 1, max: 2)
|
|
defparsec :extract_hour, eventually(hour)
|
|
end
|
|
|
|
MyParser.extract_hour("let's meet at 12?")
|
|
#=> {:ok, [12], "?", %{}, {1, 0}, 16}
|
|
|
|
"""
|
|
@spec eventually(t) :: t
@spec eventually(t, t) :: t
def eventually(combinator \\ empty(), eventually)
    when is_combinator(combinator) and is_combinator(eventually) do
  # Raises ArgumentError when the combinator to look for is empty.
  non_empty!(eventually, "eventually")

  entry = {:eventually, Enum.reverse(eventually)}
  [entry | combinator]
end
|
|
|
|
@doc ~S"""
|
|
Repeats while the given remote or local function `while` returns
|
|
`{:cont, context}`.
|
|
|
|
If the combinator `to_repeat` stops matching, then the whole repeat
|
|
loop stops successfully, hence it is important to assert the terminated
|
|
value after repeating.
|
|
|
|
In case repetition should stop, `while` must return `{:halt, context}`.
|
|
|
|
`while` is either a `{module, function, args}` representing
|
|
a remote call, a `{function, args}` representing a local call
|
|
or an atom `function` representing `{function, []}`.
|
|
|
|
The function given in `while` will receive 4 additional arguments.
|
|
The `rest` of the binary to be parsed, the parser context, the
|
|
current line and the current offset will be prepended to the
|
|
given `args`. The `args` will be injected at the compile site
|
|
and therefore must be escapable via `Macro.escape/1`.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :string_with_quotes,
|
|
ascii_char([?"])
|
|
|> repeat_while(
|
|
choice([
|
|
~S(\") |> string() |> replace(?"),
|
|
utf8_char([])
|
|
]),
|
|
{:not_quote, []}
|
|
)
|
|
|> ascii_char([?"])
|
|
|> reduce({List, :to_string, []})
|
|
|
|
defp not_quote(<<?", _::binary>>, context, _, _), do: {:halt, context}
|
|
defp not_quote(_, context, _, _), do: {:cont, context}
|
|
end
|
|
|
|
MyParser.string_with_quotes(~S("string with quotes \" inside"))
|
|
{:ok, ["\"string with quotes \" inside\""], "", %{}, {1, 0}, 30}
|
|
|
|
Note you can use `lookahead/2` and `lookahead_not/2` with
|
|
`repeat/2` (instead of `repeat_while/3`) to write a combinator
|
|
that repeats while a combinator matches (or does not match).
|
|
For example, the same combinator above could be written as:
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :string_with_quotes,
|
|
ascii_char([?"])
|
|
|> repeat(
|
|
lookahead_not(ascii_char([?"]))
|
|
|> choice([
|
|
~S(\") |> string() |> replace(?"),
|
|
utf8_char([])
|
|
])
|
|
)
|
|
|> ascii_char([?"])
|
|
|> reduce({List, :to_string, []})
|
|
end
|
|
|
|
MyParser.string_with_quotes(~S("string with quotes \" inside"))
|
|
{:ok, ["\"string with quotes \" inside\""], "", %{}, {1, 0}, 30}
|
|
|
|
However, `repeat_while` is still useful when the condition to
|
|
repeat comes from the context passed around.
|
|
"""
|
|
@spec repeat_while(t, call) :: t
@spec repeat_while(t, t, call) :: t
@spec repeat_while(t, t, call, opts) :: t
def repeat_while(combinator \\ empty(), to_repeat, while, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  non_empty!(to_repeat, "repeat_while")

  # Validation only: raises ArgumentError when `while` has an unknown shape.
  _validated = compile_call!([], while, "repeat_while")

  condition = {__MODULE__, :__repeat_while__, [while]}
  quoted_repeat_while(combinator, to_repeat, condition, opts)
end
|
|
|
|
@doc """
|
|
Invokes `while` to emit the AST that will repeat `to_repeat`
|
|
while the AST code returns `{:cont, context}`.
|
|
|
|
In case repetition should stop, `while` must return `{:halt, context}`.
|
|
|
|
`while` is a `{module, function, args}` and it will receive 4
|
|
additional arguments. The AST representations of the binary to be
|
|
parsed, context, line and offset will be prepended to `args`. `while`
|
|
is invoked at compile time and is useful in combinators that avoid
|
|
injecting runtime dependencies.
|
|
"""
|
|
@spec quoted_repeat_while(t, mfargs) :: t
@spec quoted_repeat_while(t, t, mfargs) :: t
@spec quoted_repeat_while(t, t, mfargs, opts) :: t
def quoted_repeat_while(combinator \\ empty(), to_repeat, {_, _, _} = while, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  non_empty!(to_repeat, "quoted_repeat_while")

  # `:gen_times` is the only option read here; it is `nil` when absent.
  gen_times = opts[:gen_times]
  entry = {:repeat, Enum.reverse(to_repeat), while, gen_times}

  [entry | combinator]
end
|
|
|
|
@doc """
|
|
Allows the combinator given on `to_repeat` to appear at least, at most
|
|
or exactly a given amount of times.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :minimum_lower, times(ascii_char([?a..?z]), min: 2)
|
|
end
|
|
|
|
MyParser.minimum_lower("abcd")
|
|
#=> {:ok, [?a, ?b, ?c, ?d], "", %{}, {1, 0}, 4}
|
|
|
|
MyParser.minimum_lower("ab12")
|
|
#=> {:ok, [?a, ?b], "12", %{}, {1, 0}, 2}
|
|
|
|
MyParser.minimum_lower("a123")
|
|
#=> {:ok, [], "a123", %{}, {1, 0}, 0}
|
|
|
|
"""
|
|
@spec times(t, pos_integer | [min_and_max]) :: t
@spec times(t, t, pos_integer | [min_and_max]) :: t
def times(combinator \\ empty(), to_repeat, count_or_min_max)

# Exact count: `to_repeat` is simply duplicated `n` times.
def times(combinator, to_repeat, n)
    when is_combinator(combinator) and is_combinator(to_repeat) and is_integer(n) and n >= 1 do
  non_empty!(to_repeat, "times")
  duplicate(combinator, to_repeat, n)
end

# `:min`/`:max` options: the mandatory part is duplicated `min` times and
# followed by either a bounded `:times` entry or an unbounded `:repeat`.
def times(combinator, to_repeat, opts)
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  {min, max} = validate_min_and_max!(opts)
  non_empty!(to_repeat, "times")

  required = if min > 0, do: duplicate(combinator, to_repeat, min), else: combinator
  reversed = Enum.reverse(to_repeat)

  optional_entry =
    if max do
      # Only the repetitions beyond the mandatory `min` remain optional.
      {:times, reversed, max - min}
    else
      {:repeat, reversed, {__MODULE__, :__cont_context__, []}, opts[:gen_times]}
    end

  [optional_entry | required]
end
|
|
|
|
@doc """
|
|
Chooses one of the given combinators.
|
|
|
|
Expects at least two choices.
|
|
|
|
## Beware! Char combinators
|
|
|
|
Note both `utf8_char/2` and `ascii_char/2` allow multiple ranges to
|
|
be given. Therefore, instead of this:
|
|
|
|
choice([
|
|
ascii_char([?a..?z]),
|
|
ascii_char([?A..?Z]),
|
|
])
|
|
|
|
One should simply prefer:
|
|
|
|
ascii_char([?a..?z, ?A..?Z])
|
|
|
|
As the latter is compiled more efficiently by `NimbleParsec`.
|
|
|
|
## Beware! Always successful combinators
|
|
|
|
If a combinator that always succeeds is given as a choice, that choice
|
|
will always succeed which may lead to unused function warnings since
|
|
any further choice won't ever be attempted. For example, because `repeat/2`
|
|
always succeeds, the `string/2` combinator below it won't ever run:
|
|
|
|
choice([
|
|
repeat(ascii_char([?0..?9])),
|
|
string("OK")
|
|
])
|
|
|
|
Instead of `repeat/2`, you may want to use `times/3` with the flags `:min`
|
|
and `:max`.
|
|
|
|
## Beware! Overlapping choices
|
|
|
|
In case choices overlap, there is no guarantee which error will be the one
|
|
effectively returned. For example, imagine this choice:
|
|
|
|
choice([
|
|
string("<abc>foo</abc>"),
|
|
string("<abc>")
|
|
])
|
|
|
|
Since both choices can be activated for an input starting with "<abc>",
|
|
NimbleParsec guarantees it will return the error from one of them, but
|
|
not which.
|
|
"""
|
|
@spec choice(nonempty_list(t)) :: t
@spec choice(t, nonempty_list(t)) :: t
@spec choice(t, nonempty_list(t), opts) :: t
def choice(combinator \\ empty(), [_, _ | _] = choices, opts \\ [])
    when is_combinator(combinator) do
  # Each alternative is stored reversed, like every nested combinator.
  alternatives = for alternative <- choices, do: Enum.reverse(alternative)
  weights = opts[:gen_weights]

  # When weights are given, there must be exactly one per alternative.
  if weights && length(weights) != length(alternatives) do
    raise ArgumentError, ":gen_weights must be a list of the same size as choices"
  end

  entry = {:choice, alternatives, weights}
  [entry | combinator]
end
|
|
|
|
@doc """
|
|
Marks the given combinator as `optional`.
|
|
|
|
It is equivalent to `choice([combinator, empty()])`.
|
|
"""
|
|
@spec optional(t) :: t
@spec optional(t, t) :: t
def optional(combinator \\ empty(), optional) do
  # Falling back to the empty combinator makes the choice always succeed.
  alternatives = [optional, empty()]
  choice(combinator, alternatives)
end
|
|
|
|
@doc """
|
|
Defines a combinator to consume the next `n` bytes from the input.
|
|
|
|
## Examples
|
|
|
|
defmodule MyParser do
|
|
import NimbleParsec
|
|
|
|
defparsec :three_bytes, bytes(3)
|
|
end
|
|
|
|
MyParser.three_bytes("abc")
|
|
#=> {:ok, ["abc"], "", %{}, {1, 0}, 3}
|
|
|
|
MyParser.three_bytes("ab")
|
|
#=> {:error, "expected 3 bytes", "ab", %{}, {1, 0}, 0}
|
|
"""
|
|
@spec bytes(pos_integer) :: t
@spec bytes(t, pos_integer) :: t
def bytes(combinator \\ empty(), count)
    when is_combinator(combinator) and is_integer(count) and count > 0 do
  # Combinators are built by prepending, so the new entry goes at the head.
  entry = {:bytes, count}
  [entry | combinator]
end
|
|
|
|
## Helpers
|
|
|
|
# Validates the `:min`/`:max` options (or a bare integer, treated as
# `min: count`) and returns `{min, max}`. `min` falls back to
# `required_min` and `max` is `nil` when not given. Raises ArgumentError
# when neither is given, when a value is out of range, or when `max` is
# not strictly greater than `min`.
defp validate_min_and_max!(count_or_opts, required_min \\ 0)

defp validate_min_and_max!(count, required_min) when is_integer(count),
  do: validate_min_and_max!([min: count], required_min)

defp validate_min_and_max!(opts, required_min) do
  {min, max} = {opts[:min], opts[:max]}

  cond do
    min && max ->
      validate_min_or_max!(:min, min, required_min)
      validate_min_or_max!(:max, max, 1)

      if max <= min do
        raise ArgumentError,
              "expected :max to be strictly greater than :min, got: #{min} and #{max}"
      end

    min ->
      validate_min_or_max!(:min, min, required_min)

    max ->
      validate_min_or_max!(:max, max, 1)

    true ->
      raise ArgumentError, "expected :min or :max to be given"
  end

  {min || required_min, max}
end
|
|
|
|
# Accepts `value` only when it is an integer no smaller than `min`.
# Returns `nil` on success and raises ArgumentError otherwise; `kind`
# names the offending option in the error message.
defp validate_min_or_max!(_kind, value, min) when is_integer(value) and value >= min, do: nil

defp validate_min_or_max!(kind, value, min) do
  raise ArgumentError,
        "expected #{kind} to be an integer greater than or equal to #{min}, " <>
          "got: #{inspect(value)}"
end
|
|
|
|
# Splits `ranges` into `{inclusive, exclusive}`: plain integers and ranges
# on the left, their `{:not, _}` counterparts on the right. Raises
# ArgumentError (mentioning `context`) on any other entry.
defp split_ranges!(ranges, context) do
  Enum.split_with(ranges, fn range -> split_range!(range, context) end)
end

# `true` marks an inclusive entry, `false` an exclusive (`:not`) one.
# Only ranges with a step of 1 or -1 are accepted.
defp split_range!(codepoint, _context) when is_integer(codepoint), do: true
defp split_range!(%Range{step: step}, _context) when abs(step) == 1, do: true
defp split_range!({:not, codepoint}, _context) when is_integer(codepoint), do: false
defp split_range!({:not, %Range{step: step}}, _context) when abs(step) == 1, do: false

defp split_range!(range, context) do
  raise ArgumentError, "unknown range #{inspect(range)} given to #{context}"
end
|
|
|
|
# Builds the AST of a call from one of the accepted `call` shapes,
# passing `extra` as the leading arguments:
#
#   * `{module, function, args}` - remote call
#   * `{function, args}` - local call
#   * `function` - local call with no further arguments
#
# The user-given `args` are escaped before being spliced in. Any other
# shape raises ArgumentError mentioning `context`.
defp compile_call!(extra, call, context) do
  case call do
    {module, function, args}
    when is_atom(module) and is_atom(function) and is_list(args) ->
      escaped = Macro.escape(args)

      quote do
        unquote(module).unquote(function)(
          unquote_splicing(extra),
          unquote_splicing(escaped)
        )
      end

    {function, args} when is_atom(function) and is_list(args) ->
      escaped = Macro.escape(args)

      quote do
        unquote(function)(unquote_splicing(extra), unquote_splicing(escaped))
      end

    function when is_atom(function) ->
      quote do
        unquote(function)(unquote_splicing(extra))
      end

    unknown ->
      raise ArgumentError, "unknown call given to #{context}, got: #{inspect(unknown)}"
  end
end
|
|
|
|
# Guards combinators that need something to work on: raises ArgumentError
# for an empty combinator and for a list of combinators given where a
# single combinator is expected. Returns `nil` otherwise.
defp non_empty!(combinator, action) do
  cond do
    combinator == [] ->
      raise ArgumentError, "cannot call #{action} on empty combinator"

    Enum.any?(combinator, &is_list/1) ->
      raise ArgumentError,
            "invalid combinator given to #{action}, got a list of combinators instead"

    true ->
      nil
  end
end
|
|
|
|
## Inner combinators
|
|
|
|
# Adds a `:constant` traverse entry. When `to_traverse` is itself a single
# constant traverse, the new call is stacked onto its call list instead of
# nesting another traverse around it.
defp quoted_constant_traverse(
       combinator,
       [{:traverse, inner_traverse, :constant, inner_call}],
       call
     ) do
  [{:traverse, inner_traverse, :constant, [call | inner_call]} | combinator]
end

defp quoted_constant_traverse(combinator, to_traverse, call) do
  [{:traverse, Enum.reverse(to_traverse), :constant, [call]} | combinator]
end
|
|
|
|
# Prepends a traverse entry of kind `pre_or_pos` wrapping `to_traverse`
# (stored reversed) with `call` as its only callback.
defp quoted_traverse(combinator, to_traverse, pre_or_pos, call) do
  entry = {:traverse, Enum.reverse(to_traverse), pre_or_pos, [call]}
  [entry | combinator]
end
|
|
|
|
# Prepends a `:bin_segment` entry carrying the inclusive and exclusive
# ranges together with the segment modifier.
defp bin_segment(combinator, inclusive, exclusive, modifier) do
  segment = {:bin_segment, inclusive, exclusive, modifier}
  [segment | combinator]
end
|
|
|
|
## Traverse callbacks
|
|
|
|
@doc false
# Emits the AST of the user `call` with the five traverse arguments
# placed before the user-given ones.
def __pre_traverse__(rest, acc, context, line, offset, call) do
  traverse_args = [rest, acc, context, line, offset]
  compile_call!(traverse_args, call, "pre_traverse")
end
|
|
|
|
@doc false
# Emits the AST of the user `call` with the five traverse arguments
# placed before the user-given ones.
def __post_traverse__(rest, acc, context, line, offset, call) do
  traverse_args = [rest, acc, context, line, offset]
  compile_call!(traverse_args, call, "post_traverse")
end
|
|
|
|
@doc false
# Emits the AST of the user `call`. The accumulator is not handed over:
# only the rest, context, line and offset are.
def __lookahead__(rest, _acc, context, line, offset, call) do
  lookahead_args = [rest, context, line, offset]
  compile_call!(lookahead_args, call, "lookahead")
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, [results], context}` tuple, with the results
# put back in parse order and nested in a single list.
def __wrap__(rest, acc, context, _line, _offset) do
  wrapped = [reverse_now_or_later(acc)]
  {:{}, [], [rest, wrapped, context]}
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, [{tag, results}], context}` tuple, with the
# results put back in parse order.
def __tag__(rest, acc, context, _line, _offset, tag) do
  tagged = {tag, reverse_now_or_later(acc)}
  {:{}, [], [rest, [tagged], context]}
end
|
|
|
|
@doc false
# Accumulator known at compile time with exactly one entry: tag it directly.
def __unwrap_and_tag__(rest, [one], context, _line, _offset, tag) do
  {:{}, [], [rest, [{tag, one}], context]}
end

# Accumulator known at compile time with any other number of entries:
# fail while compiling the parser.
def __unwrap_and_tag__(_rest, many, _context, _line, _offset, _tag) when is_list(many) do
  raise "unwrap_and_tag/3 expected a single token, got: #{inspect(many)}"
end

# Accumulator only known at runtime: emit code that performs the same
# single-token check when the parser runs.
def __unwrap_and_tag__(rest, acc, context, _line, _offset, tag) do
  unwrapped =
    quote do
      case :lists.reverse(unquote(acc)) do
        [one] -> one
        many -> raise "unwrap_and_tag/3 expected a single token, got: #{inspect(many)}"
      end
    end

  {:{}, [], [rest, [{tag, unwrapped}], context]}
end
|
|
|
|
@doc false
# Traverse callback behind `debug/2`. Returns quoted code that prints the
# remaining binary, the accumulator (reversed for printing), the context,
# the line and the offset, and then evaluates to `{rest, acc, context}`
# with all three values unchanged.
def __debug__(rest, acc, context, line, offset) do
  # `bind_quoted` makes each given value available under the same name
  # inside the quoted block.
  quote bind_quoted: [rest: rest, acc: acc, context: context, line: line, offset: offset] do
    IO.puts("""
    == DEBUG ==
    Bin: #{inspect(rest)}
    Acc: #{inspect(:lists.reverse(acc))}
    Ctx: #{inspect(context)}
    Lin: #{inspect(line)}
    Off: #{inspect(offset)}
    """)

    # The accumulator is handed back as received, not the reversed copy
    # used for printing.
    {rest, acc, context}
  end
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, constant, context}` tuple: whatever was
# accumulated is discarded in favour of `constant`.
def __constant__(rest, _acc, context, _line, _offset, constant) do
  elements = [rest, constant, context]
  {:{}, [], elements}
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, [{results, line}], context}` tuple, with the
# results put back in parse order.
def __line__(rest, acc, context, line, _offset) do
  with_line = {reverse_now_or_later(acc), line}
  {:{}, [], [rest, [with_line], context]}
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, [{results, offset}], context}` tuple, with
# the results put back in parse order.
def __byte_offset__(rest, acc, context, _line, offset) do
  with_offset = {reverse_now_or_later(acc), offset}
  {:{}, [], [rest, [with_offset], context]}
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, mapped, context}` tuple where `mapped` is
# code that runs `call` on every accumulated entry, with `var` bound to
# the entry.
def __map__(rest, acc, context, _line, _offset, var, call) do
  mapped = quote(do: Enum.map(unquote(acc), fn unquote(var) -> unquote(call) end))
  {:{}, [], [rest, mapped, context]}
end
|
|
|
|
@doc false
# Emits the AST of a `{rest, [reduced], context}` tuple where `reduced`
# is the user `call` applied to the results in parse order.
def __reduce__(rest, acc, context, _line, _offset, call) do
  ordered = reverse_now_or_later(acc)
  reduced = compile_call!([ordered], call, "reduce")

  {:{}, [], [rest, [reduced], context]}
end
|
|
|
|
## Repeat callbacks
|
|
|
|
@doc false
# Repeat condition that never halts: it always answers `:cont` and hands
# the context back untouched.
def __cont_context__(_rest, context, _line, _offset), do: {:cont, context}
|
|
|
|
@doc false
# Emits the AST of the user `call` with the quoted binary, context, line
# and offset placed before the user-given arguments.
def __repeat_while__(quoted, context, line, offset, call) do
  condition_args = [quoted, context, line, offset]
  compile_call!(condition_args, call, "repeat_while")
end
|
|
|
|
## Chars callbacks
|
|
|
|
# Fixed count: `to_repeat` is duplicated `count` times and post-traversed
# with the `compile` callback, which receives `count` ahead of `args`.
defp min_max_compile_runtime_chars(combinator, to_repeat, count, compile, _runtime, args)
     when is_integer(count) and count >= 0 do
  fixed = duplicate(to_repeat, count)
  quoted_post_traverse(combinator, fixed, {__MODULE__, compile, [count | args]})
end

# `:min`/`:max` options: the mandatory `min` part goes through the fixed
# count clause above, the remainder is a bounded `times/3` or an unbounded
# `repeat/2`, and the whole is post-traversed with the `runtime` callback,
# which receives `min` and `max` ahead of `args`.
defp min_max_compile_runtime_chars(combinator, to_repeat, opts, compile, runtime, args)
     when is_list(opts) do
  {min, max} = validate_min_and_max!(opts)

  mandatory =
    if min > 0,
      do: min_max_compile_runtime_chars(empty(), to_repeat, min, compile, runtime, args),
      else: empty()

  chars =
    if max,
      do: times(mandatory, to_repeat, max: max - min),
      else: repeat(mandatory, to_repeat)

  quoted_post_traverse(combinator, chars, {__MODULE__, runtime, [min, max | args]})
end
|
|
|
|
@doc false
# Emits quoted code that calls `List.to_string/1` on the reversed accumulator
# and makes that call the single element of the new accumulator.
# `min`, `max` and `type` are accepted but not used here.
def __runtime_string__(rest, acc, context, _line, _offset, _min, _max, _type) do
  reversed = reverse_now_or_later(acc)

  to_string_call =
    quote do
      List.to_string(unquote(reversed))
    end

  {:{}, [], [rest, [to_string_call], context]}
end
|
|
|
|
@doc false
# List accumulator: reverses `acc`, annotates every entry as `entry :: type`
# and assembles the segments into a quoted binary (`<<...>>`), which becomes
# the single element of the new accumulator.
def __compile_string__(rest, acc, context, _line, _offset, _count, type) when is_list(acc) do
  segments =
    acc
    |> :lists.reverse()
    |> Enum.map(fn entry -> {:"::", [], [entry, type]} end)

  binary = {:<<>>, [], segments}
  {:{}, [], [rest, [binary], context]}
end
|
|
|
|
# Fallback clause (reached when `acc` is not a list): emits quoted code that
# calls `List.to_string/1` on the reversed accumulator. `count` and `type`
# are not used here.
def __compile_string__(rest, acc, context, _line, _offset, _count, _type) do
  reversed = reverse_now_or_later(acc)
  to_string_call = quote(do: List.to_string(unquote(reversed)))
  {:{}, [], [rest, [to_string_call], context]}
end
|
|
|
|
@doc false
# `min` is a positive integer: the generated code splits the reversed
# accumulator into head and tail and folds the tail digits into the head,
# using the head as-is as the initial value.
# NOTE(review): presumably the head is already an integer here because the
# leading `min` digits went through the compile callback — confirm against
# the callers.
def __runtime_integer__(rest, acc, context, _line, _offset, min, _max)
    when is_integer(min) and min > 0 do
  reversed = reverse_now_or_later(acc)

  folded =
    quote do
      [head | tail] = unquote(reversed)
      [:lists.foldl(fn x, acc -> x - ?0 + acc * 10 end, head, tail)]
    end

  {:{}, [], [rest, folded, context]}
end

# Any other `min`: same fold, but the head is first converted from its ASCII
# code (`head - ?0`) before being used as the initial value.
def __runtime_integer__(rest, acc, context, _line, _offset, _min, _max) do
  reversed = reverse_now_or_later(acc)

  folded =
    quote do
      [head | tail] = unquote(reversed)
      [:lists.foldl(fn x, acc -> x - ?0 + acc * 10 end, head - ?0, tail)]
    end

  {:{}, [], [rest, folded, context]}
end
|
|
|
|
@doc false
# Turns a list accumulator of quoted ASCII digits into one quoted sum.
# `quoted_ascii_to_integer/2` gives the first entry of `acc` weight 1, the
# next weight 10, and so on; the weighted terms are then chained with `+`,
# each later term being added on the right of the running sum.
def __compile_integer__(rest, acc, context, _line, _offset, _count) when is_list(acc) do
  terms = quoted_ascii_to_integer(acc, 1)
  sum = Enum.reduce(terms, fn term, total -> {:+, [], [total, term]} end)
  {:{}, [], [rest, [sum], context]}
end
|
|
|
|
# Reverses a concrete list immediately; for anything else, returns quoted code
# that calls `:lists.reverse/1` on the given expression.
defp reverse_now_or_later(list) when is_list(list) do
  :lists.reverse(list)
end

defp reverse_now_or_later(expr) do
  quote do
    :lists.reverse(unquote(expr))
  end
end
|
|
|
|
# Converts a list of quoted ASCII digits into a list of quoted weighted terms.
# Each term is `digit - ?0` multiplied by the current weight, and the weight
# grows tenfold for every following entry. A weight of exactly 1 emits the
# bare `digit - ?0` term without a multiplication.
defp quoted_ascii_to_integer([], _index), do: []

defp quoted_ascii_to_integer([var | vars], 1) do
  units = quote(do: unquote(var) - ?0)
  [units | quoted_ascii_to_integer(vars, 10)]
end

defp quoted_ascii_to_integer([var | vars], index) do
  weighted = quote(do: (unquote(var) - ?0) * unquote(index))
  [weighted | quoted_ascii_to_integer(vars, index * 10)]
end
|
|
end
|