320 lines
11 KiB
Elixir
320 lines
11 KiB
Elixir
defmodule Gettext.Merger do
|
|
@moduledoc false
|
|
|
|
alias Expo.PO
|
|
alias Expo.Message
|
|
alias Expo.Messages
|
|
alias Gettext.Fuzzy
|
|
alias Gettext.Plural
|
|
|
|
@new_po_informative_comment """
|
|
# "msgid"s in this file come from POT (.pot) files.
|
|
##
|
|
## Do not add, change, or remove "msgid"s manually here as
|
|
## they're tied to the ones in the corresponding POT file
|
|
## (with the same domain).
|
|
##
|
|
## Use "mix gettext.extract --merge" or "mix gettext.merge"
|
|
## to merge POT files into PO files.
|
|
"""
|
|
|
|
@doc """
Merges an existing PO file with an updated POT (or PO) file.

Both `old` and `new` are `Expo.Messages` structs. `old` is the existing PO
file (the one that contains translations), which gets "updated" with the
messages in `new`. Messages in `old` are kept as long as they match a message
in `new`; every other message in `old` is discarded or marked as obsolete
(see `:on_obsolete` below), since `new` is considered to be the reference.

Returns a `{messages, stats}` tuple:

  * `messages` is an `Expo.Messages` struct that is *always* meant to be a PO
    file, not a POT file. It keeps the top comments, headers, and file of
    `old`.
  * `stats` is a map with the `:new`, `:exact_matches`, `:fuzzy_matches`,
    `:removed`, and `:marked_as_obsolete` counters.

`new` can be:

  * a POT file (usually created or updated by the `mix gettext.extract` task) or
  * a newly created PO file with up-to-date source references (but old messages)

Note that all translator comments in `new` are discarded in favour of the
ones in `old`. Reference comments and extracted comments are taken from
`new` instead.

## Options

  * `:fuzzy` (required) - when `true`, messages in `new` that have no exact
    match in `old` are looked up through fuzzy matching.
  * `:fuzzy_threshold` (required) - the threshold handed to
    `Gettext.Fuzzy.matcher/1`.
  * `:plural_forms` - the number of plural forms. When missing, it is asked
    to the configured plural module for `locale`.
  * `:on_obsolete` - what to do with the messages in `old` that matched
    nothing in `new`: `:delete` (the default) or `:mark_as_obsolete`.
  * `:store_previous_message_on_fuzzy_match` - when truthy, the old message
    picked by a fuzzy match is appended to the `previous_messages` of the
    merged message. Defaults to `false`.

`gettext_config` is a keyword list; `:custom_flags_to_keep` lists the flags
that are carried over from old messages on top of the `"fuzzy"` flag.

## Rules

The following rules are observed:

  * matching messages are merged as follows:
    * existing msgstr are preserved (the ones in the POT file are empty anyway)
    * existing translator comments are preserved (there are no translator
      comments in POT files)
    * existing extracted comments are replaced by new extracted comments
    * existing references are discarded (as they're now outdated) and replaced
      by the references in the POT file

"""
@spec merge(Messages.t(), Messages.t(), String.t(), Keyword.t(), Keyword.t()) ::
        {Messages.t(), map()}
def merge(%Messages{} = old, %Messages{} = new, locale, opts, gettext_config)
    when is_binary(locale) and is_list(opts) do
  opts_with_plurals = put_plural_forms_opt(opts, old, locale)

  initial_stats = %{
    new: 0,
    exact_matches: 0,
    fuzzy_matches: 0,
    removed: 0,
    marked_as_obsolete: 0
  }

  {merged_messages, final_stats} =
    merge_messages(old.messages, new.messages, opts_with_plurals, gettext_config, initial_stats)

  # Everything but the messages comes from the existing PO file.
  merged = %Messages{
    top_comments: old.top_comments,
    headers: old.headers,
    file: old.file,
    messages: merged_messages
  }

  {merged, final_stats}
end
|
|
|
|
# Merges the `new` list of messages with the `old` one and returns
# `{messages, stats}`.
#
# Each new message is first looked up in `old` by its `Message.key/1`. When
# there is no exact match and the `:fuzzy` option is `true`, a fuzzy match is
# attempted against *all* the old messages. Old messages that were never
# matched are either dropped or appended as obsolete, depending on the
# `:on_obsolete` option.
defp merge_messages(old, new, opts, gettext_config, stats) do
  fuzzy? = Keyword.fetch!(opts, :fuzzy)
  fuzzy_threshold = Keyword.fetch!(opts, :fuzzy_threshold)
  plural_forms = Keyword.fetch!(opts, :plural_forms)
  custom_flags_to_keep = Keyword.get(gettext_config, :custom_flags_to_keep, [])
  store_previous? = Keyword.get(opts, :store_previous_message_on_fuzzy_match, false)

  old_by_key = Map.new(old, fn old_message -> {Message.key(old_message), old_message} end)

  # Increments one of the counters in the stats map.
  bump = fn counters, counter -> update_in(counters, [Access.key!(counter)], &(&1 + 1)) end

  {merged, {stats, leftover}} =
    Enum.map_reduce(new, {stats, old_by_key}, fn new_message, {counters, leftover} ->
      # The key is computed before the plural forms are adjusted.
      key = Message.key(new_message)
      new_message = adjust_number_of_plural_forms(new_message, plural_forms)

      lookup =
        case old_by_key do
          %{^key => exact_match} ->
            {:exact, exact_match}

          %{} when fuzzy? ->
            maybe_merge_fuzzy(new_message, old_by_key, key, fuzzy_threshold)

          %{} ->
            :nomatch
        end

      case lookup do
        {:exact, exact_match} ->
          merged_message = merge_two_messages(exact_match, new_message, custom_flags_to_keep)
          {merged_message, {bump.(counters, :exact_matches), Map.delete(leftover, key)}}

        {:matched, match, fuzzy_merged} ->
          fuzzy_merged =
            if store_previous? do
              Map.update!(fuzzy_merged, :previous_messages, fn previous ->
                Enum.uniq_by(previous ++ [match], &Message.key/1)
              end)
            else
              fuzzy_merged
            end

          # The old message that was fuzzy-matched is not "unused" anymore.
          leftover = Map.delete(leftover, Message.key(match))
          {fuzzy_merged, {bump.(counters, :fuzzy_matches), leftover}}

        :nomatch ->
          {new_message, {bump.(counters, :new), leftover}}
      end
    end)

  # Whatever is in `new` is in use, so it can never be obsolete.
  active = for message <- merged, do: %{message | obsolete: false}

  case Keyword.get(opts, :on_obsolete, :delete) do
    :mark_as_obsolete ->
      obsolete = leftover |> Map.values() |> Enum.map(&%{&1 | obsolete: true})
      {active ++ obsolete, %{stats | marked_as_obsolete: map_size(leftover)}}

    :delete ->
      {active, %{stats | removed: map_size(leftover)}}
  end
end
|
|
|
|
# Resets the msgstr of a plural message so that it holds exactly
# `plural_forms` entries (indexed from 0), each one being an empty string.
# Singular messages are returned untouched.
defp adjust_number_of_plural_forms(%Message.Singular{} = message, _plural_forms) do
  message
end

defp adjust_number_of_plural_forms(%Message.Plural{} = message, plural_forms)
     when plural_forms > 0 do
  empty_msgstr = for index <- 0..(plural_forms - 1), into: %{}, do: {index, [""]}
  %{message | msgstr: empty_msgstr}
end
|
|
|
|
# Looks for the old message whose key is the closest to `key`. Returns
# `{:matched, old_message, merged_message}` when one is found, where
# `merged_message` is built by `Fuzzy.merge/2`, and `:nomatch` otherwise.
defp maybe_merge_fuzzy(message, old, key, fuzzy_threshold) do
  case find_fuzzy_match(old, key, fuzzy_threshold) do
    nil -> :nomatch
    matched -> {:matched, matched, Fuzzy.merge(message, matched)}
  end
end
|
|
|
|
# Runs the matcher built by `Fuzzy.matcher/1` against every key in `messages`
# (a map of key => message) and returns the message whose `{:match, value}`
# carries the highest value, or `nil` when nothing matches. On a tie, the
# first candidate that was found wins.
defp find_fuzzy_match(messages, key, threshold) do
  matcher = Fuzzy.matcher(threshold)

  scored =
    Enum.flat_map(messages, fn {candidate_key, candidate} ->
      case matcher.(candidate_key, key) do
        {:match, score} -> [{candidate, score}]
        :nomatch -> []
      end
    end)

  case scored do
    [] ->
      nil

    [_ | _] ->
      {best, _score} = Enum.max_by(scored, fn {_candidate, score} -> score end)
      best
  end
end
|
|
|
|
# Merges an old message with the new message that has the very same key.
#
#   * msgid, msgid_plural: identical in both messages
#   * msgctxt: identical in both messages, even when missing (nil)
#   * msgstr: new.msgstr should be empty since it comes from a POT file
#   * comments: kept from old, as new has no translator comments (it comes
#     from a POT file)
#   * extracted_comments: taken from new, which has the most recent ones
#   * flags: taken from new, plus the "fuzzy" flag and the custom flags to
#     keep whenever old has them
#   * references: taken from new, which has the most recent ones
defp merge_two_messages(old, new, custom_flags_to_keep) do
  merged = Message.merge(old, new)

  %{
    merged
    | comments: old.comments,
      extracted_comments: new.extracted_comments,
      flags: merge_flags(old, new, custom_flags_to_keep),
      references: new.references
  }
end
|
|
|
|
# Returns the flags of `new_message` with, appended to them, every flag
# worth keeping that is set on `old_message`. The "fuzzy" flag is always
# among the flags worth keeping.
defp merge_flags(old_message, new_message, custom_flags_to_keep) do
  carried_over =
    ["fuzzy" | custom_flags_to_keep]
    |> Enum.uniq()
    |> Enum.filter(&Message.has_flag?(old_message, &1))

  %{flags: merged_flags} =
    Enum.reduce(carried_over, new_message, fn flag, message ->
      Message.append_flag(message, flag)
    end)

  merged_flags
end
|
|
|
|
@doc """
Builds a brand new PO file for `locale` out of the POT template in `pot_file`.

Returns a `{messages, stats}` tuple. `messages` is the `Expo.Messages` struct
meant to be written at `po_file` (nothing is written by this function), and
`stats` is a map with the same counters returned by `merge/5`: `:new` is the
number of messages in the new file and all the other counters are `0`.

The new PO file will have:

  * an informative top comment explaining that its msgids come from POT files
  * the `Language` header set to `locale`
  * the `Plural-Forms` header, taken from the `:plural_forms_header` option
    or computed for `locale` when that option is missing
  * the messages of the POT file (no merging is needed as there are no
    messages in the PO file), where plural messages get one empty msgstr for
    each plural form

Message comments in `pot_file` that start with `##` will be discarded and not
copied over to the new PO file as they're meant to be comments generated by
tools or comments directed to developers.

`pot_file` is parsed with `Expo.PO.parse_file!/1`, so this function raises
whenever that one does.
"""
@spec new_po_file(Path.t(), Path.t(), String.t(), Keyword.t()) :: {Messages.t(), map()}
def new_po_file(po_file, pot_file, locale, opts) when is_binary(locale) and is_list(opts) do
  pot = PO.parse_file!(pot_file)

  # Guarantees that both :plural_forms and :plural_forms_header are set.
  opts = put_plural_forms_opt(opts, pot, locale)
  plural_forms = Keyword.fetch!(opts, :plural_forms)
  plural_forms_header = Keyword.fetch!(opts, :plural_forms_header)

  po = %Messages{
    top_comments: String.split(@new_po_informative_comment, "\n", trim: true),
    headers: headers_for_new_po_file(locale, plural_forms_header),
    file: po_file,
    messages: Enum.map(pot.messages, &prepare_new_message(&1, plural_forms))
  }

  # Every message is new, as there was no PO file to merge with.
  stats = %{
    new: length(po.messages),
    exact_matches: 0,
    fuzzy_matches: 0,
    removed: 0,
    marked_as_obsolete: 0
  }

  {po, stats}
end
|
|
|
|
# Prunes the references of all the messages according to `gettext_config`:
#
#   * when `:write_reference_comments` is `false`, all references are emptied
#   * otherwise, when `:write_reference_line_numbers` is `false`, line numbers
#     are dropped from the references, which are then deduplicated
#   * otherwise, the messages are returned as they are
#
# Both settings default to `true`.
@doc false
@spec prune_references(messages :: Messages.t(), gettext_config :: Keyword.t()) :: Messages.t()
def prune_references(%Messages{} = all, gettext_config) when is_list(gettext_config) do
  write_comments? = Keyword.get(gettext_config, :write_reference_comments, true)
  write_line_numbers? = Keyword.get(gettext_config, :write_reference_line_numbers, true)

  # Points at the references of every single message.
  references_path = [Access.key!(:messages), Access.all(), Access.key(:references)]

  cond do
    not write_comments? ->
      put_in(all, references_path, [])

    not write_line_numbers? ->
      update_in(all, references_path, &remove_line_and_unique_references/1)

    true ->
      all
  end
end
|
|
|
|
# Strips line numbers from `references` and deduplicates the resulting files.
#
# `references` is a list of lines, each line being a list of `{file, line}`
# tuples (or of bare files, which are kept as they are). Each file is kept
# only where it shows up first across all lines, and lines that are left
# with no files are dropped altogether.
defp remove_line_and_unique_references(references) do
  {deduplicated_lines, _seen_files} =
    Enum.map_reduce(references, MapSet.new(), fn line, seen_files ->
      new_files =
        line
        |> Enum.map(fn
          {file, _line_number} -> file
          file -> file
        end)
        |> Enum.uniq()
        # Set membership instead of `-- MapSet.to_list(seen_files)`, which
        # rebuilt a list on every line and subtracted it quadratically.
        |> Enum.reject(&MapSet.member?(seen_files, &1))

      {new_files, MapSet.union(seen_files, MapSet.new(new_files))}
    end)

  Enum.reject(deduplicated_lines, &(&1 == []))
end
|
|
|
|
# Builds the headers of a new PO file: an empty first entry followed by the
# `Language` and the `Plural-Forms` headers, each one ending with a newline.
defp headers_for_new_po_file(locale, plural_forms_header) do
  language = "Language: #{locale}\n"
  plural_forms = "Plural-Forms: #{plural_forms_header}\n"

  ["", language, plural_forms]
end
|
|
|
|
# Gets a message coming from a POT file ready for a brand new PO file: the
# comments matched by strip_double_hash_comments/1 are removed first, then
# the number of plural forms is adjusted.
defp prepare_new_message(message, plural_forms) do
  stripped = strip_double_hash_comments(message)
  adjust_number_of_plural_forms(stripped, plural_forms)
end
|
|
|
|
# Drops the comments whose stored text starts with "#".
# NOTE(review): presumably comments are stored without their leading "#", so
# these are the "##" comments of the file - confirm against Expo.
defp strip_double_hash_comments(%{comments: comments} = message) do
  kept =
    Enum.filter(comments, fn
      "#" <> _rest -> false
      _other -> true
    end)

  %{message | comments: kept}
end
|
|
|
|
# Makes sure that `opts` has both the :plural_forms and the
# :plural_forms_header options, leaving alone the ones that are already set.
#
# The plural module is read from the :plural_forms key of the :gettext
# application environment and defaults to Gettext.Plural.
#
# TODO: simplify code here once we remove support for :plural_forms.
defp put_plural_forms_opt(opts, messages, locale) do
  plural_mod = Application.get_env(:gettext, :plural_forms, Gettext.Plural)
  plural_info = Plural.plural_info(locale, messages, plural_mod)
  default_nplurals = plural_mod.nplurals(plural_info)

  opts_with_nplurals = Keyword.put_new(opts, :plural_forms, default_nplurals)

  Keyword.put_new_lazy(opts_with_nplurals, :plural_forms_header, fn ->
    case Keyword.fetch!(opts_with_nplurals, :plural_forms) do
      nplurals when nplurals == default_nplurals ->
        Plural.plural_forms_header_impl(locale, messages, plural_mod)

      # If nplurals is overridden to a non-default value by the user, the
      # implementation can't provide a correct header. The header is then
      # just `nplurals=N`, and it is up to the user to put a complete plural
      # forms header themselves.
      nplurals ->
        "nplurals=#{nplurals}"
    end
  end)
end
|
|
end
|