Files
voice_recognition/whisper/deps/gettext/lib/gettext/merger.ex

320 lines
11 KiB
Elixir

defmodule Gettext.Merger do
@moduledoc false
alias Expo.PO
alias Expo.Message
alias Expo.Messages
alias Gettext.Fuzzy
alias Gettext.Plural
# Comment block placed at the top of every PO file created by `new_po_file/4`,
# which splits this text on newlines to build the file's `top_comments`.
# NOTE(review): the first line starts with a single "#" while every other line
# starts with "##" - confirm whether that difference is intentional.
@new_po_informative_comment """
# "msgid"s in this file come from POT (.pot) files.
##
## Do not add, change, or remove "msgid"s manually here as
## they're tied to the ones in the corresponding POT file
## (with the same domain).
##
## Use "mix gettext.extract --merge" or "mix gettext.merge"
## to merge POT files into PO files.
"""
@doc """
Merges an existing PO file with an updated POT (or PO) file.

Both `old` and `new` are `Expo.Messages` structs. `old` is the existing PO
file (the one that contains translations) and `new` is the reference it gets
updated against. Messages in `old` are kept as long as they match a message
in `new`. Messages in `old` without a match are dropped by default, or kept
and marked as obsolete when `opts` contains `on_obsolete: :mark_as_obsolete`.

The returned `Expo.Messages` struct is *always* meant to be a PO file, not a
POT file. Its top comments, headers and file are taken from `old`.

`new` can be:

  * a POT file (usually created or updated by the `mix gettext.extract` task) or
  * a newly created PO file with up-to-date source references (but old messages)

All translator comments in `new` are discarded in favour of the ones in
`old`. Reference comments and extracted comments are taken from `new`.

Matching messages are merged as follows:

  * existing msgstr are preserved (the ones in the POT file are empty anyway)
  * existing translator comments are preserved (there are no translator
    comments in POT files)
  * existing extracted comments are replaced by the new extracted comments
  * existing references are discarded (as they're now outdated) and replaced
    by the references in the POT file

## Options

`opts` must contain `:fuzzy` and `:fuzzy_threshold`. `:plural_forms` and
`:plural_forms_header` are filled in from the locale when missing.
`gettext_config` is read for `:custom_flags_to_keep`.

## Return value

A `{messages, stats}` tuple, where `stats` is a map with the counters `:new`,
`:exact_matches`, `:fuzzy_matches`, `:removed` and `:marked_as_obsolete`.
"""
@spec merge(Messages.t(), Messages.t(), String.t(), Keyword.t(), Keyword.t()) ::
        {Messages.t(), map()}
def merge(%Messages{} = old, %Messages{} = new, locale, opts, gettext_config)
    when is_binary(locale) and is_list(opts) do
  merge_opts = put_plural_forms_opt(opts, old, locale)

  initial_stats = %{
    new: 0,
    exact_matches: 0,
    fuzzy_matches: 0,
    removed: 0,
    marked_as_obsolete: 0
  }

  {merged_messages, final_stats} =
    merge_messages(old.messages, new.messages, merge_opts, gettext_config, initial_stats)

  # Everything but the message list is carried over from the existing PO file.
  merged_po = %Messages{
    top_comments: old.top_comments,
    headers: old.headers,
    file: old.file,
    messages: merged_messages
  }

  {merged_po, final_stats}
end
# Merges the list of `old` messages into the list of `new` messages.
#
# Every message in `new` ends up in the result, in order. It is either merged
# with the old message that has the same key (exact match), merged with the
# best fuzzy match among the old messages (only when the `:fuzzy` option is
# `true`), or kept as is and counted as new.
#
# Old messages that were never matched are either appended as obsolete
# (`on_obsolete: :mark_as_obsolete`) or left out (`on_obsolete: :delete`, the
# default). Returns `{messages, stats}` with the counters in `stats` updated.
defp merge_messages(old, new, opts, gettext_config, stats) do
  fuzzy? = Keyword.fetch!(opts, :fuzzy)
  fuzzy_threshold = Keyword.fetch!(opts, :fuzzy_threshold)
  plural_forms = Keyword.fetch!(opts, :plural_forms)
  custom_flags_to_keep = Keyword.get(gettext_config, :custom_flags_to_keep, [])

  # Increments a single counter of the stats map.
  bump = fn counters, counter -> Map.update!(counters, counter, &(&1 + 1)) end

  old_by_key = Map.new(old, fn message -> {Message.key(message), message} end)

  # `leftover` starts as all the old messages and shrinks every time one of
  # them gets matched. Lookups always go through the full `old_by_key` map.
  {merged, {stats, leftover}} =
    Enum.map_reduce(new, {stats, old_by_key}, fn incoming, {counters, leftover} ->
      key = Message.key(incoming)
      incoming = adjust_number_of_plural_forms(incoming, plural_forms)

      case Map.fetch(old_by_key, key) do
        {:ok, exact_match} ->
          merged_message = merge_two_messages(exact_match, incoming, custom_flags_to_keep)
          {merged_message, {bump.(counters, :exact_matches), Map.delete(leftover, key)}}

        :error when fuzzy? ->
          case maybe_merge_fuzzy(incoming, old_by_key, key, fuzzy_threshold) do
            {:matched, match, fuzzy_merged} ->
              fuzzy_merged =
                if Keyword.get(opts, :store_previous_message_on_fuzzy_match, false) do
                  Map.update!(fuzzy_merged, :previous_messages, fn previous ->
                    Enum.uniq_by(previous ++ [match], &Message.key/1)
                  end)
                else
                  fuzzy_merged
                end

              remaining = Map.delete(leftover, Message.key(match))
              {fuzzy_merged, {bump.(counters, :fuzzy_matches), remaining}}

            :nomatch ->
              {incoming, {bump.(counters, :new), leftover}}
          end

        :error ->
          {incoming, {bump.(counters, :new), leftover}}
      end
    end)

  # Whatever comes out of the merge is in use, so it cannot be obsolete.
  active = Enum.map(merged, fn message -> %{message | obsolete: false} end)
  leftover_count = map_size(leftover)

  case Keyword.get(opts, :on_obsolete, :delete) do
    :mark_as_obsolete ->
      obsolete = Enum.map(Map.values(leftover), fn message -> %{message | obsolete: true} end)
      {active ++ obsolete, %{stats | marked_as_obsolete: leftover_count}}

    :delete ->
      {active, %{stats | removed: leftover_count}}
  end
end
# Resets the msgstr of a plural message so that it has exactly `plural_forms`
# entries (indexed from 0), each one holding an empty string. Singular
# messages are returned untouched.
defp adjust_number_of_plural_forms(%Message.Singular{} = message, _plural_forms), do: message

defp adjust_number_of_plural_forms(%Message.Plural{} = message, plural_forms)
     when plural_forms > 0 do
  empty_forms = for index <- 0..(plural_forms - 1), into: %{}, do: {index, [""]}
  %{message | msgstr: empty_forms}
end
# Looks for a fuzzy match for `key` among the `old` messages. Returns
# `{:matched, old_message, merged_message}` when one is found, `:nomatch`
# otherwise.
defp maybe_merge_fuzzy(message, old, key, fuzzy_threshold) do
  case find_fuzzy_match(old, key, fuzzy_threshold) do
    none when none in [nil, false] -> :nomatch
    matched -> {:matched, matched, Fuzzy.merge(message, matched)}
  end
end
# Returns the message in `messages` (a map of key => message) whose key gets
# the highest score from the fuzzy matcher when compared with `key`, or `nil`
# when the matcher accepts none of them.
defp find_fuzzy_match(messages, key, threshold) do
  matcher = Fuzzy.matcher(threshold)

  scored =
    Enum.flat_map(messages, fn {candidate_key, candidate} ->
      case matcher.(candidate_key, key) do
        skipped when skipped in [nil, false, :nomatch] -> []
        match -> [{candidate, match}]
      end
    end)

  case scored do
    [] ->
      nil

    [_ | _] ->
      {best, _match} = Enum.max_by(scored, fn {_candidate, {:match, score}} -> score end)
      best
  end
end
# Merges an old message with the new message that has the same key.
#
# Where each field of the result comes from:
#
#   * msgid, msgid_plural: identical in both messages
#   * msgctxt: identical in both messages, even when absent (nil)
#   * msgstr: new.msgstr should be empty as it comes from a POT file
#   * comments: taken from old, as new has no translator comments
#   * extracted_comments: taken from new, which has the most recent ones
#   * flags: the new flags, plus the old flags that must be preserved
#   * references: taken from new, which has the most recent ones
defp merge_two_messages(old, new, custom_flags_to_keep) do
  merged = Message.merge(old, new)

  %{
    merged
    | comments: old.comments,
      extracted_comments: new.extracted_comments,
      flags: merge_flags(old, new, custom_flags_to_keep),
      references: new.references
  }
end
# Returns the flags of `new_message` with the flags to keep appended, for
# those that `old_message` carries. "fuzzy" is always among the flags to
# keep, on top of the ones listed in `custom_flags_to_keep`.
defp merge_flags(old_message, new_message, custom_flags_to_keep) do
  carried_over =
    ["fuzzy" | custom_flags_to_keep]
    |> Enum.uniq()
    |> Enum.filter(fn flag -> Message.has_flag?(old_message, flag) end)

  with_flags =
    Enum.reduce(carried_over, new_message, fn flag, message ->
      Message.append_flag(message, flag)
    end)

  with_flags.flags
end
@doc """
Builds the contents of a new PO file to be written at `po_file`, starting
from the POT template in `pot_file`.

The new PO file has:

  * an informative comment at the top of the file
  * the `Language` header set to `locale`
  * the `Plural-Forms` header taken from the `:plural_forms_header` option,
    which is computed for `locale` when not given
  * the messages of the POT file (no merging is needed as there are no
    messages in the PO file)

Message comments in `pot_file` that start with `##` are not copied over to
the new PO file, as they're meant to be comments generated by tools or
comments directed to developers.

Returns a `{messages, stats}` tuple where every message is counted as `:new`
and all other counters are zero.
"""
def new_po_file(po_file, pot_file, locale, opts) when is_binary(locale) and is_list(opts) do
  template = PO.parse_file!(pot_file)
  opts = put_plural_forms_opt(opts, template, locale)
  nplurals = Keyword.fetch!(opts, :plural_forms)
  plural_forms_header = Keyword.fetch!(opts, :plural_forms_header)

  new_messages =
    for message <- template.messages, do: prepare_new_message(message, nplurals)

  po = %Messages{
    top_comments: String.split(@new_po_informative_comment, "\n", trim: true),
    headers: headers_for_new_po_file(locale, plural_forms_header),
    file: po_file,
    messages: new_messages
  }

  # There is nothing to merge with, so every message counts as new.
  stats = %{
    new: length(new_messages),
    exact_matches: 0,
    fuzzy_matches: 0,
    removed: 0,
    marked_as_obsolete: 0
  }

  {po, stats}
end
# Adjusts the references of all messages according to `gettext_config`:
#
#   * `write_reference_comments: false` empties the references of every
#     message
#   * otherwise, `write_reference_line_numbers: false` drops the line numbers
#     and removes the duplicates that this creates
#   * otherwise the messages are returned unchanged
#
# Both settings default to `true`.
@doc false
@spec prune_references(messages :: Messages.t(), gettext_config :: Keyword.t()) :: Messages.t()
def prune_references(%Messages{} = all, gettext_config) when is_list(gettext_config) do
  # Path to the `:references` field of every message in the struct.
  references_of_each_message = [Access.key!(:messages), Access.all(), Access.key(:references)]

  cond do
    not Keyword.get(gettext_config, :write_reference_comments, true) ->
      put_in(all, references_of_each_message, [])

    not Keyword.get(gettext_config, :write_reference_line_numbers, true) ->
      update_in(all, references_of_each_message, &remove_line_and_unique_references/1)

    true ->
      all
  end
end
# Takes the references of a message (a list of lines, each one being a list
# of `{file, line}` tuples or bare files) and returns them with the line
# numbers dropped. Each file is kept only the first time it shows up, and
# lines that end up empty are removed.
defp remove_line_and_unique_references(references) do
  {lines, _seen} =
    Enum.map_reduce(references, MapSet.new(), fn line, seen ->
      fresh =
        line
        |> Enum.map(fn
          {file, _line_number} -> file
          file -> file
        end)
        |> Enum.uniq()
        |> Enum.reject(fn file -> MapSet.member?(seen, file) end)

      {fresh, Enum.reduce(fresh, seen, fn file, acc -> MapSet.put(acc, file) end)}
    end)

  Enum.reject(lines, fn line -> line == [] end)
end
# Builds the headers of a brand new PO file: an empty first entry followed by
# the `Language` and `Plural-Forms` headers, each one ending with a newline.
defp headers_for_new_po_file(locale, plural_forms_header) do
  language = "Language: #{locale}\n"
  plural_forms = "Plural-Forms: #{plural_forms_header}\n"

  ["", language, plural_forms]
end
# Gets a message coming from a POT file ready for a new PO file: the comments
# starting with a double hash are removed and, for plural messages, the
# msgstr is reset to `plural_forms` empty entries.
defp prepare_new_message(message, plural_forms) do
  without_double_hash_comments = strip_double_hash_comments(message)
  adjust_number_of_plural_forms(without_double_hash_comments, plural_forms)
end
# Removes from the message every comment whose stored text starts with "#".
# NOTE(review): presumably comments are stored without their leading "#", so
# these are the "##" comments of the file - confirm against the PO parser.
defp strip_double_hash_comments(%{comments: comments} = message) do
  kept =
    Enum.filter(comments, fn
      "#" <> _rest -> false
      _other -> true
    end)

  %{message | comments: kept}
end
# Fills in the `:plural_forms` and `:plural_forms_header` options, leaving
# any value already present in `opts` untouched.
#
#   * `:plural_forms` defaults to the number of plural forms that the plural
#     module reports for `locale`
#   * `:plural_forms_header` defaults to the header computed for `locale`, or
#     to a bare `nplurals=N` when `:plural_forms` was overridden
#
# The plural module is the one configured under the `:plural_mod` key of the
# `:gettext` application, falling back to `Gettext.Plural`. It must not be
# read from `:plural_forms`: that name belongs to the option holding the
# *number* of plural forms, so looking the module up under it would ignore a
# custom plural module configured by the user.
#
# TODO: simplify code here once we remove support for :plural_forms.
defp put_plural_forms_opt(opts, messages, locale) do
  plural_mod = Application.get_env(:gettext, :plural_mod, Gettext.Plural)
  default_nplurals = plural_mod.nplurals(Plural.plural_info(locale, messages, plural_mod))
  opts = Keyword.put_new(opts, :plural_forms, default_nplurals)

  Keyword.put_new_lazy(opts, :plural_forms_header, fn ->
    requested_nplurals = Keyword.fetch!(opts, :plural_forms)

    # If nplurals is overridden to a non-default value by the user the
    # implementation will not be able to provide a correct header therefore
    # the header is just set to `nplurals=#{n}` and it is up to the user to
    # put a complete plural forms header themselves.
    if requested_nplurals == default_nplurals do
      Plural.plural_forms_header_impl(locale, messages, plural_mod)
    else
      "nplurals=#{requested_nplurals}"
    end
  end)
end
end