# Elixir, 203 lines, 6.0 KiB
defmodule DNSCluster do
  @moduledoc """
  Simple DNS based cluster discovery.

  A DNS query is made every `:interval` milliseconds to discover new IPs.
  Nodes will only be joined if their node basename matches the basename of the
  current node. For example if `node()` is `myapp-123@fdaa:1:36c9:a7b:198:c4b1:73c6:1`,
  a `Node.connect/1` attempt will be made against every IP returned by the DNS query,
  but will only be successful if there is a node running on the remote host with the same
  basename, for example `myapp-123@fdaa:1:36c9:a7b:198:c4b1:73c6:2`. Nodes running on
  remote hosts, but with different basenames will fail to connect and will be ignored.

  ## Examples

  To start in your supervision tree, add the child:

      children = [
        ...,
        {DNSCluster, query: "myapp.internal"}
      ]

  See the `start_link/1` docs for all available options.

  If you require more advanced clustering options and strategies, see the
  [libcluster](https://hexdocs.pm/libcluster) library.
  """
  use GenServer
  require Logger

  defmodule Resolver do
    @moduledoc false
    # Default resolver: thin wrappers around `Node` and `:inet_res`. `DNSCluster.init/1`
    # reads the module to use from the `:resolver` option and falls back to this one.

    require Record
    Record.defrecord(:hostent, Record.extract(:hostent, from_lib: "kernel/include/inet.hrl"))

    # Returns the part of the node name before the "@". The match is assertive:
    # a name that does not contain exactly one "@" raises a MatchError.
    def basename(node_name) when is_atom(node_name) do
      [basename, _] = String.split(to_string(node_name), "@")
      basename
    end

    def connect_node(node_name) when is_atom(node_name), do: Node.connect(node_name)

    def list_nodes, do: Node.list(:visible)

    # Returns the address list of the DNS answer, or [] when the lookup fails.
    def lookup(query, type) when is_binary(query) and type in [:a, :aaaa] do
      case :inet_res.getbyname(~c"#{query}", type) do
        {:ok, hostent(h_addr_list: addr_list)} -> addr_list
        {:error, _} -> []
      end
    end
  end

  @doc ~S"""
  Starts DNS based cluster discovery.

  ## Options

    * `:name` - the name of the cluster. Defaults to `DNSCluster`.
    * `:query` - the required DNS query for node discovery, for example: `"myapp.internal"`.
      The value `:ignore` can be used to ignore starting the DNSCluster.
    * `:interval` - the millisec interval between DNS queries. Defaults to `5000`.
    * `:connect_timeout` - the millisec timeout to allow discovered nodes to connect.
      Connection attempts that exceed it are abandoned and retried on the next poll.
      Defaults to `10_000`.
    * `:log` - the `Logger` level used to log connection events, for example `:info`.
      Defaults to `false`, which disables logging.

  ## Examples

      iex> DNSCluster.start_link(query: "myapp.internal")
      {:ok, pid}

      iex> DNSCluster.start_link(query: :ignore)
      :ignore
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: Keyword.get(opts, :name, __MODULE__))
  end

  @impl true
  def init(opts) do
    case Keyword.fetch(opts, :query) do
      {:ok, :ignore} ->
        :ignore

      {:ok, query} when is_binary(query) ->
        warn_on_invalid_dist()
        resolver = Keyword.get(opts, :resolver, Resolver)

        state = %{
          interval: Keyword.get(opts, :interval, 5_000),
          basename: resolver.basename(node()),
          query: query,
          log: Keyword.get(opts, :log, false),
          poll_timer: nil,
          connect_timeout: Keyword.get(opts, :connect_timeout, 10_000),
          resolver: resolver
        }

        # The first discovery runs in handle_continue/2 so that init/1 returns
        # without waiting on DNS lookups or connection attempts.
        {:ok, state, {:continue, :discover_ips}}

      {:ok, other} ->
        raise ArgumentError, "expected :query to be a string, got: #{inspect(other)}"

      :error ->
        raise ArgumentError, "missing required :query option in #{inspect(opts)}"
    end
  end

  @impl true
  def handle_continue(:discover_ips, state) do
    {:noreply, do_discovery(state)}
  end

  # Receives the message scheduled by schedule_next_poll/1.
  @impl true
  def handle_info(:discover_ips, state) do
    {:noreply, do_discovery(state)}
  end

  # One polling cycle: connect to newly discovered nodes, then schedule the next cycle.
  defp do_discovery(state) do
    state
    |> connect_new_nodes()
    |> schedule_next_poll()
  end

  # Attempts to connect, concurrently, to every discovered IP that the resolver does
  # not already list as a node. Returns the state unchanged.
  defp connect_new_nodes(%{resolver: resolver, connect_timeout: timeout} = state) do
    connected = MapSet.new(resolver.list_nodes(), &to_string/1)
    ips = discover_ips(state)

    candidates =
      ips
      |> Enum.map(fn ip -> "#{state.basename}@#{ip}" end)
      |> Enum.reject(&MapSet.member?(connected, &1))

    candidates
    |> Task.async_stream(
      fn new_name ->
        if resolver.connect_node(:"#{new_name}") do
          log(state, "#{node()} connected to #{new_name}")
        end
      end,
      max_concurrency: max(1, length(ips)),
      timeout: timeout,
      # The default (:exit) would take this GenServer down whenever a single node is
      # slow to answer. Kill only the slow task instead; the next poll retries it.
      on_timeout: :kill_task
    )
    # Results are emitted in input order, so each one lines up with its candidate name.
    |> Stream.zip(candidates)
    |> Enum.each(fn
      {{:exit, :timeout}, name} -> log(state, "#{node()} timed out connecting to #{name}")
      {_result, _name} -> :ok
    end)

    state
  end

  # Logs only when a level was configured through the :log option.
  defp log(state, msg) do
    if level = state.log, do: Logger.log(level, msg)
  end

  defp schedule_next_poll(state) do
    %{state | poll_timer: Process.send_after(self(), :discover_ips, state.interval)}
  end

  # Resolves the query for both A and AAAA records and returns the unique
  # addresses formatted as strings.
  defp discover_ips(%{resolver: resolver, query: query}) do
    [:a, :aaaa]
    |> Enum.flat_map(&resolver.lookup(query, &1))
    |> Enum.uniq()
    |> Enum.map(&to_string(:inet.ntoa(&1)))
  end

  # Logs a warning when the node is not distributed or is not using longnames.
  # The advice differs depending on whether RELEASE_NAME is set in the environment.
  defp warn_on_invalid_dist do
    release? = is_binary(System.get_env("RELEASE_NAME"))
    net_state = if function_exported?(:net_kernel, :get_state, 0), do: :net_kernel.get_state()

    cond do
      # :net_kernel.get_state/0 is not exported, so there is nothing to inspect.
      is_nil(net_state) ->
        :ok

      net_state.started == :no and release? ->
        Logger.warning("""
        node not running in distributed mode. Ensure the following exports are set in your rel/env.sh.eex file:

            #!/bin/sh

            export RELEASE_DISTRIBUTION=name
            export RELEASE_NODE="myapp@fully-qualified-host-or-ip"
        """)

      net_state.started == :no or (not release? and net_state[:name_domain] != :longnames) ->
        Logger.warning("""
        node not running in distributed mode. When running outside of a release, you must start net_kernel manually with
        longnames.
        https://www.erlang.org/doc/man/net_kernel.html#start-2
        """)

      net_state[:name_domain] != :longnames and release? ->
        Logger.warning("""
        node not running with longnames which are required for DNS discovery.
        Ensure the following exports are set in your rel/env.sh.eex file:

            #!/bin/sh

            export RELEASE_DISTRIBUTION=name
            export RELEASE_NODE="myapp@fully-qualified-host-or-ip"
        """)

      true ->
        :ok
    end
  end
end