api-v2/deps/hackney/src/hackney_url.erl
2025-04-16 10:03:13 -03:00

506 lines
17 KiB
Erlang

%% -*- erlang -*-
%%%
%%% This file is part of hackney_lib released under the Apache 2 license.
%%% See the NOTICE for more information.
%%%
%%% Copyright (c) 2012-2015 Benoît Chesneau <benoitc@e-engura.org>
%%% Copyright (c) 2011, Magnus Klaar <magnus.klaar@gmail.com>
%%%
%% @doc module to manage URLs.
-module(hackney_url).
-export([parse_url/1,
transport_scheme/1,
unparse_url/1,
urldecode/1, urldecode/2,
urlencode/1, urlencode/2,
parse_qs/1,
qs/1, qs/2,
make_url/3,
fix_path/1,
pathencode/1,
normalize/1, normalize/2,
property/2]).
-export([idnconvert_hostname/1]).
-include("hackney_lib.hrl").
%% Query-string properties: a list of `{Key, Value}' pairs.
-type qs_vals() :: [{binary() | atom() | list() | integer(), binary() | true}].
%% Options accepted by urlencode/2 and qs/2. `lower' is the option that
%% urlencode/2 actually looks up; `upper' is kept in the type so that existing
%% specs and callers mentioning it still type-check.
-type qs_opt() :: noplus | lower | upper.
%% @doc Parse a URL and return a #hackney_url record.
%% A character list is first converted to a binary. The prefixes `http://',
%% `https://' and `http+unix://' select the scheme and transport; any other
%% input is parsed as-is with the plain http scheme over `hackney_tcp'.
-spec parse_url(URL::binary()|list()) -> hackney_url().
parse_url(URL) when is_list(URL) ->
    Bin = case unicode:characters_to_binary(URL) of
              Converted when is_binary(Converted) ->
                  Converted;
              _ ->
                  %% Conversion reported an error or incomplete input:
                  %% retry on the flattened bytes.
                  unicode:characters_to_binary(list_to_binary(URL))
          end,
    parse_url(Bin);
parse_url(<<"http://", Rest/binary>>) ->
    parse_url(Rest, #hackney_url{scheme = http, transport = hackney_tcp});
parse_url(<<"https://", Rest/binary>>) ->
    parse_url(Rest, #hackney_url{scheme = https, transport = hackney_ssl});
parse_url(<<"http+unix://", Rest/binary>>) ->
    parse_url(Rest, #hackney_url{scheme = http_unix,
                                 transport = hackney_local_tcp});
parse_url(URL) ->
    %% No recognised scheme prefix: nothing is stripped from the input.
    parse_url(URL, #hackney_url{scheme = http, transport = hackney_tcp}).
%% @private
%% Split the scheme-less URL into fragment, query, path and address parts and
%% store them in the record S. `raw_path' is the path followed by the
%% "?query" and "#fragment" suffixes when those are non-empty.
parse_url(URL, S) ->
    {NoFragment, Fragment} = cut_fragment(URL),
    {AddrAndPath, Query} = cut_query(NoFragment),
    Suffix = <<(raw_query(Query))/binary, (raw_fragment(Fragment))/binary>>,
    S1 = S#hackney_url{qs = Query, fragment = Fragment},
    case binary:split(AddrAndPath, <<"/">>) of
        [_] ->
            %% No "/" at all: the path is empty.
            parse_addr1(AddrAndPath,
                        S1#hackney_url{raw_path = Suffix, path = <<>>});
        [Addr, <<>>] ->
            %% Address followed by a bare "/".
            parse_addr1(Addr,
                        S1#hackney_url{raw_path = <<"/", Suffix/binary>>,
                                       path = <<"/">>});
        [Addr, Path] ->
            FullPath = <<"/", Path/binary>>,
            parse_addr(Addr,
                       S1#hackney_url{raw_path = <<FullPath/binary, Suffix/binary>>,
                                      path = FullPath})
    end.
raw_fragment(<<"">>) -> <<"">>;
raw_fragment(Fragment) -> <<"#", Fragment/binary>>.
%% @private
%% Return the query string prefixed with "?", or an empty binary when there
%% is no query.
raw_query(<<>>) ->
    <<>>;
raw_query(Qs) ->
    <<$?, Qs/binary>>.
%% @doc Return the field of the #hackney_url record URL selected by the atom
%% Name. Fails with `badarg' when Name is not one of the known field names.
property(Name, URL) ->
    case Name of
        transport -> URL#hackney_url.transport;
        scheme    -> URL#hackney_url.scheme;
        netloc    -> URL#hackney_url.netloc;
        raw_path  -> URL#hackney_url.raw_path;
        path      -> URL#hackney_url.path;
        qs        -> URL#hackney_url.qs;
        fragment  -> URL#hackney_url.fragment;
        host      -> URL#hackney_url.host;
        port      -> URL#hackney_url.port;
        user      -> URL#hackney_url.user;
        password  -> URL#hackney_url.password;
        _         -> erlang:error(badarg)
    end.
%% @doc Normalize the encoding of a URL, encoding its path with
%% {@link hackney_url:pathencode/1}.
-spec normalize(URL) -> NormalizedUrl when
      URL :: binary() | list() | hackney_url(),
      NormalizedUrl :: hackney_url().
normalize(URL) ->
    PathEncoder = fun hackney_url:pathencode/1,
    normalize(URL, PathEncoder).
%% @doc Normalize the encoding of a URL.
%% A binary or list URL is parsed first. A host that parses as an IPv4 or
%% IPv6 address is kept together with its netloc. Any other host is
%% URL-decoded, IDN-converted (except for the `http_unix' scheme) and the
%% netloc is rebuilt from it; the port is left out of the netloc for http on
%% port 80, https on port 443 and for `http_unix'. Fun is applied to the
%% path to produce the normalized path.
-spec normalize(URL, Fun) -> NormalizedUrl when
      URL :: binary() | list() | hackney_url(),
      Fun :: fun(),
      NormalizedUrl :: hackney_url().
normalize(Url, Fun) when is_list(Url) orelse is_binary(Url) ->
    normalize(parse_url(Url), Fun);
normalize(#hackney_url{scheme = Scheme,
                       host = Host0,
                       port = Port,
                       netloc = Netloc0,
                       path = Path} = Url, Fun) when is_function(Fun, 1) ->
    {Host, Netloc} =
        case inet_parse:address(Host0) of
            {ok, Ip} when tuple_size(Ip) =:= 4; tuple_size(Ip) =:= 8 ->
                %% Literal IP address: nothing to decode or convert.
                {Host0, Netloc0};
            _ ->
                Decoded = binary_to_list(
                            urldecode(unicode:characters_to_binary(Host0))),
                %% encode domain if needed
                Host1 = case Scheme of
                            http_unix -> Decoded;
                            _ -> idnconvert_hostname(Decoded)
                        end,
                OmitPort = Scheme =:= http_unix
                    orelse {Scheme, Port} =:= {http, 80}
                    orelse {Scheme, Port} =:= {https, 443},
                Netloc1 = case OmitPort of
                              true ->
                                  list_to_binary(Host1);
                              false ->
                                  iolist_to_binary([Host1, ":",
                                                    integer_to_list(Port)])
                          end,
                {Host1, Netloc1}
        end,
    Url#hackney_url{host = Host, netloc = Netloc, path = Fun(Path)}.
%% @doc Map a transport module name to the URL scheme atom used for it.
transport_scheme(hackney_tcp)       -> http;
transport_scheme(hackney_ssl)       -> https;
transport_scheme(hackney_local_tcp) -> http_unix.
%% @private
%% True when every element of the list Host satisfies idna_ucs:is_ascii/1.
is_ascii(Host) ->
    lists:all(fun idna_ucs:is_ascii/1, Host).
%% @doc Convert a hostname to its ASCII form when needed.
%% An all-ASCII Host is returned unchanged; otherwise the result of
%% idna:utf8_to_ascii/1 is returned.
idnconvert_hostname(Host) ->
    case is_ascii(Host) of
        false -> idna:utf8_to_ascii(Host);
        true -> Host
    end.
%% @doc Build the binary form of a #hackney_url record.
%% The result is the scheme prefix, optional URL-encoded "user[:password]@"
%% credentials, the netloc, the path (empty when the path is `nil' or
%% `undefined'), then "?query" and "#fragment" when those are non-empty.
unparse_url(#hackney_url{scheme = Scheme,
                         netloc = Netloc,
                         path = Path,
                         qs = Qs,
                         fragment = Fragment,
                         user = User,
                         password = Password}) ->
    Prefix = case Scheme of
                 http -> <<"http://">>;
                 https -> <<"https://">>;
                 http_unix -> <<"http+unix://">>
             end,
    Authority =
        case User of
            <<>> ->
                %% No user: credentials are omitted entirely.
                Netloc;
            _ when Password /= <<>> ->
                UserEnc = urlencode(User),
                PassEnc = urlencode(Password),
                <<UserEnc/binary, ":", PassEnc/binary, "@", Netloc/binary>>;
            _ ->
                UserOnly = urlencode(User),
                <<UserOnly/binary, "@", Netloc/binary>>
        end,
    QueryPart = raw_query(Qs),
    FragmentPart = raw_fragment(Fragment),
    PathPart = case Path of
                   nil -> <<>>;
                   undefined -> <<>>;
                   _ -> Path
               end,
    <<Prefix/binary, Authority/binary, PathPart/binary,
      QueryPart/binary, FragmentPart/binary>>.
%% @private
%% Parse an address that may still carry a "?query" part. When a "?" is
%% found, the query is stored in `qs' and "?Query" is put in front of the
%% raw path already held in S before the address itself is parsed.
parse_addr1(Addr, S) ->
    case binary:split(Addr, <<"?">>) of
        [HostPart, Query] ->
            PrevRaw = S#hackney_url.raw_path,
            NewRaw = <<"?", Query/binary, PrevRaw/binary>>,
            parse_addr(HostPart, S#hackney_url{raw_path = NewRaw, qs = Query});
        [_] ->
            parse_addr(Addr, S)
    end.
%% @private
%% Split optional "user[:password]@" credentials off the address. The part
%% after the first "@" becomes the netloc; user and password are URL-decoded,
%% and the password is empty when no ":" is present in the credentials.
parse_addr(Addr, S) ->
    case binary:split(Addr, <<"@">>) of
        [_] ->
            parse_netloc(Addr, S#hackney_url{netloc = Addr});
        [Credentials, HostPort] ->
            {User, Password} =
                case binary:split(Credentials, <<":">>) of
                    [U, P] -> {urldecode(U), urldecode(P)};
                    [U] -> {urldecode(U), <<>>}
                end,
            parse_netloc(HostPort, S#hackney_url{netloc = HostPort,
                                                 user = User,
                                                 password = Password})
    end.
%% @private
%% Extract host and port from a netloc and store them in the record.
%% First clause: netloc starting with "[" (bracketed host). Without a port
%% the default is 80 for `hackney_tcp' and 443 for `hackney_ssl'; any shape
%% not handled explicitly is re-parsed from the text following the "[".
%% Second clause: "Host" or "Host:Port". Without a port the default is 80,
%% 443, or 0 for `hackney_local_tcp' (whose host is also URL-decoded).
parse_netloc(<<"[", Rest/binary>>, #hackney_url{transport = Transport} = S) ->
    case binary:split(Rest, <<"]">>, [trim]) of
        [Host] when Transport =:= hackney_tcp ->
            S#hackney_url{host = binary_to_list(Host), port = 80};
        [Host] when Transport =:= hackney_ssl ->
            S#hackney_url{host = binary_to_list(Host), port = 443};
        [Host, <<":", PortBin/binary>>] when PortBin /= <<>> ->
            PortNum = list_to_integer(binary_to_list(PortBin)),
            S#hackney_url{host = binary_to_list(Host), port = PortNum};
        _ ->
            parse_netloc(Rest, S)
    end;
parse_netloc(Netloc, #hackney_url{transport = Transport} = S) ->
    case binary:split(Netloc, <<":">>, [trim]) of
        [Host] when Transport =:= hackney_tcp ->
            S#hackney_url{host = unicode:characters_to_list(Host), port = 80};
        [Host] when Transport =:= hackney_ssl ->
            S#hackney_url{host = unicode:characters_to_list(Host), port = 443};
        [Host] when Transport =:= hackney_local_tcp ->
            Decoded = urldecode(Host),
            S#hackney_url{host = unicode:characters_to_list(Decoded), port = 0};
        [Host, PortBin] ->
            PortNum = list_to_integer(binary_to_list(PortBin)),
            S#hackney_url{host = unicode:characters_to_list(Host),
                          port = PortNum}
    end.
%% @private
%% Split a binary at its first "?" and return `{Before, Query}'; Query is
%% empty when there is no "?".
cut_query(Path) ->
    case binary:split(Path, <<"?">>) of
        [Before, Query] -> {Before, Query};
        [_] -> {Path, <<>>}
    end.
%% @private
%% Split a binary at its first "#" and return `{Before, Fragment}'; Fragment
%% is empty when there is no "#".
cut_fragment(Bin) ->
    case binary:split(Bin, <<"#">>) of
        [Before, Fragment] -> {Before, Fragment};
        [_] -> {Bin, <<>>}
    end.
%% @doc Decode a URL-encoded binary, failing with `badarg' on an invalid
%% percent escape.
%% @equiv urldecode(Bin, crash)
-spec urldecode(binary()) -> binary().
urldecode(Encoded) when is_binary(Encoded) ->
    urldecode(Encoded, <<>>, crash).
%% @doc Decode a URL-encoded binary.
%% OnError selects what happens when a percent character is not followed by
%% two valid hex characters: `skip' keeps the percent character as-is and
%% carries on, `crash' makes the function fail with the reason `badarg'.
-spec urldecode(binary(), crash | skip) -> binary().
urldecode(Encoded, OnError) when is_binary(Encoded) ->
    urldecode(Encoded, <<>>, OnError).
%% @private
%% Decoding loop: Acc collects the bytes decoded so far. "+" decodes to a
%% space and "%XY" to the byte with hex value XY. A "%" that does not start a
%% valid escape is copied unchanged under `skip' and raises `badarg' under
%% `crash'.
-spec urldecode(binary(), binary(), crash | skip) -> binary().
urldecode(<<>>, Acc, _OnError) ->
    Acc;
urldecode(<<$+, Rest/binary>>, Acc, OnError) ->
    urldecode(Rest, <<Acc/binary, $\s>>, OnError);
urldecode(<<$%, H, L, Rest/binary>>, Acc, OnError) ->
    Hi = unhex(H),
    Lo = unhex(L),
    case Hi =:= error orelse Lo =:= error of
        true ->
            case OnError of
                skip -> ok;
                crash -> erlang:error(badarg)
            end,
            %% Keep the "%" and reprocess the two bytes that followed it.
            urldecode(<<H, L, Rest/binary>>, <<Acc/binary, $%>>, OnError);
        false ->
            urldecode(Rest, <<Acc/binary, ((Hi bsl 4) bor Lo)>>, OnError)
    end;
urldecode(<<$%, Rest/binary>>, Acc, OnError) ->
    %% A "%" with fewer than two bytes after it.
    case OnError of
        skip -> ok;
        crash -> erlang:error(badarg)
    end,
    urldecode(Rest, <<Acc/binary, $%>>, OnError);
urldecode(<<Byte, Rest/binary>>, Acc, OnError) ->
    urldecode(Rest, <<Acc/binary, Byte>>, OnError).
%% @private
%% Value (0..15) of a hexadecimal digit character in either case, or the
%% atom `error' for any other input.
-spec unhex(byte()) -> byte() | error.
unhex(D) when $0 =< D, D =< $9 -> D - $0;
unhex(D) when $A =< D, D =< $F -> 10 + (D - $A);
unhex(D) when $a =< D, D =< $f -> 10 + (D - $a);
unhex(_) -> error.
%% @doc URL-encode a string or binary with the default options (space is
%% written as "+", hex digits in uppercase).
-spec urlencode(binary() | string()) -> binary().
urlencode(Data) ->
    urlencode(Data, []).
%% @doc URL-encode a string or binary.
%% The `noplus' option disables the default behaviour of quoting space
%% characters, `\s', as `+'. The `lower' option switches the hex digits of
%% percent escapes from the default uppercase letters to lowercase letters.
-spec urlencode(binary() | string(), [qs_opt()]) -> binary().
urlencode(Data, Opts) ->
    NoPlus = proplists:get_value(noplus, Opts, false),
    Lower = proplists:get_value(lower, Opts, false),
    urlencode(hackney_bstr:to_binary(Data), <<>>, not NoPlus, Lower).
%% @private
%% Encoding loop: Acc collects the output. Digits, ASCII letters and the
%% characters . - ~ _ * @ ( ) ! $ are copied unchanged; a space becomes "+"
%% when Plus is true; every other byte is written as a percent escape whose
%% hex digits are lowercase when Lower is true and uppercase otherwise.
-spec urlencode(binary(), binary(), boolean(), boolean()) -> binary().
urlencode(<<>>, Acc, _Plus, _Lower) ->
    Acc;
urlencode(<<$\s, Rest/binary>>, Acc, true, Lower) ->
    urlencode(Rest, <<Acc/binary, $+>>, true, Lower);
urlencode(<<C, Rest/binary>>, Acc, Plus, Lower)
  when C >= $0, C =< $9;
       C >= $A, C =< $Z;
       C >= $a, C =< $z;
       C =:= $.; C =:= $-; C =:= $~; C =:= $_; C =:= $*; C =:= $@;
       C =:= $(; C =:= $); C =:= $!; C =:= $$ ->
    urlencode(Rest, <<Acc/binary, C>>, Plus, Lower);
urlencode(<<C, Rest/binary>>, Acc, Plus, Lower) ->
    Hi = (C band 16#F0) bsr 4,
    Lo = C band 16#0F,
    {HiChar, LoChar} =
        if
            Lower -> {tohexl(Hi), tohexl(Lo)};
            true -> {tohexu(Hi), tohexu(Lo)}
        end,
    urlencode(Rest, <<Acc/binary, $%, HiChar, LoChar>>, Plus, Lower).
%% @private
%% Uppercase hexadecimal digit character for a value below 16.
-spec tohexu(byte()) -> byte().
tohexu(N) when N < 10 -> N + $0;
tohexu(N) when N < 16 -> N - 10 + $A.
%% @private
%% Lowercase hexadecimal digit character for a value below 16.
-spec tohexl(byte()) -> byte().
tohexl(N) when N < 10 -> N + $0;
tohexl(N) when N < 16 -> N - 10 + $a.
%% @doc Parse a query string or form body from a binary and return a list of
%% properties. The input is split on "&" and each token on its first "=";
%% names and values are URL-decoded, and a token that yields a single part
%% is returned with the value `true'.
-spec parse_qs(binary()) -> qs_vals().
parse_qs(<<>>) ->
    [];
parse_qs(Bin) ->
    ToProperty =
        fun(Token) ->
                case hackney_bstr:split(Token, <<"=">>, [trim_all]) of
                    [Name, Value] ->
                        {urldecode(Name), urldecode(Value)};
                    [Flag] ->
                        {urldecode(Flag), true}
                end
        end,
    lists:map(ToProperty, hackney_bstr:split(Bin, <<"&">>, [trim_all, global])).
%% @doc Encode query properties to a binary using the default encoding
%% options.
-spec qs(qs_vals()) -> binary().
qs(Props) ->
    qs(Props, []).
%% @doc Encode query properties to a binary.
%% Opts are passed to {@link urlencode/2} for every key and value.
-spec qs(qs_vals(), [qs_opt()]) -> binary().
qs(Props, Opts) ->
    qs(Props, Opts, []).
%% @private
%% Accumulate one "Key=Value" binary per property (in reverse order), then
%% join them with "&" in the original order.
qs([{Key, Value} | Rest], Opts, Acc) ->
    KeyEnc = urlencode(Key, Opts),
    ValueEnc = urlencode(Value, Opts),
    qs(Rest, Opts, [<<KeyEnc/binary, "=", ValueEnc/binary>> | Acc]);
qs([], _Opts, Acc) ->
    hackney_bstr:join(lists:reverse(Acc), <<"&">>).
%% @doc Construct a URL from a base URL, a path and a query.
%% Path is a single binary or a list of path parts; Query is either an
%% already encoded binary or a list of properties, which is encoded with
%% qs/1. Leading slashes and one trailing slash are stripped from the base
%% URL and from each path part (see fix_path/1) before the parts are joined
%% with "/". Parts equal to `""', `"/"' or `<<"/">>' are skipped so that
%% they do not produce an empty segment ("//") in the result.
-spec make_url(binary(), binary() | [binary()], binary() | qs_vals())
              -> binary().
make_url(Url, Path, Query) when is_list(Query) ->
    %% a list of properties has been passed
    make_url(Url, Path, qs(Query));
make_url(Url, Path, Query) when is_binary(Path) ->
    make_url(Url, [Path], Query);
make_url(Url, PathParts, Query) when is_binary(Query) ->
    %% create path
    %% The slash filter used to read `P /= "/" orelse P /= <<"/">>', which is
    %% true for every P and therefore never dropped slash-only parts.
    PathParts1 = [fix_path(P) || P <- PathParts,
                                 P /= "", P /= "/", P /= <<"/">>],
    %% The leading empty element makes the joined path start with "/".
    Path = hackney_bstr:join([<<>> | PathParts1], <<"/">>),
    %% initialise the query
    Query1 = case Query of
                 <<>> -> <<>>;
                 _ -> <<"?", Query/binary>>
             end,
    %% make the final uri
    iolist_to_binary([fix_path(Url), Path, Query1]).
%% @doc Strip every leading "/" and at most one trailing "/" from a path.
%% A list is converted to a binary first; the result is always a binary.
fix_path(Path) when is_list(Path) ->
    fix_path(list_to_binary(Path));
fix_path(<<>>) ->
    <<>>;
fix_path(<<$/, Rest/binary>>) ->
    fix_path(Rest);
fix_path(Path) ->
    %% Path is non-empty here and does not start with "/".
    case binary:last(Path) of
        $/ -> binary:part(Path, 0, byte_size(Path) - 1);
        _ -> Path
    end.
%% @doc Encode a URL path.
%% The path is split on "/" and every segment is encoded on its own, so the
%% separators themselves are preserved. `undefined' and `nil' encode to an
%% empty binary; any other non-binary, non-list argument fails with `badarg'.
-spec pathencode(binary() | list()) -> binary().
pathencode(Path) when is_list(Path) ->
    pathencode(list_to_binary(Path));
pathencode(Path) when is_binary(Path) ->
    case binary:split(Path, <<"/">>, [global]) of
        [Single] ->
            %% No "/" in the path: a single segment.
            partial_pathencode(Single, <<>>);
        Segments ->
            do_partial_pathencode(Segments, [])
    end;
pathencode(undefined) ->
    <<>>;
pathencode(nil) ->
    <<>>;
pathencode(_) ->
    erlang:error(badarg).
%% @private
%% Encode every path segment and join the results with "/". Acc holds the
%% segments already encoded, in reverse order.
do_partial_pathencode(Parts, Acc) ->
    Encoded = lists:foldl(fun(Part, Done) ->
                                  [partial_pathencode(Part, <<>>) | Done]
                          end, Acc, Parts),
    hackney_bstr:join(lists:reverse(Encoded), <<"/">>).
%% @private
%% Encode one path segment; Acc collects the output.
%% Digits, ASCII letters and the characters ; = , : * @ ( ) . - + ~ _ are
%% copied unchanged and a space is written as "+". A "%" followed by two hex
%% digits is taken as an existing escape and copied untouched; any other "%"
%% is itself escaped as "%25" (similar to what Chrome does, see
%% http://src.chromium.org/viewvc/chrome/trunk/src/url/url_canon_path.cc).
%% Every remaining byte becomes an uppercase percent escape.
partial_pathencode(<<>>, Acc) ->
    Acc;
partial_pathencode(<<$%, Tail/binary>>, Acc) ->
    case Tail of
        <<H, L, Rest/binary>> ->
            case unhex(H) =/= error andalso unhex(L) =/= error of
                true ->
                    %% Already a valid escape sequence: keep it as-is.
                    partial_pathencode(Rest, <<Acc/binary, $%, H, L>>);
                false ->
                    partial_pathencode(Tail, <<Acc/binary, "%25">>)
            end;
        _ ->
            %% Fewer than two bytes follow the "%".
            partial_pathencode(Tail, <<Acc/binary, "%25">>)
    end;
partial_pathencode(<<$\s, Rest/binary>>, Acc) ->
    partial_pathencode(Rest, <<Acc/binary, $+>>);
partial_pathencode(<<C, Rest/binary>>, Acc)
  when C >= $0, C =< $9;
       C >= $A, C =< $Z;
       C >= $a, C =< $z;
       C =:= $;; C =:= $=; C =:= $,; C =:= $:; C =:= $*; C =:= $@;
       C =:= $(; C =:= $);
       C =:= $.; C =:= $-; C =:= $+; C =:= $~; C =:= $_ ->
    partial_pathencode(Rest, <<Acc/binary, C>>);
partial_pathencode(<<C, Rest/binary>>, Acc) ->
    Hi = tohexu((C band 16#F0) bsr 4),
    Lo = tohexu(C band 16#0F),
    partial_pathencode(Rest, <<Acc/binary, $%, Hi, Lo>>).