api-v2/deps/hackney/src/hackney_bstr.erl
2025-04-16 10:03:13 -03:00

450 lines
12 KiB
Erlang

%%% -*- erlang -*-
%%%
%%% This file is part of hackney_lib released under the Apache 2 license.
%%% See the NOTICE for more information.
%%%
-module(hackney_bstr).
-export([to_binary/1,
to_lower/1, to_upper/1,
char_to_lower/1, char_to_upper/1,
join/2,
to_hex/1,
token_ci/2, token/2,
list/2,
nonempty_list/2,
params/2,
parameterized_tokens/1,
whitespace/2,
digits/1, digits/2, digits/3,
alpha/2,
word/2,
trim/1]).
%% BEGIN: Remove when OTP 17 not officially supported
%% split/3 is a local copy of binary:split/3 from OTP 18 (see the banner
%% above its definition); the two types below are only used by its -spec.
-export([split/3]).
-export_type([cp/0]).
%% Pattern term accepted by split/3 besides plain binaries.
%% NOTE(review): presumably mirrors the opaque binary:cp() compiled-pattern
%% type of the OTP binary module -- confirm before relying on its shape.
-opaque cp() :: {'am' | 'bm', binary()}.
%% Value of the {scope, part()} option of split/3: a {Start, Length} pair.
-type part() :: {Start :: non_neg_integer(), Length :: integer()}.
%% END: Remove when OTP 17 not officially supported
-export([quoted_string/2]).
%% @doc Coerce a binary, list, integer or atom into a binary.
%%
%% A binary is returned as is, a list goes through `list_to_binary/1', an
%% integer is rendered in decimal and an atom is encoded as latin1. Any
%% other term fails with `function_clause'.
to_binary(Bin) when is_binary(Bin) ->
    Bin;
to_binary(List) when is_list(List) ->
    list_to_binary(List);
to_binary(Int) when is_integer(Int) ->
    integer_to_binary(Int);
to_binary(Atom) when is_atom(Atom) ->
    atom_to_binary(Atom, latin1).
%% @doc Convert a binary string to lowercase.
%%
%% Every byte is mapped through `char_to_lower/1'. Input that is not a
%% binary is first converted with `to_binary/1'.
-spec to_lower(binary()|atom()|list()) -> binary().
to_lower(Bin) when is_binary(Bin) ->
    list_to_binary([char_to_lower(Byte) || << Byte >> <= Bin]);
to_lower(Other) ->
    to_lower(to_binary(Other)).
%% @doc Convert a binary string to uppercase.
%%
%% Every byte is mapped through `char_to_upper/1'. Input that is not a
%% binary is first converted with `to_binary/1'.
-spec to_upper(binary()|atom()|list()) -> binary().
to_upper(Bin) when is_binary(Bin) ->
    list_to_binary([char_to_upper(Byte) || << Byte >> <= Bin]);
to_upper(Other) ->
    to_upper(to_binary(Other)).
%% @doc Convert [A-Z] characters to lowercase.
%% @end
%% Each letter is matched as a literal value; direct matching was chosen
%% for speed. Anything that is not an uppercase ASCII letter is returned
%% unchanged.
-spec char_to_lower(char()) -> char().
char_to_lower(Ch) ->
    case Ch of
        $A -> $a;
        $B -> $b;
        $C -> $c;
        $D -> $d;
        $E -> $e;
        $F -> $f;
        $G -> $g;
        $H -> $h;
        $I -> $i;
        $J -> $j;
        $K -> $k;
        $L -> $l;
        $M -> $m;
        $N -> $n;
        $O -> $o;
        $P -> $p;
        $Q -> $q;
        $R -> $r;
        $S -> $s;
        $T -> $t;
        $U -> $u;
        $V -> $v;
        $W -> $w;
        $X -> $x;
        $Y -> $y;
        $Z -> $z;
        _ -> Ch
    end.
%% @doc Convert [a-z] characters to uppercase.
%% @end
%% Each letter is matched as a literal value. Anything that is not a
%% lowercase ASCII letter is returned unchanged.
-spec char_to_upper(char()) -> char().
char_to_upper(Ch) ->
    case Ch of
        $a -> $A;
        $b -> $B;
        $c -> $C;
        $d -> $D;
        $e -> $E;
        $f -> $F;
        $g -> $G;
        $h -> $H;
        $i -> $I;
        $j -> $J;
        $k -> $K;
        $l -> $L;
        $m -> $M;
        $n -> $N;
        $o -> $O;
        $p -> $P;
        $q -> $Q;
        $r -> $R;
        $s -> $S;
        $t -> $T;
        $u -> $U;
        $v -> $V;
        $w -> $W;
        $x -> $X;
        $y -> $Y;
        $z -> $Z;
        _ -> Ch
    end.
%% @doc Join a list of iodata parts into a single binary, inserting
%% `Separator' between consecutive parts.
%%
%% The result is always a binary: an empty list gives `<<>>' and a
%% single-element list gives that element converted with
%% `iolist_to_binary/1', never the raw element. This keeps the return type
%% independent of the number of parts.
-spec join([iodata()], iodata()) -> binary().
join(Parts, Separator) ->
    iolist_to_binary(join(lists:reverse(Parts), Separator, [])).

%% Walk the reversed parts, prepending each one, so that the accumulator
%% ends up in the original order with a separator between neighbours.
%% The first part visited (the last of the input) gets no separator.
join([], _Separator, Acc) ->
    Acc;
join([Part | Rest], Separator, []) ->
    join(Rest, Separator, [Part]);
join([Part | Rest], Separator, Acc) ->
    join(Rest, Separator, [Part, Separator | Acc]).
%% @doc Hex-encode a binary or a list of byte values.
%%
%% Returns a flat list (string) holding two lowercase hexadecimal digits
%% per input byte, high nibble first. An empty input gives `[]'.
to_hex(Bin) when is_binary(Bin) ->
    to_hex(binary_to_list(Bin));
to_hex(Bytes) when is_list(Bytes) ->
    [Digit || Byte <- Bytes,
              Digit <- [to_digit(Byte div 16), to_digit(Byte rem 16)]].

%% Map a nibble value to its hexadecimal character: 0-9, then a-f.
to_digit(Nibble) when Nibble < 10 ->
    $0 + Nibble;
to_digit(Nibble) ->
    $a + Nibble - 10.
%% @doc Parse a case-insensitive token.
%%
%% Works like `token/2', but every byte of the token is passed through
%% `char_to_lower/1' before being handed to `Fun'.
-spec token_ci(binary(), fun()) -> any().
token_ci(Data, Fun) ->
    token(Data, Fun, ci, <<>>).

%% @doc Parse a token.
%%
%% Collects bytes until the input ends or a delimiter is met, then returns
%% `Fun(Rest, Token)'. `Rest' still starts with the delimiter and `Token'
%% may be empty.
-spec token(binary(), fun()) -> any().
token(Data, Fun) ->
    token(Data, Fun, cs, <<>>).

%% Worker shared by token/2 (Case = cs) and token_ci/2 (Case = ci).
%% Delimiters are the bytes 0..32 (controls, tab and space), 127 and the
%% separator characters listed in the guard below.
-spec token(binary(), fun(), ci | cs, binary()) -> any().
token(<<>>, Fun, _Case, Token) ->
    Fun(<<>>, Token);
token(<< C, _/binary >> = Data, Fun, _Case, Token)
        when C =< $\s; C =:= 127;
             C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
             C =:= $,; C =:= $;; C =:= $:; C =:= $\\; C =:= $";
             C =:= $/; C =:= $[; C =:= $]; C =:= $?; C =:= $=;
             C =:= ${; C =:= $} ->
    Fun(Data, Token);
token(<< C, Rest/binary >>, Fun, Case, Token) ->
    Byte = case Case of
        ci -> char_to_lower(C);
        _ -> C
    end,
    token(Rest, Fun, Case, << Token/binary, Byte >>).
%% @doc Parse a non-empty list of the given type.
%%
%% Same as `list/2', except that a list with no elements is rejected with
%% `{error, badarg}'.
-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
nonempty_list(Data, Fun) ->
    case list(Data, Fun, []) of
        {error, badarg} = Error -> Error;
        [] -> {error, badarg};
        Reversed -> lists:reverse(Reversed)
    end.
%% @doc Parse a list of the given type.
%%
%% `Fun' parses one element. The elements are returned in input order, or
%% `{error, badarg}' when parsing fails.
-spec list(binary(), fun()) -> list() | {error, badarg}.
list(Data, Fun) ->
    case list(Data, Fun, []) of
        {error, badarg} = Error -> Error;
        Reversed -> lists:reverse(Reversed)
    end.
-spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
%% Accumulating worker behind list/2 and nonempty_list/2; the elements are
%% collected in reverse order.
%%
%% From the RFC:
%% <blockquote>Wherever this construct is used, null elements are allowed,
%% but do not contribute to the count of elements present.
%% That is, "(element), , (element) " is permitted, but counts
%% as only two elements. Therefore, where at least one element is required,
%% at least one non-null element MUST be present.</blockquote>
list(Data, Fun, Acc) ->
    whitespace(Data, fun (Rest) -> list_item(Rest, Fun, Acc) end).

%% Decide what sits at the current position: end of input, a null element
%% (a bare comma) or a real element to be parsed by Fun.
list_item(<<>>, _Fun, Acc) ->
    Acc;
list_item(<< $,, Rest/binary >>, Fun, Acc) ->
    list(Rest, Fun, Acc);
list_item(Rest, Fun, Acc) ->
    Fun(Rest,
        fun (After, Item) ->
            whitespace(After,
                fun (Tail) -> list_next(Tail, Fun, [Item | Acc]) end)
        end).

%% After an element only end of input or a comma is acceptable.
list_next(<<>>, _Fun, Acc) ->
    Acc;
list_next(<< $,, Rest/binary >>, Fun, Acc) ->
    list(Rest, Fun, Acc);
list_next(_Other, _Fun, _Acc) ->
    {error, badarg}.
%% @doc Parse a list of parameters (a=b;c=d).
%%
%% Each parameter must be introduced by `;'. When no further `;' follows,
%% `Fun(Rest, Params)' is returned with `Params' a list of `{Attr, Value}'
%% pairs in input order.
-spec params(binary(), fun()) -> any().
params(Data, Fun) ->
    params(Data, Fun, []).

-spec params(binary(), fun(), [{binary(), binary()}]) -> any().
params(Data, Fun, Acc) ->
    whitespace(Data, fun (Rest) -> params_step(Rest, Fun, Acc) end).

%% Either consume one more ";attr=value" pair or finish with what has been
%% collected so far.
params_step(<< $;, Rest/binary >>, Fun, Acc) ->
    param(Rest,
        fun (Rest2, Attr, Value) ->
            params(Rest2, Fun, [{Attr, Value} | Acc])
        end);
params_step(Rest, Fun, Acc) ->
    Fun(Rest, lists:reverse(Acc)).
%% Parse a single `attr=value' pair after optional leading whitespace.
%% The attribute is a case-insensitive token and the value a word (token
%% or quoted string). Returns `Fun(Rest, Attr, Value)', or
%% `{error, badarg}' when the attribute is empty or no `=' follows it.
-spec param(binary(), fun()) -> any().
param(Data, Fun) ->
    whitespace(Data,
        fun (Rest) ->
            token_ci(Rest,
                fun (After, Attr) -> param_value(After, Attr, Fun) end)
        end).

%% Continue after the attribute name has been read.
param_value(_Rest, <<>>, _Fun) ->
    {error, badarg};
param_value(<< $=, Rest/binary >>, Attr, Fun) ->
    word(Rest, fun (Rest2, Value) -> Fun(Rest2, Attr, Value) end);
param_value(_Rest, _Attr, _Fun) ->
    {error, badarg}.
%% @doc Parse a non empty list of tokens followed with optional parameters.
%%
%% Each element is returned as `{Token, Params}'. An empty token, like an
%% empty list, gives `{error, badarg}'.
-spec parameterized_tokens(binary()) -> any().
parameterized_tokens(Data) ->
    nonempty_list(Data, fun parameterized_token/2).

%% Element parser handed to nonempty_list/2: one token plus its parameters.
%% Next is the continuation supplied by the list parser.
parameterized_token(Data, Next) ->
    token(Data,
        fun (_Rest, <<>>) ->
                {error, badarg};
            (Rest, Token) ->
                parameterized_tokens_params(Rest,
                    fun (Rest2, Params) ->
                        Next(Rest2, {Token, Params})
                    end, [])
        end).
%% Collect the `;'-introduced parameters that follow a token. When no
%% further `;' is found, `Fun(Rest, Params)' is returned with the
%% parameters in input order.
-spec parameterized_tokens_params(binary(), fun(),
    [binary() | {binary(), binary()}]) -> any().
parameterized_tokens_params(Data, Fun, Acc) ->
    whitespace(Data,
        fun (Rest) -> parameterized_tokens_params_step(Rest, Fun, Acc) end).

%% Either consume one more parameter or finish with the collected ones.
parameterized_tokens_params_step(<< $;, Rest/binary >>, Fun, Acc) ->
    parameterized_tokens_param(Rest,
        fun (Rest2, Param) ->
            parameterized_tokens_params(Rest2, Fun, [Param | Acc])
        end);
parameterized_tokens_params_step(Rest, Fun, Acc) ->
    Fun(Rest, lists:reverse(Acc)).
%% Parse one parameter of a parameterized token. The parameter is either a
%% bare attribute, reported as `Attr', or `attr=value', reported as
%% `{Attr, Value}'. The attribute is a case-sensitive token; an empty one
%% gives `{error, badarg}'.
-spec parameterized_tokens_param(binary(), fun()) -> any().
parameterized_tokens_param(Data, Fun) ->
    whitespace(Data,
        fun (Rest) ->
            token(Rest,
                fun (After, Attr) ->
                    parameterized_tokens_param_value(After, Attr, Fun)
                end)
        end).

%% Continue after the attribute name has been read.
parameterized_tokens_param_value(_Rest, <<>>, _Fun) ->
    {error, badarg};
parameterized_tokens_param_value(<< $=, Rest/binary >>, Attr, Fun) ->
    word(Rest, fun (Rest2, Value) -> Fun(Rest2, {Attr, Value}) end);
parameterized_tokens_param_value(Rest, Attr, Fun) ->
    Fun(Rest, Attr).
%% @doc Skip whitespace.
%%
%% Drops leading spaces and horizontal tabs, then returns `Fun(Rest)'.
-spec whitespace(binary(), fun()) -> any().
whitespace(<< $\s, Rest/binary >>, Fun) ->
    whitespace(Rest, Fun);
whitespace(<< $\t, Rest/binary >>, Fun) ->
    whitespace(Rest, Fun);
whitespace(Rest, Fun) ->
    Fun(Rest).
%% @doc Parse a list of digits as a non negative integer.
%%
%% The digits may be followed by spaces or tabs only; anything else after
%% them, or input that does not start with a digit, gives `{error, badarg}'.
-spec digits(binary()) -> non_neg_integer() | {error, badarg}.
digits(Data) ->
    digits(Data,
        fun (Rest, Int) ->
            whitespace(Rest,
                fun (Tail) ->
                    case Tail of
                        <<>> -> Int;
                        _ -> {error, badarg}
                    end
                end)
        end).
%% Parse one or more leading decimal digits and return `Fun(Rest, Int)'.
%% Input that does not start with a digit gives `{error, badarg}'.
-spec digits(binary(), fun()) -> any().
digits(<< C, _/binary >> = Data, Fun)
        when C >= $0 andalso C =< $9 ->
    %% The first byte is known to be a digit; accumulate from zero.
    digits(Data, Fun, 0);
digits(_Data, _Fun) ->
    {error, badarg}.

%% Fold any leading decimal digits into Acc (base 10), then return
%% `Fun(Rest, Acc)'. Zero digits is fine here: Acc is passed on unchanged.
-spec digits(binary(), fun(), non_neg_integer()) -> any().
digits(<< C, Rest/binary >>, Fun, Acc)
        when C >= $0 andalso C =< $9 ->
    digits(Rest, Fun, Acc * 10 + (C - $0));
digits(Rest, Fun, Acc) ->
    Fun(Rest, Acc).
%% @doc Parse a list of case-insensitive alpha characters.
%%
%% Changes all characters to lowercase. Returns `Fun(Rest, Alpha)', where
%% `Rest' starts at the first byte that is not an ASCII letter.
-spec alpha(binary(), fun()) -> any().
alpha(Data, Fun) ->
    alpha(Data, Fun, <<>>).

%% The catch-all clause also covers the end of input.
-spec alpha(binary(), fun(), binary()) -> any().
alpha(<< C, Rest/binary >>, Fun, Acc)
        when C >= $A, C =< $Z;
             C >= $a, C =< $z ->
    alpha(Rest, Fun, << Acc/binary, (char_to_lower(C)) >>);
alpha(Rest, Fun, Acc) ->
    Fun(Rest, Acc).
%% @doc Parse either a token or a quoted string.
%%
%% Input starting with a double quote is handed to `quoted_string/2';
%% anything else must yield a non-empty token, otherwise the result is
%% `{error, badarg}'.
-spec word(binary(), fun()) -> any().
word(<< $", _/binary >> = Quoted, Fun) ->
    quoted_string(Quoted, Fun);
word(Data, Fun) ->
    token(Data, fun (Rest, Token) -> word_token(Rest, Token, Fun) end).

%% Reject an empty token, pass anything else on to the continuation.
word_token(_Rest, <<>>, _Fun) ->
    {error, badarg};
word_token(Rest, Token, Fun) ->
    Fun(Rest, Token).
%% @doc Remove leading and trailing whitespace, as matched by the regular
%% expression class `\s', from a binary.
-spec trim(binary()) -> binary().
trim(Data) ->
    Edges = "^\\s+|\\s+$",
    Options = [{return, binary}, global],
    re:replace(Data, Edges, "", Options).
%% Parse a double-quoted string. The input must start with the opening
%% quote. Returns `Fun(Rest, String)' with the quotes removed and each
%% backslash escape replaced by the escaped byte, or `{error, badarg}' when
%% the closing quote is missing.
-spec quoted_string(binary(), fun()) -> any().
quoted_string(<< $", Body/binary >>, Fun) ->
    quoted_string(Body, Fun, <<>>).

-spec quoted_string(binary(), fun(), binary()) -> any().
quoted_string(<< $\\, Escaped, Rest/binary >>, Fun, Acc) ->
    %% A backslash followed by any byte: keep that byte, drop the backslash.
    quoted_string(Rest, Fun, << Acc/binary, Escaped >>);
quoted_string(<< $", Rest/binary >>, Fun, Acc) ->
    Fun(Rest, Acc);
quoted_string(<< Char, Rest/binary >>, Fun, Acc) ->
    quoted_string(Rest, Fun, << Acc/binary, Char >>);
quoted_string(<<>>, _Fun, _Acc) ->
    %% Input ran out before the closing quote.
    {error, badarg}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% binary:split/3 from OTP 18
%% remove when support for < 18
%% formally dropped
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Split Haystack on Needles. Options: `global' splits on every match
%% instead of the first one only, `{scope, Part}' is forwarded to the
%% binary module's search, and `trim' / `trim_all' control which empty
%% parts are dropped (see do_split/5). Any failure, including an unknown
%% option, is reported as a `badarg' error.
-spec split(Subject, Pattern, Options) -> Parts when
      Subject :: binary(),
      Pattern :: binary() | [binary()] | cp(),
      Options :: [Option],
      Option :: {scope, part()} | trim | global | trim_all,
      Parts :: [binary()].
split(Haystack, Needles, Options) ->
    try
        {Part, Global, Trim, TrimAll} =
            get_opts_split(Options, {no, false, false, false}),
        MatchOpts = case Part of
            {Start, Length} -> [{scope, {Start, Length}}];
            no -> []
        end,
        Matches = find_split_matches(Global, Haystack, Needles, MatchOpts),
        do_split(Haystack, Matches, 0, Trim, TrimAll)
    catch
        _:_ ->
            erlang:error(badarg)
    end.

%% List the separator positions: every match when splitting globally,
%% otherwise at most the first one.
find_split_matches(true, Haystack, Needles, MatchOpts) ->
    binary:matches(Haystack, Needles, MatchOpts);
find_split_matches(_NotGlobal, Haystack, Needles, MatchOpts) ->
    case binary:match(Haystack, Needles, MatchOpts) of
        nomatch -> [];
        Found -> [Found]
    end.
%% Cut H at the given {Pos, Len} separator matches, starting at offset N.
%% With Trim, empty parts at the end of the result are dropped; with
%% TrimAll, every empty part is dropped.
do_split(H, [], N, Trim, TrimAll)
        when N >= byte_size(H), (Trim =:= true orelse TrimAll =:= true) ->
    %% Nothing is left after the last separator and trimming is requested.
    [];
do_split(H, [], N, _Trim, _TrimAll) ->
    [binary:part(H, {N, byte_size(H) - N})];
do_split(H, [{Pos, Len} | Matches], N, Trim, TrimAll) ->
    Piece = binary:part(H, {N, Pos - N}),
    Tail = do_split(H, Matches, Pos + Len, Trim, TrimAll),
    split_piece(Piece, Tail, Trim, TrimAll).

%% Decide whether the part in front of a separator is kept.
split_piece(<<>>, Tail, _Trim, true) ->
    Tail;
split_piece(<<>>, [], true, _TrimAll) ->
    [];
split_piece(Piece, Tail, _Trim, _TrimAll) ->
    [Piece | Tail].
%% Fold the option list of split/3 into a {Part, Global, Trim, TrimAll}
%% tuple. A later option overrides an earlier one of the same kind. An
%% unknown option or a malformed list throws `badopt'.
get_opts_split([], {Part, Global, Trim, TrimAll}) ->
    {Part, Global, Trim, TrimAll};
get_opts_split([Option | Rest], Opts) ->
    get_opts_split(Rest, set_split_opt(Option, Opts));
get_opts_split(_, _) ->
    throw(badopt).

%% Record a single option in the accumulator tuple.
set_split_opt({scope, {A, B}}, {_Part, Global, Trim, TrimAll}) ->
    {{A, B}, Global, Trim, TrimAll};
set_split_opt(global, {Part, _Global, Trim, TrimAll}) ->
    {Part, true, Trim, TrimAll};
set_split_opt(trim, {Part, Global, _Trim, TrimAll}) ->
    {Part, Global, true, TrimAll};
set_split_opt(trim_all, {Part, Global, Trim, _TrimAll}) ->
    {Part, Global, Trim, true};
set_split_opt(_, _) ->
    throw(badopt).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% END binary:split from OTP 18
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%