From 21eb3d5468b27caca0da1a14c9a45d36d1410829 Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Tue, 18 Jun 2024 16:43:20 +0200 Subject: [PATCH 1/5] z_html: add property filter callback --- src/z_html.erl | 199 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 151 insertions(+), 48 deletions(-) diff --git a/src/z_html.erl b/src/z_html.erl index 31db85c..74d8df4 100644 --- a/src/z_html.erl +++ b/src/z_html.erl @@ -1,9 +1,9 @@ %% @author Marc Worrell -%% @copyright 2009-2022 Marc Worrell +%% @copyright 2009-2024 Marc Worrell %% @doc Utility functions for html processing. Also used for property filtering (by m_rsc_update). %% @end -%% Copyright 2009-2022 Marc Worrell +%% Copyright 2009-2024 Marc Worrell %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -46,7 +46,29 @@ abs_links/2 ]). --type text() :: iodata() | {trans, list( {atom(), binary()} )}. +-type unsafe_props() :: #{ props_key() => unsafe_props_value() } + | [ {props_key(), unsafe_props_value()} | props_key() ]. +-type unsafe_props_value() :: unsafe_props() + | number() + | atom() + | list( unsafe_props_value() ) + | unsafe_text() + | boolean(). + +-type props() :: #{ props_key() => props_value() } + | [ {props_key(), props_value()} | props_key() ]. +-type props_key() :: atom() | binary(). +-type props_value() :: props() + | number() + | atom() + | list( props_value() ) + | trans() + | boolean() + | binary(). + +-type text() :: iodata() | trans(). +-type trans() :: {trans, list( {atom()|binary(), text()} )}. + -type unsafe_text() :: iodata() | {trans, list( {atom(), iodata()} )} | {trans, list( {binary(), iodata()} )} @@ -59,10 +81,19 @@ -type sanitize_options() :: [ sanitize_option() ]. -type sanitize_option() :: {elt_extra, list( binary() )} | {attr_extra, list( binary() )} - | {element, function()}. + | {element, function()} + | {property, function()}. 
-export_type([ + unsafe_props/0, + unsafe_props_value/0, + + props/0, + props_key/0, + props_value/0, + text/0, + trans/0, unsafe_text/0, maybe_text/0, maybe_unsafe_text/0, @@ -84,24 +115,47 @@ ]). -%% @doc Escape all properties used for an update statement. Only leaves the body property intact. --spec escape_props(list() | map()) -> list() | map(). +%% @doc Escape all properties used for an update statement. +-spec escape_props(Props) -> Props1 when + Props:: unsafe_props(), + Props1 :: props(). escape_props(Props) -> escape_props(Props, []). --spec escape_props(list() | map(), Options::list()) -> list() | map(). +%% @doc Escape all properties used for an update statement. +-spec escape_props(Props, Options) -> Props1 when + Props :: unsafe_props(), + Options :: sanitize_options(), + Props1 :: props(). escape_props(Props, Options) when is_list(Props) -> - lists:map( - fun({P, V}) -> - V1 = escape_props1(z_convert:to_binary(P), V, Options), - {P, V1} + lists:filtermap( + fun + ({P, V}) -> + V1 = escape_props1(z_convert:to_binary(P), V, Options), + case property_filter_cb(P, V1, Options) of + true -> {true, {P, V1}}; + PV -> PV + end; + (P) when is_atom(P) -> + case property_filter_cb(P, true, Options) of + true -> {true, {P, true}}; + PV -> PV + end end, Props); escape_props(Props, Options) when is_map(Props) -> maps:fold( fun(K, V, Acc) -> K1 = z_convert:to_binary(K), - Acc#{ K1 => escape_props1(K1, V, Options)} + V1 = escape_props1(K1, V, Options), + case property_filter_cb(K1, V1, Options) of + {true, {K2, V2}} -> + Acc#{ K2 => V2 }; + true -> + Acc#{ K1 => V1 }; + false -> + Acc + end end, #{}, Props). @@ -155,21 +209,24 @@ sanitize_type(_, Ks, V, Options) when is_list(V) -> sanitize_list(Ks, V, Options sanitize_type(_, _Ks, V, _Options) -> escape_value(V). 
sanitize_list(Ks, L, Options) when is_list(L) -> - lists:map( + lists:filtermap( fun ({P, V}) -> P1 = z_convert:to_binary(P), V1 = escape_props1(P1, V, Options), - {P1, V1}; + case property_filter_cb(P1, V1, Options) of + true -> {true, {P1, V1}}; + PV -> PV + end; (V) when is_list(V), Ks =:= [] -> - escape_props(V, Options); + {true, escape_props(V, Options)}; (V) when is_map(V) -> - escape_props(V, Options); + {true, escape_props(V, Options)}; (V) when Ks =:= [] -> - escape_value(V); + {true, escape_value(V)}; (V) -> [Type|Ks1] = Ks, - sanitize_type(Type, Ks1, V, Options) + {true, sanitize_type(Type, Ks1, V, Options)} end, L); sanitize_list(Ks, Map, Options) when is_map(Map) -> @@ -210,28 +267,47 @@ escape_value(V) -> V. %% @doc Checks if all properties are properly escaped --spec escape_props_check(list() | map()) -> list() | map(). +-spec escape_props_check(Props) -> Props1 when + Props :: unsafe_props(), + Props1 :: props(). escape_props_check(Props) -> escape_props_check(Props, []). --spec escape_props_check(list() | map(), Options::list()) -> list() | map(). +-spec escape_props_check(Props, Options) -> Props1 when + Props :: unsafe_props(), + Options :: sanitize_options(), + Props1 :: props(). 
escape_props_check(Props, Options) when is_list(Props) -> - lists:map( + lists:filtermap( fun ({P, V}) -> V1 = escape_props_check1(z_convert:to_binary(P), V, Options), - {P, V1}; - (V) when is_list(V); is_map(V)-> - escape_props_check(V, Options); - (V) -> - escape_value_check(V) + case property_filter_cb(P, V1, Options) of + true -> {true, {P, V1}}; + PV -> PV + end; + (P) when is_atom(P) -> + case property_filter_cb(P, true, Options) of + true -> {true, {P, true}}; + PV -> PV + end end, Props); escape_props_check(Props, Options) when is_map(Props) -> - maps:map( - fun(P, V) -> - escape_props_check1(z_convert:to_binary(P), V, Options) + maps:fold( + fun(K, V, Acc) -> + K1 = z_convert:to_binary(K), + V1 = escape_props_check1(K1, V, Options), + case property_filter_cb(K1, V1, Options) of + {true, {K2, V2}} -> + Acc#{ K2 => V2 }; + true -> + Acc#{ K1 => V1 }; + false -> + Acc + end end, + #{}, Props). @@ -272,21 +348,24 @@ sanitize_type_check(<<"list">>, V, Options) -> sanitize_list_check(V, Options); sanitize_type_check(<<"int">>, V, _Options) -> sanitize_int(V); sanitize_type_check(<<"unsafe">>, V, _Options) -> V; sanitize_type_check(_, V, Options) when is_map(V) -> escape_props_check(V, Options); -sanitize_type_check(_, V, Options) when is_list(V) -> escape_props_check(V, Options); +sanitize_type_check(_, V, Options) when is_list(V) -> sanitize_list_check(V, Options); sanitize_type_check(_, V, _Options) -> escape_value_check(V). 
sanitize_list_check(L, Options) when is_list(L) -> - lists:map( + lists:filtermap( fun ({P, V}) -> P1 = z_convert:to_binary(P), V1 = escape_props_check1(P1, V, Options), - {P1, V1}; + case property_filter_cb(P1, V1, Options) of + true -> {true, {P, V1}}; + PV -> PV + end; (V) when is_list(V); is_map(V)-> - escape_props_check(V, Options); + {true, escape_props_check(V, Options)}; (V) -> - escape_value_check(V) + {true, escape_value_check(V)} end, L); sanitize_list_check(Map, Options) when is_map(Map) -> @@ -326,14 +405,12 @@ escape_value_check(V) -> escape({trans, Tr}) when is_list(Tr) -> Tr1 = lists:filtermap( fun - ({Lang, V}) when is_atom(Lang) -> - V1 = z_convert:to_binary(V), - {true, {Lang, escape(V1)}}; - ({Lang, V}) when is_binary(Lang) -> + ({Lang, V}) when is_binary(Lang); is_atom(Lang) -> try - Lang1 = binary_to_existing_atom(Lang, utf8), + Lang1 = sanitize_iso639_1(Lang), + Lang2 = binary_to_existing_atom(Lang1, utf8), V1 = z_convert:to_binary(V), - {true, {Lang1, escape(V1)}} + {true, {Lang2, escape(V1)}} catch _:_ -> false end; @@ -360,7 +437,7 @@ escape(B) when is_binary(B) -> escape1(<<>>, Acc) -> Acc; escape1(<<"€", T/binary>>, Acc) -> - escape1(T, <>); + escape1(T, <>); escape1(<<$&, T/binary>>, Acc) -> escape1(T, <>); escape1(<<$<, T/binary>>, Acc) -> @@ -380,14 +457,12 @@ escape1(<>, Acc) -> escape_check({trans, Tr}) when is_list(Tr) -> Tr1 = lists:filtermap( fun - ({Lang, V}) when is_atom(Lang) -> - V1 = z_convert:to_binary(V), - {true, {Lang, escape_check(V1)}}; - ({Lang, V}) when is_binary(Lang) -> + ({Lang, V}) when is_binary(Lang); is_atom(Lang) -> try - Lang1 = binary_to_existing_atom(Lang, utf8), + Lang1 = sanitize_iso639_1(Lang), + Lang2 = binary_to_existing_atom(Lang1, utf8), V1 = z_convert:to_binary(V), - {true, {Lang1, escape_check(V1)}} + {true, {Lang2, escape_check(V1)}} catch _:_ -> false end; @@ -413,10 +488,38 @@ escape_check(B) when is_binary(B) -> escape_check(Other) -> Other. 
+property_filter_cb(K, V, Options) -> + case lists:keyfind(property, 1, Options) of + {property, F} -> + F(K, V); + false -> + true + end. + +-define(is_lower_alpha(C), (C >= $a andalso C =< $z)). +-define(is_alpha(C), ((C >= $a andalso C =< $z) orelse (C >= $A andalso C =< $Z))). + +% Sanitize a language code, so that "it looks like" an ISO639-1 code. +sanitize_iso639_1(Lang) when is_binary(Lang) -> + case Lang of + <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> + Lang; + <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> + Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, + <>; + <<$x, $-, Rest/binary>> -> + Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, + <<$x, $-, Rest1/binary>>; + _ -> + <<"x-other">> + end; +sanitize_iso639_1(Lang) when is_atom(Lang) -> + sanitize_iso639_1(atom_to_binary(Lang, utf8)). + escape_check1(<<>>, Acc) -> Acc; escape_check1(<<"€", T/binary>>, Acc) -> - escape_check1(T, <>); + escape_check1(T, <>); escape_check1(<<"&", T/binary>>, Acc) -> escape_check1(T, <>); escape_check1(<<"<", T/binary>>, Acc) -> From 54eaf4a107c06c4a6d07a26ebe810411869dcd9a Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Wed, 22 Oct 2025 18:18:21 +0200 Subject: [PATCH 2/5] Fix test, support iso639-2 --- src/z_html.erl | 53 +++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/z_html.erl b/src/z_html.erl index afa0fcc..77c2273 100644 --- a/src/z_html.erl +++ b/src/z_html.erl @@ -223,7 +223,7 @@ sanitize_list(Ks, L, Options) when is_list(L) -> lists:filtermap( fun ({trans, Tr} = V) when is_list(Tr) -> - escape(V); + {true, escape(V)}; ({P, V}) -> P1 = z_convert:to_binary(P), V1 = escape_props1(P1, V, Options), @@ -422,10 +422,9 @@ escape({trans, Tr}) when is_list(Tr) -> fun ({Lang, V}) when is_binary(Lang); is_atom(Lang) -> try - Lang1 = sanitize_iso639_1(Lang), - Lang2 = binary_to_existing_atom(Lang1, utf8), + Lang1 = sanitize_iso639(Lang), V1 = z_convert:to_binary(V), - {true, {Lang2, escape(V1)}} + 
{true, {Lang1, escape(V1)}} catch _:_ -> false end; @@ -474,10 +473,9 @@ escape_check({trans, Tr}) when is_list(Tr) -> fun ({Lang, V}) when is_binary(Lang); is_atom(Lang) -> try - Lang1 = sanitize_iso639_1(Lang), - Lang2 = binary_to_existing_atom(Lang1, utf8), + Lang1 = sanitize_iso639(Lang), V1 = z_convert:to_binary(V), - {true, {Lang2, escape_check(V1)}} + {true, {Lang1, escape_check(V1)}} catch _:_ -> false end; @@ -512,24 +510,39 @@ property_filter_cb(K, V, Options) -> end. -define(is_lower_alpha(C), (C >= $a andalso C =< $z)). --define(is_alpha(C), ((C >= $a andalso C =< $z) orelse (C >= $A andalso C =< $Z))). - -% Sanitize a language code, so that "it looks like" an ISO639-1 code. -sanitize_iso639_1(Lang) when is_binary(Lang) -> - case Lang of +-define(is_alpha(C), ((C >= $a andalso C =< $z) + orelse (C >= $A andalso C =< $Z))). +-define(is_alpha_digit(C), ((C >= $a andalso C =< $z) + orelse (C >= $A andalso C =< $Z) + orelse (C >= $0 andalso C =< $9))). + +% Sanitize a language code, so that "it looks like" an ISO639-1 or -2 code. 
+sanitize_iso639(Lang) when is_binary(Lang) -> + {Lang1, BaseLang} = case Lang of <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> - Lang; + {Lang, Lang}; <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> - Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, - <>; + Rest1 = << <> || <> <- Rest, ?is_alpha(X) >>, + {<>, <>}; + <> when ?is_lower_alpha(A), ?is_lower_alpha(B), ?is_lower_alpha(C) -> + {Lang, Lang}; + <> when ?is_lower_alpha(A), ?is_lower_alpha(B), ?is_lower_alpha(C) -> + Rest1 = << <> || <> <- Rest, ?is_alpha_digit(X) >>, + {<>, <>}; <<$x, $-, Rest/binary>> -> - Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, - <<$x, $-, Rest1/binary>>; + Rest1 = << <> || <> <- Rest, ?is_alpha(C) >>, + {<<$x, $-, Rest1/binary>>, <<"x-other">>}; _ -> - <<"x-other">> + {<<"x-other">>, <<"x-other">>} + end, + try + binary_to_existing_atom(Lang1, utf8) + catch + _:_ when is_atom(BaseLang) -> BaseLang; + _:_ when is_binary(BaseLang) -> binary_to_atom(BaseLang, utf8) end; -sanitize_iso639_1(Lang) when is_atom(Lang) -> - sanitize_iso639_1(atom_to_binary(Lang, utf8)). +sanitize_iso639(Lang) when is_atom(Lang) -> + sanitize_iso639(atom_to_binary(Lang, utf8)). escape_check1(<<>>, Acc) -> Acc; From 772e67e29905f6ee5bbb99f732f9f8a609eafa5f Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Wed, 22 Oct 2025 18:20:47 +0200 Subject: [PATCH 3/5] Fix type --- src/z_html.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/z_html.erl b/src/z_html.erl index 77c2273..8d8f961 100644 --- a/src/z_html.erl +++ b/src/z_html.erl @@ -78,7 +78,7 @@ | binary(). -type text() :: iodata() | trans(). --type trans() :: {trans, list( {atom()|binary(), text()} )}. +-type trans() :: {trans, list( {atom()|binary(), iodata()} )}. 
-type unsafe_text() :: iodata() | {trans, list( {atom(), iodata()} )} From 2e33304167067d6f8e8f51817ea100f9b3c6b902 Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Wed, 22 Oct 2025 18:25:23 +0200 Subject: [PATCH 4/5] Fix binary comprehension --- src/z_html.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/z_html.erl b/src/z_html.erl index 8d8f961..d04dd22 100644 --- a/src/z_html.erl +++ b/src/z_html.erl @@ -522,15 +522,15 @@ sanitize_iso639(Lang) when is_binary(Lang) -> <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> {Lang, Lang}; <> when ?is_lower_alpha(A), ?is_lower_alpha(B) -> - Rest1 = << <> || <> <- Rest, ?is_alpha(X) >>, + Rest1 = << <> || <> <= Rest, ?is_alpha(X) >>, {<>, <>}; <> when ?is_lower_alpha(A), ?is_lower_alpha(B), ?is_lower_alpha(C) -> {Lang, Lang}; <> when ?is_lower_alpha(A), ?is_lower_alpha(B), ?is_lower_alpha(C) -> - Rest1 = << <> || <> <- Rest, ?is_alpha_digit(X) >>, + Rest1 = << <> || <> <= Rest, ?is_alpha_digit(X) >>, {<>, <>}; <<$x, $-, Rest/binary>> -> - Rest1 = << <> || <> <- Rest, ?is_alpha(C) >>, + Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, {<<$x, $-, Rest1/binary>>, <<"x-other">>}; _ -> {<<"x-other">>, <<"x-other">>} From 466938e0431762736e93508de40a039313e45f06 Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Thu, 23 Oct 2025 08:54:50 +0200 Subject: [PATCH 5/5] Type --- src/z_html.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/z_html.erl b/src/z_html.erl index d04dd22..ac5be22 100644 --- a/src/z_html.erl +++ b/src/z_html.erl @@ -531,9 +531,9 @@ sanitize_iso639(Lang) when is_binary(Lang) -> {<>, <>}; <<$x, $-, Rest/binary>> -> Rest1 = << <> || <> <= Rest, ?is_alpha(C) >>, - {<<$x, $-, Rest1/binary>>, <<"x-other">>}; + {<<$x, $-, Rest1/binary>>, 'x-other'}; _ -> - {<<"x-other">>, <<"x-other">>} + {<<"x-other">>, 'x-other'} end, try binary_to_existing_atom(Lang1, utf8)