-- Bins the features of every row in `input`.
-- Pipeline: explode each row's `features` array into (index, value) pairs,
-- build bin boundaries per feature index, gather the boundaries into one
-- lookup structure, then apply it to the original feature arrays.
-- NOTE(review): extract_feature, extract_weight, build_bins, to_map and
-- feature_binning are UDFs defined outside this file (they look like Hivemall
-- functions) -- confirm they are registered in the session before running.
with extracted as (
  -- One row per element of input.features, split into two parts.
  -- Presumably `index` is the feature name and `value` its numeric weight
  -- (e.g. "age:20.0" -> age / 20.0) -- verify against the UDF docs.
  select
    extract_feature(feature) as index,
    extract_weight(feature) as value
  from
    input l
    lateral view explode(features) r as feature
),
mapping as (
  -- Bin boundaries computed independently for each feature index.
  select
    index,
    build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking
  from
    extracted
  group by
    index
),
bins as (
  -- Collect the per-index boundaries into a single index -> quantiles value.
  -- Presumably to_map is an aggregate, so this CTE yields exactly one row
  -- and the cross join below does not multiply input rows -- TODO confirm.
  select
    to_map(index, quantiles) as quantiles
  from
    mapping
)
select
  l.features as original,
  feature_binning(l.features, r.quantiles) as features
from
  input l
  cross join bins r
> ["name#Jacob","gender#Male","age:20.0"] ["name#Jacob","gender#Male","age:2"]
> ["name#Isabella","gender#Female","age:20.0"] ["name#Isabella","gender#Female","age:2"]