-- Pairwise cosine distance and similarity between a small set of example documents.
-- NOTE(review): cosine_distance and distance2similarity are not defined in this file.
-- They look like Apache Hivemall UDFs and are assumed to be registered already (confirm).
with docs as (
    -- Inline fixture. Each feature is a string of the form name:weight.
    -- String literals must use straight quotes, typographic quotes do not parse.
    -- The feature name kuwi is kept as written (possibly a typo for kiwi, confirm).
    select 1 as docid, array('apple:1.0', 'orange:2.0', 'banana:1.0', 'kuwi:0') as features
    union all
    select 2 as docid, array('apple:1.0', 'orange:0', 'banana:2.0', 'kuwi:1.0') as features
    union all
    select 3 as docid, array('apple:2.0', 'orange:0', 'banana:2.0', 'kuwi:1.0') as features
)
select
    l.docid as doc1,
    r.docid as doc2,
    cosine_distance(l.features, r.features) as distance,
    distance2similarity(cosine_distance(l.features, r.features)) as similarity
from
    docs l
    -- Every document is paired with every document, self-pairs are removed below.
    cross join docs r
where
    -- Removes self-pairs only, so each pair appears in both orders (a, b) and (b, a).
    l.docid != r.docid
order by
    -- For each doc1, the closest document is listed first.
    doc1 asc,
    distance asc;
-- Expected output:
-- doc1  doc2  distance    similarity
-- 1     3     0.45566893  0.6869694
-- 1     2     0.5         0.6666667
-- 2     3     0.04742068  0.95472616
-- 2     1     0.5         0.6666667
-- 3     2     0.04742068  0.95472616
-- 3     1     0.45566893  0.6869694