This repository has no description
1from app.dedup import content_hash, collapse_forks
2from app.types import Candidate
3
4
def test_content_hash_is_deterministic_and_prefix_based():
    """Check that content_hash is repeatable and keyed on a leading prefix."""
    # Hashing an equal string twice must give an equal digest.
    first = content_hash("hello world" + "x" * 1000)
    second = content_hash("hello world" + "x" * 1000)
    assert first == second

    # Per this test's expectation, only the first 500 characters feed the
    # hash, so two inputs that differ only after that point must collide.
    shared_prefix = "p" * 500
    hash_tail_a = content_hash(shared_prefix + "AAA")
    hash_tail_b = content_hash(shared_prefix + "BBB")
    assert hash_tail_a == hash_tail_b
11
12
def test_content_hash_handles_none_and_empty():
    """Check that None is accepted and hashes the same as the empty string."""
    hash_of_none = content_hash(None)
    hash_of_empty = content_hash("")
    assert hash_of_none == hash_of_empty
15
16
def _cand(key, h, dist):
    """Build a Candidate test fixture.

    Args:
        key: Value passed as the candidate's ``key``.
        h: Value passed as the candidate's ``content_hash``.
        dist: Value passed as the candidate's ``distance``.

    Returns:
        A ``Candidate`` whose ``seeds`` is always the one-element list ``["s"]``.
    """
    return Candidate(
        key=key,
        content_hash=h,
        distance=dist,
        seeds=["s"],
    )
19
20
def test_collapse_forks_keeps_min_distance_per_content():
    """Check that candidates sharing a content hash collapse to the closest one."""
    farther_fork = _cand("repoA", "samehash", 0.20)
    # Same content hash as repoA but a smaller distance -> expected winner.
    closer_fork = _cand("repoB", "samehash", 0.10)
    # Different content hash, so it must survive untouched.
    unrelated = _cand("repoC", "other", 0.30)

    survivors = collapse_forks([farther_fork, closer_fork, unrelated])

    surviving_keys = {candidate.key for candidate in survivors}
    assert surviving_keys == {"repoB", "repoC"}
    # The length check guards against duplicate keys hiding in the set above.
    assert len(survivors) == 2