This repository has no description
1"""Shared domain types for the recommendation pipeline.
2
3These are deliberately plain dataclasses so the pure stages (merge / dedup /
4rank) are trivially unit-testable without a database or network.
5"""
6
7from __future__ import annotations
8
9from dataclasses import dataclass, field
10
11
@dataclass
class Candidate:
    """One recommendation candidate (a repo or an issue) merged across seeds.

    A candidate may be surfaced by several of the user's seed repos; this
    record keeps the single best match plus the list of seeds that agreed on
    it. The number of agreeing seeds is the consensus signal: the more seeds
    that surface a candidate, the higher it should rank.

    Attributes:
        key: Identity of the candidate -- the repo_did when it is a repo,
            the issue uri when it is an issue.
        content_hash: Content hash carried alongside the candidate.
        distance: Best (i.e. minimum) cosine distance seen for this candidate.
        seeds: The user's seed repos that surfaced this candidate.
        primary_seed: The seed that produced the best (minimum) distance.
        payload: Raw DB row fields (name, owner_handle, etc.) retained for
            the later shaping step.
    """

    # Required identity / scoring fields.
    key: str
    content_hash: str
    distance: float

    # Accumulated state; each instance gets its own fresh list / dict.
    seeds: list[str] = field(default_factory=list)
    primary_seed: str = ""
    payload: dict = field(default_factory=dict)

    @property
    def consensus(self) -> int:
        """Return how many seeds are recorded for this candidate."""
        seed_count = len(self.seeds)
        return seed_count