This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 5.7 kB View raw
1from __future__ import annotations 2 3import json 4from pathlib import Path 5 6import numpy as np 7import pytest 8 9from app.config import Settings 10from app.dedup import content_hash, row_content_hash 11from app.git_store import GitDataStore, load_git_store 12from app import db, recommend 13 14 15def _unit(v: list[float]) -> np.ndarray: 16 a = np.asarray(v, dtype=np.float32) 17 return a / np.linalg.norm(a) 18 19 20def _write_bundle(root: Path) -> None: 21 data = root / "data" 22 data.mkdir(parents=True) 23 repo_vecs = np.stack( 24 [ 25 _unit([1, 0, 0]), 26 _unit([0.9, 0.1, 0]), 27 _unit([0, 1, 0]), 28 ] 29 ) 30 issue_vecs = np.stack( 31 [ 32 _unit([0.95, 0.05, 0]), 33 _unit([0, 0.95, 0.05]), 34 ] 35 ) 36 np.save(data / "repos.f32.npy", repo_vecs) 37 np.save(data / "issues.f32.npy", issue_vecs) 38 39 repos = [ 40 { 41 "row": 0, 42 "subject_uri": "at://did:plc:alice/sh.tangled.repo/r1", 43 "repo_did": "did:repo:alice-r1", 44 "repo_name": "alice-r1", 45 "owner_handle": "alice", 46 "description": "Alice repo one", 47 "topics": ["nix"], 48 "created_at": "2026-01-01T00:00:00Z", 49 "content_len": 200, 50 "content_sha500": "aaa", 51 "embedding_model": "gemini-embedding-001", 52 "embedded_at": "2026-01-01T00:00:00Z", 53 }, 54 { 55 "row": 1, 56 "subject_uri": "at://did:plc:bob/sh.tangled.repo/r9", 57 "repo_did": "did:repo:bob-r9", 58 "repo_name": "bob-r9", 59 "owner_handle": "bob", 60 "description": "Bob similar repo", 61 "topics": ["cli"], 62 "created_at": "2026-01-02T00:00:00Z", 63 "content_len": 180, 64 "content_sha500": "bbb", 65 "embedding_model": "gemini-embedding-001", 66 "embedded_at": "2026-01-02T00:00:00Z", 67 }, 68 { 69 "row": 2, 70 "subject_uri": "at://did:plc:carol/sh.tangled.repo/web", 71 "repo_did": "did:repo:carol-web", 72 "repo_name": "web", 73 "owner_handle": "carol", 74 "description": "Different topic", 75 "topics": ["web"], 76 "created_at": "2026-01-03T00:00:00Z", 77 "content_len": 500, 78 "content_sha500": "ccc", 79 "embedding_model": "gemini-embedding-001", 80 "embedded_at": "2026-01-03T00:00:00Z", 81 }, 82 ] 83 issues = [ 84 { 85 "row": 0, 86 "subject_uri": "at://did:plc:bob/sh.tangled.repo.issue/i1", 87 "repo_did": "did:repo:bob-r9", 88 "rkey": "i1", 89 "repo_uri": "at://did:plc:bob/sh.tangled.repo/r9", 90 "author_did": "did:plc:other", 91 "title": "Fix CLI", 92 "body": "details", 93 "owner_handle": "bob", 94 "repo_name": "bob-r9", 95 "repo_description": "Bob similar repo", 96 "created_at": "2026-01-04T00:00:00Z", 97 "embedding_model": "gemini-embedding-001", 98 }, 99 { 100 "row": 1, 101 "subject_uri": "at://did:plc:carol/sh.tangled.repo.issue/i9", 102 "repo_did": "did:repo:carol-web", 103 "rkey": "i9", 104 "repo_uri": "at://did:plc:carol/sh.tangled.repo/web", 105 "author_did": "did:plc:carol", 106 "title": "Web thing", 107 "body": "body", 108 "owner_handle": "carol", 109 "repo_name": "web", 110 "repo_description": "Different topic", 111 "created_at": "2026-01-05T00:00:00Z", 112 "embedding_model": "gemini-embedding-001", 113 }, 114 ] 115 (data / "repos.jsonl").write_text( 116 "\n".join(json.dumps(r) for r in repos) + "\n", encoding="utf-8" 117 ) 118 (data / "issues.jsonl").write_text( 119 "\n".join(json.dumps(r) for r in issues) + "\n", encoding="utf-8" 120 ) 121 (root / "manifest.json").write_text( 122 json.dumps( 123 { 124 "model": "gemini-embedding-001", 125 "dim": 3, 126 "metric": "cosine", 127 "counts": {"repos": 3, "issues": 2}, 128 } 129 ), 130 encoding="utf-8", 131 ) 132 133 134@pytest.fixture() 135def git_bundle(tmp_path, monkeypatch): 136 root = tmp_path / "bundle" 137 _write_bundle(root) 138 monkeypatch.setenv("DATA_STORAGE", "git") 139 monkeypatch.setenv("REC_DATA_DIR", str(root)) 140 monkeypatch.delenv("REC_DATA_GIT_URL", raising=False) 141 from app.config import get_settings 142 143 get_settings.cache_clear() 144 load_git_store(get_settings()) 145 yield root 146 get_settings.cache_clear() 147 148 149def test_row_content_hash_prefers_sha500(): 150 assert row_content_hash({"content_sha500": "deadbeef", "content": "x"}) == "deadbeef" 151 assert content_hash("hello") == row_content_hash({"content": "hello"}) 152 153 154def test_git_store_load_and_knn(git_bundle): 155 store = GitDataStore.load_from_dir(git_bundle) 156 seeds = store.load_seeds("did:plc:alice", min_chars=120) 157 assert len(seeds) == 1 158 assert seeds[0]["repo_did"] == "did:repo:alice-r1" 159 160 hits = store.knn_repos(seeds[0]["etext"], ["did:repo:alice-r1"], limit=5, min_chars=120) 161 assert hits 162 assert hits[0]["repo_did"] == "did:repo:bob-r9" 163 assert hits[0]["distance"] < 0.2 164 165 166def test_git_recommend_end_to_end(git_bundle): 167 res = recommend.recommend("did:plc:alice") 168 assert res.profile.sources.tangled.repos == 1 169 assert res.repos 170 assert res.repos[0].name == "bob-r9" 171 assert res.issues 172 assert res.issues[0].issueUri.endswith("/i1") 173 174 175def test_db_dispatch_git_mode(git_bundle): 176 counts = db.embedding_counts() 177 assert counts["readmes_embedded"] == 3 178 assert db.ping() is True 179 assert db.get_questionnaire("at://x") is None