This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 1.6 kB View raw
1from __future__ import annotations 2 3import re 4from typing import Any 5from urllib.parse import urlencode 6 7import httpx 8 9APPVIEW_BASE = "https://tangled.org" 10SEARCH_PATH = "/search" 11 12# href="/owner/repo" — exclude site chrome and static assets 13REPO_HREF = re.compile(r'href="/([a-zA-Z0-9._-]+)/([a-zA-Z0-9._-]+)"') 14TOTAL_RE = re.compile(r"Returned\s+(\d+)\s+of\s+(\d+)", re.I) 15 16SKIP_OWNERS = frozenset( 17 { 18 "static", 19 "search", 20 "login", 21 "signup", 22 "explore", 23 "settings", 24 "blog", 25 "docs", 26 "brand", 27 "chat", 28 "pwa-manifest.json", 29 } 30) 31 32 33def parse_search_total(html: str) -> int | None: 34 match = TOTAL_RE.search(html) 35 if not match: 36 return None 37 return int(match.group(2)) 38 39 40def parse_repo_links(html: str) -> list[tuple[str, str]]: 41 seen: set[tuple[str, str]] = set() 42 out: list[tuple[str, str]] = [] 43 for owner, repo in REPO_HREF.findall(html): 44 if owner in SKIP_OWNERS or owner.endswith(".json"): 45 continue 46 key = (owner, repo) 47 if key not in seen: 48 seen.add(key) 49 out.append(key) 50 return out 51 52 53def fetch_search_page( 54 client: httpx.Client, 55 *, 56 offset: int = 0, 57 limit: int = 100, 58 sort: str = "newest", 59 query: str = "", 60) -> tuple[str, list[tuple[str, str]], int | None]: 61 params = {"q": query, "sort": sort, "offset": offset, "limit": limit} 62 url = f"{APPVIEW_BASE}{SEARCH_PATH}?{urlencode(params)}" 63 resp = client.get(url) 64 resp.raise_for_status() 65 html = resp.text 66 return html, parse_repo_links(html), parse_search_total(html)