Monorepo for Tangled
tangled.org
1package repos_indexer
2
3import (
4 "context"
5 "os"
6 "testing"
7
8 "github.com/blevesearch/bleve/v2"
9 "github.com/stretchr/testify/assert"
10 "github.com/stretchr/testify/require"
11 "tangled.org/core/appview/models"
12 "tangled.org/core/appview/pagination"
13)
14
15func setupTestIndexer(t *testing.T) (*Indexer, func()) {
16 t.Helper()
17
18 tmpDir, err := os.MkdirTemp("", "repo_indexer_test")
19 require.NoError(t, err)
20
21 ix := NewIndexer(tmpDir)
22
23 mapping, err := generateRepoIndexMapping()
24 require.NoError(t, err)
25
26 indexer, err := bleve.New(tmpDir, mapping)
27 require.NoError(t, err)
28 ix.indexer = indexer
29
30 cleanup := func() {
31 ix.indexer.Close()
32 os.RemoveAll(tmpDir)
33 }
34
35 return ix, cleanup
36}
37
38func TestBasicIndexingAndSearch(t *testing.T) {
39 ix, cleanup := setupTestIndexer(t)
40 defer cleanup()
41
42 ctx := context.Background()
43
44 err := ix.Index(ctx,
45 models.Repo{
46 Id: 1,
47 Did: "did:plc:alice",
48 Name: "web-framework",
49 Knot: "example.com",
50 Description: "A modern web framework for Go",
51 Website: "https://example.com/web-framework",
52 Topics: []string{"web", "framework", "golang"},
53 RepoStats: &models.RepoStats{Language: "Go"},
54 },
55 models.Repo{
56 Id: 2,
57 Did: "did:plc:bob",
58 Name: "cli-tool",
59 Knot: "example.com",
60 Description: "Command line utility for developers",
61 Website: "",
62 Topics: []string{"cli", "tool"},
63 RepoStats: &models.RepoStats{Language: "Rust"},
64 },
65 models.Repo{
66 Id: 3,
67 Did: "did:plc:alice",
68 Name: "javascript-parser",
69 Knot: "example.com",
70 Description: "Fast JavaScript parser",
71 Website: "",
72 Topics: []string{"javascript", "parser"},
73 RepoStats: &models.RepoStats{Language: "JavaScript"},
74 },
75 )
76 require.NoError(t, err)
77
78 // search by name
79 result, err := ix.Search(ctx, models.RepoSearchOptions{
80 Keywords: []string{"framework"},
81 Page: pagination.Page{Limit: 10},
82 })
83 require.NoError(t, err)
84 assert.Equal(t, uint64(1), result.Total)
85 assert.Contains(t, result.Hits, int64(1))
86
87 // search by description
88 result, err = ix.Search(ctx, models.RepoSearchOptions{
89 Keywords: []string{"utility"},
90 Page: pagination.Page{Limit: 10},
91 })
92 require.NoError(t, err)
93 assert.Equal(t, uint64(1), result.Total)
94 assert.Contains(t, result.Hits, int64(2))
95
96 // search by website
97 result, err = ix.Search(ctx, models.RepoSearchOptions{
98 Keywords: []string{"example.com/web-framework"},
99 Page: pagination.Page{Limit: 10},
100 })
101 require.NoError(t, err)
102 assert.Equal(t, uint64(1), result.Total)
103 assert.Contains(t, result.Hits, int64(1))
104}
105
106func TestLanguageFiltering(t *testing.T) {
107 ix, cleanup := setupTestIndexer(t)
108 defer cleanup()
109
110 ctx := context.Background()
111
112 err := ix.Index(ctx,
113 models.Repo{
114 Id: 1,
115 Did: "did:plc:alice",
116 Name: "go-project",
117 RepoStats: &models.RepoStats{Language: "Go"},
118 },
119 models.Repo{
120 Id: 2,
121 Did: "did:plc:bob",
122 Name: "rust-project",
123 RepoStats: &models.RepoStats{Language: "Rust"},
124 },
125 models.Repo{
126 Id: 3,
127 Did: "did:plc:alice",
128 Name: "another-go-project",
129 RepoStats: &models.RepoStats{Language: "Go"},
130 },
131 )
132 require.NoError(t, err)
133
134 // filter by go language
135 result, err := ix.Search(ctx, models.RepoSearchOptions{
136 Language: "Go",
137 Page: pagination.Page{Limit: 10},
138 })
139 require.NoError(t, err)
140 assert.Equal(t, uint64(2), result.Total)
141 assert.Contains(t, result.Hits, int64(1))
142 assert.Contains(t, result.Hits, int64(3))
143
144 // filter by rust language
145 result, err = ix.Search(ctx, models.RepoSearchOptions{
146 Language: "Rust",
147 Page: pagination.Page{Limit: 10},
148 })
149 require.NoError(t, err)
150 assert.Equal(t, uint64(1), result.Total)
151 assert.Contains(t, result.Hits, int64(2))
152}
153
154func TestTopicExactMatching(t *testing.T) {
155 ix, cleanup := setupTestIndexer(t)
156 defer cleanup()
157
158 ctx := context.Background()
159
160 err := ix.Index(ctx,
161 models.Repo{
162 Id: 1,
163 Did: "did:plc:alice",
164 Name: "js-tool",
165 Topics: []string{"javascript", "tool"},
166 RepoStats: &models.RepoStats{},
167 },
168 models.Repo{
169 Id: 2,
170 Did: "did:plc:bob",
171 Name: "java-app",
172 Topics: []string{"java", "application"},
173 RepoStats: &models.RepoStats{},
174 },
175 models.Repo{
176 Id: 3,
177 Did: "did:plc:alice",
178 Name: "cli-tool",
179 Topics: []string{"cli", "tool"},
180 RepoStats: &models.RepoStats{},
181 },
182 )
183 require.NoError(t, err)
184
185 // exact match for "javascript" topic
186 result, err := ix.Search(ctx, models.RepoSearchOptions{
187 Topics: []string{"javascript"},
188 Page: pagination.Page{Limit: 10},
189 })
190 require.NoError(t, err)
191 assert.Equal(t, uint64(1), result.Total)
192 assert.Contains(t, result.Hits, int64(1))
193
194 // exact match for "tool" topic (should match repos 1 and 3)
195 result, err = ix.Search(ctx, models.RepoSearchOptions{
196 Topics: []string{"tool"},
197 Page: pagination.Page{Limit: 10},
198 })
199 require.NoError(t, err)
200 assert.Equal(t, uint64(2), result.Total)
201 assert.Contains(t, result.Hits, int64(1))
202 assert.Contains(t, result.Hits, int64(3))
203}
204
205func TestTopicTextSearch(t *testing.T) {
206 ix, cleanup := setupTestIndexer(t)
207 defer cleanup()
208
209 ctx := context.Background()
210
211 err := ix.Index(ctx,
212 models.Repo{
213 Id: 1,
214 Did: "did:plc:alice",
215 Name: "js-tool",
216 Topics: []string{"JavaScript"},
217 RepoStats: &models.RepoStats{},
218 },
219 models.Repo{
220 Id: 2,
221 Did: "did:plc:bob",
222 Name: "java-app",
223 Topics: []string{"Java"},
224 RepoStats: &models.RepoStats{},
225 },
226 )
227 require.NoError(t, err)
228
229 result, err := ix.Search(ctx, models.RepoSearchOptions{
230 Keywords: []string{"Java"},
231 Page: pagination.Page{Limit: 10},
232 })
233 require.NoError(t, err)
234 assert.Equal(t, uint64(2), result.Total)
235 assert.Contains(t, result.Hits, int64(1))
236 assert.Contains(t, result.Hits, int64(2))
237}
238
239func TestNegatedFilters(t *testing.T) {
240 ix, cleanup := setupTestIndexer(t)
241 defer cleanup()
242
243 ctx := context.Background()
244
245 err := ix.Index(ctx,
246 models.Repo{
247 Id: 1,
248 Did: "did:plc:alice",
249 Name: "active-project",
250 Description: "An active development project",
251 Topics: []string{"active"},
252 RepoStats: &models.RepoStats{Language: "Go"},
253 },
254 models.Repo{
255 Id: 2,
256 Did: "did:plc:bob",
257 Name: "archived-project",
258 Description: "An archived project",
259 Topics: []string{"archived"},
260 RepoStats: &models.RepoStats{Language: "Python"},
261 },
262 models.Repo{
263 Id: 3,
264 Did: "did:plc:alice",
265 Name: "another-project",
266 Description: "Another active project",
267 Topics: []string{"active"},
268 RepoStats: &models.RepoStats{Language: "Go"},
269 },
270 )
271 require.NoError(t, err)
272
273 // exclude archived topic
274 result, err := ix.Search(ctx, models.RepoSearchOptions{
275 NegatedTopics: []string{"archived"},
276 Page: pagination.Page{Limit: 10},
277 })
278 require.NoError(t, err)
279 assert.Equal(t, uint64(2), result.Total)
280 assert.Contains(t, result.Hits, int64(1))
281 assert.Contains(t, result.Hits, int64(3))
282
283 // exclude keyword "archived"
284 result, err = ix.Search(ctx, models.RepoSearchOptions{
285 NegatedKeywords: []string{"archived"},
286 Page: pagination.Page{Limit: 10},
287 })
288 require.NoError(t, err)
289 assert.Equal(t, uint64(2), result.Total)
290 assert.Contains(t, result.Hits, int64(1))
291 assert.Contains(t, result.Hits, int64(3))
292
293 // exclude phrase
294 result, err = ix.Search(ctx, models.RepoSearchOptions{
295 NegatedPhrases: []string{"archived project"},
296 Page: pagination.Page{Limit: 10},
297 })
298 require.NoError(t, err)
299 assert.Equal(t, uint64(2), result.Total)
300 assert.Contains(t, result.Hits, int64(1))
301 assert.Contains(t, result.Hits, int64(3))
302}
303
304func TestPagination(t *testing.T) {
305 ix, cleanup := setupTestIndexer(t)
306 defer cleanup()
307
308 ctx := context.Background()
309
310 // index multiple repos
311 var repos []models.Repo
312 for i := 1; i <= 25; i++ {
313 repos = append(repos, models.Repo{
314 Id: int64(i),
315 Did: "did:plc:alice",
316 Name: "project",
317 Topics: []string{"test"},
318 RepoStats: &models.RepoStats{},
319 })
320 }
321 err := ix.Index(ctx, repos...)
322 require.NoError(t, err)
323
324 // first page
325 result, err := ix.Search(ctx, models.RepoSearchOptions{
326 Topics: []string{"test"},
327 Page: pagination.Page{Limit: 10, Offset: 0},
328 })
329 require.NoError(t, err)
330 assert.Equal(t, uint64(25), result.Total)
331 assert.Len(t, result.Hits, 10)
332
333 // second page
334 result, err = ix.Search(ctx, models.RepoSearchOptions{
335 Topics: []string{"test"},
336 Page: pagination.Page{Limit: 10, Offset: 10},
337 })
338 require.NoError(t, err)
339 assert.Equal(t, uint64(25), result.Total)
340 assert.Len(t, result.Hits, 10)
341
342 // third page - 5 items
343 result, err = ix.Search(ctx, models.RepoSearchOptions{
344 Topics: []string{"test"},
345 Page: pagination.Page{Limit: 10, Offset: 20},
346 })
347 require.NoError(t, err)
348 assert.Equal(t, uint64(25), result.Total)
349 assert.Len(t, result.Hits, 5)
350}
351
352func TestUpdateReindex(t *testing.T) {
353 ix, cleanup := setupTestIndexer(t)
354 defer cleanup()
355
356 ctx := context.Background()
357
358 // initial index
359 err := ix.Index(ctx, models.Repo{
360 Id: 1,
361 Did: "did:plc:alice",
362 Name: "my-project",
363 Description: "Initial description",
364 Topics: []string{"initial"},
365 RepoStats: &models.RepoStats{Language: "Go"},
366 })
367 require.NoError(t, err)
368
369 // search for initial state
370 result, err := ix.Search(ctx, models.RepoSearchOptions{
371 Keywords: []string{"Initial"},
372 Page: pagination.Page{Limit: 10},
373 })
374 require.NoError(t, err)
375 assert.Equal(t, uint64(1), result.Total)
376
377 // update the repo
378 err = ix.Index(ctx, models.Repo{
379 Id: 1,
380 Did: "did:plc:alice",
381 Name: "my-project",
382 Description: "Updated description",
383 Topics: []string{"updated"},
384 RepoStats: &models.RepoStats{Language: "Rust"},
385 })
386 require.NoError(t, err)
387
388 // search for old description should return nothing
389 result, err = ix.Search(ctx, models.RepoSearchOptions{
390 Keywords: []string{"Initial"},
391 Page: pagination.Page{Limit: 10},
392 })
393 require.NoError(t, err)
394 assert.Equal(t, uint64(0), result.Total)
395
396 // search for new description should work
397 result, err = ix.Search(ctx, models.RepoSearchOptions{
398 Keywords: []string{"Updated"},
399 Page: pagination.Page{Limit: 10},
400 })
401 require.NoError(t, err)
402 assert.Equal(t, uint64(1), result.Total)
403
404 // language should be updated
405 result, err = ix.Search(ctx, models.RepoSearchOptions{
406 Language: "Rust",
407 Page: pagination.Page{Limit: 10},
408 })
409 require.NoError(t, err)
410 assert.Equal(t, uint64(1), result.Total)
411}
412
413func TestEmptyResults(t *testing.T) {
414 ix, cleanup := setupTestIndexer(t)
415 defer cleanup()
416
417 ctx := context.Background()
418
419 err := ix.Index(ctx, models.Repo{
420 Id: 1,
421 Did: "did:plc:alice",
422 Name: "my-project",
423 RepoStats: &models.RepoStats{},
424 })
425 require.NoError(t, err)
426
427 // search for non-existent keyword
428 result, err := ix.Search(ctx, models.RepoSearchOptions{
429 Keywords: []string{"nonexistent"},
430 Page: pagination.Page{Limit: 10},
431 })
432 require.NoError(t, err)
433 assert.Equal(t, uint64(0), result.Total)
434 assert.Empty(t, result.Hits)
435
436 // search for non-existent language
437 result, err = ix.Search(ctx, models.RepoSearchOptions{
438 Language: "NonexistentLanguage",
439 Page: pagination.Page{Limit: 10},
440 })
441 require.NoError(t, err)
442 assert.Equal(t, uint64(0), result.Total)
443 assert.Empty(t, result.Hits)
444}
445
446func TestCombinedFilters(t *testing.T) {
447 ix, cleanup := setupTestIndexer(t)
448 defer cleanup()
449
450 ctx := context.Background()
451
452 err := ix.Index(ctx,
453 models.Repo{
454 Id: 1,
455 Did: "did:plc:alice",
456 Name: "web-server",
457 Knot: "example.com",
458 Description: "A web server in Go",
459 Topics: []string{"web", "server"},
460 RepoStats: &models.RepoStats{Language: "Go"},
461 },
462 models.Repo{
463 Id: 2,
464 Did: "did:plc:bob",
465 Name: "web-client",
466 Knot: "example.org",
467 Description: "A web client in Rust",
468 Topics: []string{"web", "client"},
469 RepoStats: &models.RepoStats{Language: "Rust"},
470 },
471 models.Repo{
472 Id: 3,
473 Did: "did:plc:alice",
474 Name: "cli-tool",
475 Knot: "example.com",
476 Description: "A CLI tool in Go",
477 Topics: []string{"cli", "tool"},
478 RepoStats: &models.RepoStats{Language: "Go"},
479 },
480 )
481 require.NoError(t, err)
482
483 // combine language + topic + keyword
484 result, err := ix.Search(ctx, models.RepoSearchOptions{
485 Language: "Go",
486 Topics: []string{"web"},
487 Keywords: []string{"server"},
488 Page: pagination.Page{Limit: 10},
489 })
490 require.NoError(t, err)
491 assert.Equal(t, uint64(1), result.Total)
492 assert.Contains(t, result.Hits, int64(1))
493
494 // combine did + language
495 result, err = ix.Search(ctx, models.RepoSearchOptions{
496 Did: "did:plc:alice",
497 Language: "Go",
498 Page: pagination.Page{Limit: 10},
499 })
500 require.NoError(t, err)
501 assert.Equal(t, uint64(2), result.Total)
502 assert.Contains(t, result.Hits, int64(1))
503 assert.Contains(t, result.Hits, int64(3))
504
505 // combine knot + language
506 result, err = ix.Search(ctx, models.RepoSearchOptions{
507 Knot: "example.com",
508 Language: "Go",
509 Page: pagination.Page{Limit: 10},
510 })
511 require.NoError(t, err)
512 assert.Equal(t, uint64(2), result.Total)
513 assert.Contains(t, result.Hits, int64(1))
514 assert.Contains(t, result.Hits, int64(3))
515}
516
517func TestRepoWithoutLanguage(t *testing.T) {
518 ix, cleanup := setupTestIndexer(t)
519 defer cleanup()
520
521 ctx := context.Background()
522
523 err := ix.Index(ctx,
524 models.Repo{
525 Id: 1,
526 Did: "did:plc:alice",
527 Name: "project-with-language",
528 RepoStats: &models.RepoStats{Language: "Go"},
529 },
530 models.Repo{
531 Id: 2,
532 Did: "did:plc:bob",
533 Name: "project-without-language",
534 RepoStats: &models.RepoStats{Language: ""},
535 },
536 )
537 require.NoError(t, err)
538
539 // search without language filter should return both
540 result, err := ix.Search(ctx, models.RepoSearchOptions{
541 Keywords: []string{"project"},
542 Page: pagination.Page{Limit: 10},
543 })
544 require.NoError(t, err)
545 assert.Equal(t, uint64(2), result.Total)
546
547 // language filter should only return repo with language
548 result, err = ix.Search(ctx, models.RepoSearchOptions{
549 Language: "Go",
550 Page: pagination.Page{Limit: 10},
551 })
552 require.NoError(t, err)
553 assert.Equal(t, uint64(1), result.Total)
554 assert.Contains(t, result.Hits, int64(1))
555}
556
557func TestRepoWithoutTopics(t *testing.T) {
558 ix, cleanup := setupTestIndexer(t)
559 defer cleanup()
560
561 ctx := context.Background()
562
563 err := ix.Index(ctx,
564 models.Repo{
565 Id: 1,
566 Did: "did:plc:alice",
567 Name: "project-with-topics",
568 Topics: []string{"cli", "tool"},
569 RepoStats: &models.RepoStats{},
570 },
571 models.Repo{
572 Id: 2,
573 Did: "did:plc:bob",
574 Name: "project-without-topics",
575 Topics: []string{},
576 RepoStats: &models.RepoStats{},
577 },
578 )
579 require.NoError(t, err)
580
581 // topic filter should only return repo with topics
582 result, err := ix.Search(ctx, models.RepoSearchOptions{
583 Topics: []string{"cli"},
584 Page: pagination.Page{Limit: 10},
585 })
586 require.NoError(t, err)
587 assert.Equal(t, uint64(1), result.Total)
588 assert.Contains(t, result.Hits, int64(1))
589
590 // general search should return both
591 result, err = ix.Search(ctx, models.RepoSearchOptions{
592 Keywords: []string{"project"},
593 Page: pagination.Page{Limit: 10},
594 })
595 require.NoError(t, err)
596 assert.Equal(t, uint64(2), result.Total)
597}
598
599func TestDelete(t *testing.T) {
600 ix, cleanup := setupTestIndexer(t)
601 defer cleanup()
602
603 ctx := context.Background()
604
605 err := ix.Index(ctx,
606 models.Repo{
607 Id: 1,
608 Did: "did:plc:alice",
609 Name: "to-delete",
610 RepoStats: &models.RepoStats{},
611 },
612 models.Repo{
613 Id: 2,
614 Did: "did:plc:bob",
615 Name: "to-keep",
616 RepoStats: &models.RepoStats{},
617 },
618 )
619 require.NoError(t, err)
620
621 // verify both exist
622 result, err := ix.Search(ctx, models.RepoSearchOptions{
623 Page: pagination.Page{Limit: 10},
624 })
625 require.NoError(t, err)
626 assert.Equal(t, uint64(2), result.Total)
627
628 // delete repo 1
629 err = ix.Delete(ctx, 1)
630 require.NoError(t, err)
631
632 // verify only one remains
633 result, err = ix.Search(ctx, models.RepoSearchOptions{
634 Page: pagination.Page{Limit: 10},
635 })
636 require.NoError(t, err)
637 assert.Equal(t, uint64(1), result.Total)
638 assert.Contains(t, result.Hits, int64(2))
639}
640
641func TestStarCountBoosting(t *testing.T) {
642 ix, cleanup := setupTestIndexer(t)
643 defer cleanup()
644
645 ctx := context.Background()
646
647 err := ix.Index(ctx,
648 models.Repo{
649 Id: 1,
650 Did: "did:plc:alice",
651 Name: "repo",
652 Description: "testing",
653 RepoStats: &models.RepoStats{StarCount: 5000},
654 },
655 models.Repo{
656 Id: 2,
657 Did: "did:plc:bob",
658 Name: "repo",
659 Description: "testing",
660 RepoStats: &models.RepoStats{StarCount: 150},
661 },
662 models.Repo{
663 Id: 3,
664 Did: "did:plc:charlie",
665 Name: "repo",
666 Description: "testing",
667 RepoStats: &models.RepoStats{StarCount: 5},
668 },
669 models.Repo{
670 Id: 4,
671 Did: "did:plc:dana",
672 Name: "repo",
673 Description: "testing",
674 RepoStats: &models.RepoStats{StarCount: 25},
675 },
676 )
677 require.NoError(t, err)
678
679 // search for "testing" - should rank by star count when all else equal
680 result, err := ix.Search(ctx, models.RepoSearchOptions{
681 Keywords: []string{"testing"},
682 Page: pagination.Page{Limit: 10},
683 })
684 require.NoError(t, err)
685 assert.Equal(t, uint64(4), result.Total)
686
687 // verify that repos with more stars rank higher than those with fewer
688 popularIdx := -1
689 smallIdx := -1
690 for i, hit := range result.Hits {
691 if hit == 1 { // 5000 stars
692 popularIdx = i
693 }
694 if hit == 3 { // 5 stars
695 smallIdx = i
696 }
697 }
698 assert.True(t, popularIdx < smallIdx, "repo with 5000 stars should rank above repo with 5 stars")
699}
700
701func TestStarBoostingWithForkPenalty(t *testing.T) {
702 ix, cleanup := setupTestIndexer(t)
703 defer cleanup()
704
705 ctx := context.Background()
706
707 err := ix.Index(ctx,
708 models.Repo{
709 Id: 1,
710 Did: "did:plc:alice",
711 Name: "original-popular",
712 Description: "test project",
713 Source: "",
714 RepoStats: &models.RepoStats{StarCount: 100},
715 },
716 models.Repo{
717 Id: 2,
718 Did: "did:plc:bob",
719 Name: "fork-very-popular",
720 Description: "test project",
721 Source: "did:plc:someone/original",
722 RepoStats: &models.RepoStats{StarCount: 1000},
723 },
724 )
725 require.NoError(t, err)
726
727 result, err := ix.Search(ctx, models.RepoSearchOptions{
728 Keywords: []string{"project"},
729 Page: pagination.Page{Limit: 10},
730 })
731 require.NoError(t, err)
732
733 // fork with 1000 stars (4.0x) vs non-fork with 100 stars (2.0 * 2.5 = 5.0x)
734 assert.Equal(t, int64(1), result.Hits[0], "non-fork with fewer stars can still rank higher due to combined boost")
735}