Monorepo for Tangled tangled.org
11

Configure Feed

Select the types of activity you want to include in your feed.

1package repos_indexer 2 3import ( 4 "context" 5 "os" 6 "testing" 7 8 "github.com/blevesearch/bleve/v2" 9 "github.com/stretchr/testify/assert" 10 "github.com/stretchr/testify/require" 11 "tangled.org/core/appview/models" 12 "tangled.org/core/appview/pagination" 13) 14 15func setupTestIndexer(t *testing.T) (*Indexer, func()) { 16 t.Helper() 17 18 tmpDir, err := os.MkdirTemp("", "repo_indexer_test") 19 require.NoError(t, err) 20 21 ix := NewIndexer(tmpDir) 22 23 mapping, err := generateRepoIndexMapping() 24 require.NoError(t, err) 25 26 indexer, err := bleve.New(tmpDir, mapping) 27 require.NoError(t, err) 28 ix.indexer = indexer 29 30 cleanup := func() { 31 ix.indexer.Close() 32 os.RemoveAll(tmpDir) 33 } 34 35 return ix, cleanup 36} 37 38func TestBasicIndexingAndSearch(t *testing.T) { 39 ix, cleanup := setupTestIndexer(t) 40 defer cleanup() 41 42 ctx := context.Background() 43 44 err := ix.Index(ctx, 45 models.Repo{ 46 Id: 1, 47 Did: "did:plc:alice", 48 Name: "web-framework", 49 Knot: "example.com", 50 Description: "A modern web framework for Go", 51 Website: "https://example.com/web-framework", 52 Topics: []string{"web", "framework", "golang"}, 53 RepoStats: &models.RepoStats{Language: "Go"}, 54 }, 55 models.Repo{ 56 Id: 2, 57 Did: "did:plc:bob", 58 Name: "cli-tool", 59 Knot: "example.com", 60 Description: "Command line utility for developers", 61 Website: "", 62 Topics: []string{"cli", "tool"}, 63 RepoStats: &models.RepoStats{Language: "Rust"}, 64 }, 65 models.Repo{ 66 Id: 3, 67 Did: "did:plc:alice", 68 Name: "javascript-parser", 69 Knot: "example.com", 70 Description: "Fast JavaScript parser", 71 Website: "", 72 Topics: []string{"javascript", "parser"}, 73 RepoStats: &models.RepoStats{Language: "JavaScript"}, 74 }, 75 ) 76 require.NoError(t, err) 77 78 // search by name 79 result, err := ix.Search(ctx, models.RepoSearchOptions{ 80 Keywords: []string{"framework"}, 81 Page: pagination.Page{Limit: 10}, 82 }) 83 require.NoError(t, err) 84 assert.Equal(t, uint64(1), result.Total) 85 assert.Contains(t, result.Hits, int64(1)) 86 87 // search by description 88 result, err = ix.Search(ctx, models.RepoSearchOptions{ 89 Keywords: []string{"utility"}, 90 Page: pagination.Page{Limit: 10}, 91 }) 92 require.NoError(t, err) 93 assert.Equal(t, uint64(1), result.Total) 94 assert.Contains(t, result.Hits, int64(2)) 95 96 // search by website 97 result, err = ix.Search(ctx, models.RepoSearchOptions{ 98 Keywords: []string{"example.com/web-framework"}, 99 Page: pagination.Page{Limit: 10}, 100 }) 101 require.NoError(t, err) 102 assert.Equal(t, uint64(1), result.Total) 103 assert.Contains(t, result.Hits, int64(1)) 104} 105 106func TestLanguageFiltering(t *testing.T) { 107 ix, cleanup := setupTestIndexer(t) 108 defer cleanup() 109 110 ctx := context.Background() 111 112 err := ix.Index(ctx, 113 models.Repo{ 114 Id: 1, 115 Did: "did:plc:alice", 116 Name: "go-project", 117 RepoStats: &models.RepoStats{Language: "Go"}, 118 }, 119 models.Repo{ 120 Id: 2, 121 Did: "did:plc:bob", 122 Name: "rust-project", 123 RepoStats: &models.RepoStats{Language: "Rust"}, 124 }, 125 models.Repo{ 126 Id: 3, 127 Did: "did:plc:alice", 128 Name: "another-go-project", 129 RepoStats: &models.RepoStats{Language: "Go"}, 130 }, 131 ) 132 require.NoError(t, err) 133 134 // filter by go language 135 result, err := ix.Search(ctx, models.RepoSearchOptions{ 136 Language: "Go", 137 Page: pagination.Page{Limit: 10}, 138 }) 139 require.NoError(t, err) 140 assert.Equal(t, uint64(2), result.Total) 141 assert.Contains(t, result.Hits, int64(1)) 142 assert.Contains(t, result.Hits, int64(3)) 143 144 // filter by rust language 145 result, err = ix.Search(ctx, models.RepoSearchOptions{ 146 Language: "Rust", 147 Page: pagination.Page{Limit: 10}, 148 }) 149 require.NoError(t, err) 150 assert.Equal(t, uint64(1), result.Total) 151 assert.Contains(t, result.Hits, int64(2)) 152} 153 154func TestTopicExactMatching(t *testing.T) { 155 ix, cleanup := setupTestIndexer(t) 156 defer cleanup() 157 158 ctx := context.Background() 159 160 err := ix.Index(ctx, 161 models.Repo{ 162 Id: 1, 163 Did: "did:plc:alice", 164 Name: "js-tool", 165 Topics: []string{"javascript", "tool"}, 166 RepoStats: &models.RepoStats{}, 167 }, 168 models.Repo{ 169 Id: 2, 170 Did: "did:plc:bob", 171 Name: "java-app", 172 Topics: []string{"java", "application"}, 173 RepoStats: &models.RepoStats{}, 174 }, 175 models.Repo{ 176 Id: 3, 177 Did: "did:plc:alice", 178 Name: "cli-tool", 179 Topics: []string{"cli", "tool"}, 180 RepoStats: &models.RepoStats{}, 181 }, 182 ) 183 require.NoError(t, err) 184 185 // exact match for "javascript" topic 186 result, err := ix.Search(ctx, models.RepoSearchOptions{ 187 Topics: []string{"javascript"}, 188 Page: pagination.Page{Limit: 10}, 189 }) 190 require.NoError(t, err) 191 assert.Equal(t, uint64(1), result.Total) 192 assert.Contains(t, result.Hits, int64(1)) 193 194 // exact match for "tool" topic (should match repos 1 and 3) 195 result, err = ix.Search(ctx, models.RepoSearchOptions{ 196 Topics: []string{"tool"}, 197 Page: pagination.Page{Limit: 10}, 198 }) 199 require.NoError(t, err) 200 assert.Equal(t, uint64(2), result.Total) 201 assert.Contains(t, result.Hits, int64(1)) 202 assert.Contains(t, result.Hits, int64(3)) 203} 204 205func TestTopicTextSearch(t *testing.T) { 206 ix, cleanup := setupTestIndexer(t) 207 defer cleanup() 208 209 ctx := context.Background() 210 211 err := ix.Index(ctx, 212 models.Repo{ 213 Id: 1, 214 Did: "did:plc:alice", 215 Name: "js-tool", 216 Topics: []string{"JavaScript"}, 217 RepoStats: &models.RepoStats{}, 218 }, 219 models.Repo{ 220 Id: 2, 221 Did: "did:plc:bob", 222 Name: "java-app", 223 Topics: []string{"Java"}, 224 RepoStats: &models.RepoStats{}, 225 }, 226 ) 227 require.NoError(t, err) 228 229 result, err := ix.Search(ctx, models.RepoSearchOptions{ 230 Keywords: []string{"Java"}, 231 Page: pagination.Page{Limit: 10}, 232 }) 233 require.NoError(t, err) 234 assert.Equal(t, uint64(2), result.Total) 235 assert.Contains(t, result.Hits, int64(1)) 236 assert.Contains(t, result.Hits, int64(2)) 237} 238 239func TestNegatedFilters(t *testing.T) { 240 ix, cleanup := setupTestIndexer(t) 241 defer cleanup() 242 243 ctx := context.Background() 244 245 err := ix.Index(ctx, 246 models.Repo{ 247 Id: 1, 248 Did: "did:plc:alice", 249 Name: "active-project", 250 Description: "An active development project", 251 Topics: []string{"active"}, 252 RepoStats: &models.RepoStats{Language: "Go"}, 253 }, 254 models.Repo{ 255 Id: 2, 256 Did: "did:plc:bob", 257 Name: "archived-project", 258 Description: "An archived project", 259 Topics: []string{"archived"}, 260 RepoStats: &models.RepoStats{Language: "Python"}, 261 }, 262 models.Repo{ 263 Id: 3, 264 Did: "did:plc:alice", 265 Name: "another-project", 266 Description: "Another active project", 267 Topics: []string{"active"}, 268 RepoStats: &models.RepoStats{Language: "Go"}, 269 }, 270 ) 271 require.NoError(t, err) 272 273 // exclude archived topic 274 result, err := ix.Search(ctx, models.RepoSearchOptions{ 275 NegatedTopics: []string{"archived"}, 276 Page: pagination.Page{Limit: 10}, 277 }) 278 require.NoError(t, err) 279 assert.Equal(t, uint64(2), result.Total) 280 assert.Contains(t, result.Hits, int64(1)) 281 assert.Contains(t, result.Hits, int64(3)) 282 283 // exclude keyword "archived" 284 result, err = ix.Search(ctx, models.RepoSearchOptions{ 285 NegatedKeywords: []string{"archived"}, 286 Page: pagination.Page{Limit: 10}, 287 }) 288 require.NoError(t, err) 289 assert.Equal(t, uint64(2), result.Total) 290 assert.Contains(t, result.Hits, int64(1)) 291 assert.Contains(t, result.Hits, int64(3)) 292 293 // exclude phrase 294 result, err = ix.Search(ctx, models.RepoSearchOptions{ 295 NegatedPhrases: []string{"archived project"}, 296 Page: pagination.Page{Limit: 10}, 297 }) 298 require.NoError(t, err) 299 assert.Equal(t, uint64(2), result.Total) 300 assert.Contains(t, result.Hits, int64(1)) 301 assert.Contains(t, result.Hits, int64(3)) 302} 303 304func TestPagination(t *testing.T) { 305 ix, cleanup := setupTestIndexer(t) 306 defer cleanup() 307 308 ctx := context.Background() 309 310 // index multiple repos 311 var repos []models.Repo 312 for i := 1; i <= 25; i++ { 313 repos = append(repos, models.Repo{ 314 Id: int64(i), 315 Did: "did:plc:alice", 316 Name: "project", 317 Topics: []string{"test"}, 318 RepoStats: &models.RepoStats{}, 319 }) 320 } 321 err := ix.Index(ctx, repos...) 322 require.NoError(t, err) 323 324 // first page 325 result, err := ix.Search(ctx, models.RepoSearchOptions{ 326 Topics: []string{"test"}, 327 Page: pagination.Page{Limit: 10, Offset: 0}, 328 }) 329 require.NoError(t, err) 330 assert.Equal(t, uint64(25), result.Total) 331 assert.Len(t, result.Hits, 10) 332 333 // second page 334 result, err = ix.Search(ctx, models.RepoSearchOptions{ 335 Topics: []string{"test"}, 336 Page: pagination.Page{Limit: 10, Offset: 10}, 337 }) 338 require.NoError(t, err) 339 assert.Equal(t, uint64(25), result.Total) 340 assert.Len(t, result.Hits, 10) 341 342 // third page - 5 items 343 result, err = ix.Search(ctx, models.RepoSearchOptions{ 344 Topics: []string{"test"}, 345 Page: pagination.Page{Limit: 10, Offset: 20}, 346 }) 347 require.NoError(t, err) 348 assert.Equal(t, uint64(25), result.Total) 349 assert.Len(t, result.Hits, 5) 350} 351 352func TestUpdateReindex(t *testing.T) { 353 ix, cleanup := setupTestIndexer(t) 354 defer cleanup() 355 356 ctx := context.Background() 357 358 // initial index 359 err := ix.Index(ctx, models.Repo{ 360 Id: 1, 361 Did: "did:plc:alice", 362 Name: "my-project", 363 Description: "Initial description", 364 Topics: []string{"initial"}, 365 RepoStats: &models.RepoStats{Language: "Go"}, 366 }) 367 require.NoError(t, err) 368 369 // search for initial state 370 result, err := ix.Search(ctx, models.RepoSearchOptions{ 371 Keywords: []string{"Initial"}, 372 Page: pagination.Page{Limit: 10}, 373 }) 374 require.NoError(t, err) 375 assert.Equal(t, uint64(1), result.Total) 376 377 // update the repo 378 err = ix.Index(ctx, models.Repo{ 379 Id: 1, 380 Did: "did:plc:alice", 381 Name: "my-project", 382 Description: "Updated description", 383 Topics: []string{"updated"}, 384 RepoStats: &models.RepoStats{Language: "Rust"}, 385 }) 386 require.NoError(t, err) 387 388 // search for old description should return nothing 389 result, err = ix.Search(ctx, models.RepoSearchOptions{ 390 Keywords: []string{"Initial"}, 391 Page: pagination.Page{Limit: 10}, 392 }) 393 require.NoError(t, err) 394 assert.Equal(t, uint64(0), result.Total) 395 396 // search for new description should work 397 result, err = ix.Search(ctx, models.RepoSearchOptions{ 398 Keywords: []string{"Updated"}, 399 Page: pagination.Page{Limit: 10}, 400 }) 401 require.NoError(t, err) 402 assert.Equal(t, uint64(1), result.Total) 403 404 // language should be updated 405 result, err = ix.Search(ctx, models.RepoSearchOptions{ 406 Language: "Rust", 407 Page: pagination.Page{Limit: 10}, 408 }) 409 require.NoError(t, err) 410 assert.Equal(t, uint64(1), result.Total) 411} 412 413func TestEmptyResults(t *testing.T) { 414 ix, cleanup := setupTestIndexer(t) 415 defer cleanup() 416 417 ctx := context.Background() 418 419 err := ix.Index(ctx, models.Repo{ 420 Id: 1, 421 Did: "did:plc:alice", 422 Name: "my-project", 423 RepoStats: &models.RepoStats{}, 424 }) 425 require.NoError(t, err) 426 427 // search for non-existent keyword 428 result, err := ix.Search(ctx, models.RepoSearchOptions{ 429 Keywords: []string{"nonexistent"}, 430 Page: pagination.Page{Limit: 10}, 431 }) 432 require.NoError(t, err) 433 assert.Equal(t, uint64(0), result.Total) 434 assert.Empty(t, result.Hits) 435 436 // search for non-existent language 437 result, err = ix.Search(ctx, models.RepoSearchOptions{ 438 Language: "NonexistentLanguage", 439 Page: pagination.Page{Limit: 10}, 440 }) 441 require.NoError(t, err) 442 assert.Equal(t, uint64(0), result.Total) 443 assert.Empty(t, result.Hits) 444} 445 446func TestCombinedFilters(t *testing.T) { 447 ix, cleanup := setupTestIndexer(t) 448 defer cleanup() 449 450 ctx := context.Background() 451 452 err := ix.Index(ctx, 453 models.Repo{ 454 Id: 1, 455 Did: "did:plc:alice", 456 Name: "web-server", 457 Knot: "example.com", 458 Description: "A web server in Go", 459 Topics: []string{"web", "server"}, 460 RepoStats: &models.RepoStats{Language: "Go"}, 461 }, 462 models.Repo{ 463 Id: 2, 464 Did: "did:plc:bob", 465 Name: "web-client", 466 Knot: "example.org", 467 Description: "A web client in Rust", 468 Topics: []string{"web", "client"}, 469 RepoStats: &models.RepoStats{Language: "Rust"}, 470 }, 471 models.Repo{ 472 Id: 3, 473 Did: "did:plc:alice", 474 Name: "cli-tool", 475 Knot: "example.com", 476 Description: "A CLI tool in Go", 477 Topics: []string{"cli", "tool"}, 478 RepoStats: &models.RepoStats{Language: "Go"}, 479 }, 480 ) 481 require.NoError(t, err) 482 483 // combine language + topic + keyword 484 result, err := ix.Search(ctx, models.RepoSearchOptions{ 485 Language: "Go", 486 Topics: []string{"web"}, 487 Keywords: []string{"server"}, 488 Page: pagination.Page{Limit: 10}, 489 }) 490 require.NoError(t, err) 491 assert.Equal(t, uint64(1), result.Total) 492 assert.Contains(t, result.Hits, int64(1)) 493 494 // combine did + language 495 result, err = ix.Search(ctx, models.RepoSearchOptions{ 496 Did: "did:plc:alice", 497 Language: "Go", 498 Page: pagination.Page{Limit: 10}, 499 }) 500 require.NoError(t, err) 501 assert.Equal(t, uint64(2), result.Total) 502 assert.Contains(t, result.Hits, int64(1)) 503 assert.Contains(t, result.Hits, int64(3)) 504 505 // combine knot + language 506 result, err = ix.Search(ctx, models.RepoSearchOptions{ 507 Knot: "example.com", 508 Language: "Go", 509 Page: pagination.Page{Limit: 10}, 510 }) 511 require.NoError(t, err) 512 assert.Equal(t, uint64(2), result.Total) 513 assert.Contains(t, result.Hits, int64(1)) 514 assert.Contains(t, result.Hits, int64(3)) 515} 516 517func TestRepoWithoutLanguage(t *testing.T) { 518 ix, cleanup := setupTestIndexer(t) 519 defer cleanup() 520 521 ctx := context.Background() 522 523 err := ix.Index(ctx, 524 models.Repo{ 525 Id: 1, 526 Did: "did:plc:alice", 527 Name: "project-with-language", 528 RepoStats: &models.RepoStats{Language: "Go"}, 529 }, 530 models.Repo{ 531 Id: 2, 532 Did: "did:plc:bob", 533 Name: "project-without-language", 534 RepoStats: &models.RepoStats{Language: ""}, 535 }, 536 ) 537 require.NoError(t, err) 538 539 // search without language filter should return both 540 result, err := ix.Search(ctx, models.RepoSearchOptions{ 541 Keywords: []string{"project"}, 542 Page: pagination.Page{Limit: 10}, 543 }) 544 require.NoError(t, err) 545 assert.Equal(t, uint64(2), result.Total) 546 547 // language filter should only return repo with language 548 result, err = ix.Search(ctx, models.RepoSearchOptions{ 549 Language: "Go", 550 Page: pagination.Page{Limit: 10}, 551 }) 552 require.NoError(t, err) 553 assert.Equal(t, uint64(1), result.Total) 554 assert.Contains(t, result.Hits, int64(1)) 555} 556 557func TestRepoWithoutTopics(t *testing.T) { 558 ix, cleanup := setupTestIndexer(t) 559 defer cleanup() 560 561 ctx := context.Background() 562 563 err := ix.Index(ctx, 564 models.Repo{ 565 Id: 1, 566 Did: "did:plc:alice", 567 Name: "project-with-topics", 568 Topics: []string{"cli", "tool"}, 569 RepoStats: &models.RepoStats{}, 570 }, 571 models.Repo{ 572 Id: 2, 573 Did: "did:plc:bob", 574 Name: "project-without-topics", 575 Topics: []string{}, 576 RepoStats: &models.RepoStats{}, 577 }, 578 ) 579 require.NoError(t, err) 580 581 // topic filter should only return repo with topics 582 result, err := ix.Search(ctx, models.RepoSearchOptions{ 583 Topics: []string{"cli"}, 584 Page: pagination.Page{Limit: 10}, 585 }) 586 require.NoError(t, err) 587 assert.Equal(t, uint64(1), result.Total) 588 assert.Contains(t, result.Hits, int64(1)) 589 590 // general search should return both 591 result, err = ix.Search(ctx, models.RepoSearchOptions{ 592 Keywords: []string{"project"}, 593 Page: pagination.Page{Limit: 10}, 594 }) 595 require.NoError(t, err) 596 assert.Equal(t, uint64(2), result.Total) 597} 598 599func TestDelete(t *testing.T) { 600 ix, cleanup := setupTestIndexer(t) 601 defer cleanup() 602 603 ctx := context.Background() 604 605 err := ix.Index(ctx, 606 models.Repo{ 607 Id: 1, 608 Did: "did:plc:alice", 609 Name: "to-delete", 610 RepoStats: &models.RepoStats{}, 611 }, 612 models.Repo{ 613 Id: 2, 614 Did: "did:plc:bob", 615 Name: "to-keep", 616 RepoStats: &models.RepoStats{}, 617 }, 618 ) 619 require.NoError(t, err) 620 621 // verify both exist 622 result, err := ix.Search(ctx, models.RepoSearchOptions{ 623 Page: pagination.Page{Limit: 10}, 624 }) 625 require.NoError(t, err) 626 assert.Equal(t, uint64(2), result.Total) 627 628 // delete repo 1 629 err = ix.Delete(ctx, 1) 630 require.NoError(t, err) 631 632 // verify only one remains 633 result, err = ix.Search(ctx, models.RepoSearchOptions{ 634 Page: pagination.Page{Limit: 10}, 635 }) 636 require.NoError(t, err) 637 assert.Equal(t, uint64(1), result.Total) 638 assert.Contains(t, result.Hits, int64(2)) 639} 640 641func TestStarCountBoosting(t *testing.T) { 642 ix, cleanup := setupTestIndexer(t) 643 defer cleanup() 644 645 ctx := context.Background() 646 647 err := ix.Index(ctx, 648 models.Repo{ 649 Id: 1, 650 Did: "did:plc:alice", 651 Name: "repo", 652 Description: "testing", 653 RepoStats: &models.RepoStats{StarCount: 5000}, 654 }, 655 models.Repo{ 656 Id: 2, 657 Did: "did:plc:bob", 658 Name: "repo", 659 Description: "testing", 660 RepoStats: &models.RepoStats{StarCount: 150}, 661 }, 662 models.Repo{ 663 Id: 3, 664 Did: "did:plc:charlie", 665 Name: "repo", 666 Description: "testing", 667 RepoStats: &models.RepoStats{StarCount: 5}, 668 }, 669 models.Repo{ 670 Id: 4, 671 Did: "did:plc:dana", 672 Name: "repo", 673 Description: "testing", 674 RepoStats: &models.RepoStats{StarCount: 25}, 675 }, 676 ) 677 require.NoError(t, err) 678 679 // search for "testing" - should rank by star count when all else equal 680 result, err := ix.Search(ctx, models.RepoSearchOptions{ 681 Keywords: []string{"testing"}, 682 Page: pagination.Page{Limit: 10}, 683 }) 684 require.NoError(t, err) 685 assert.Equal(t, uint64(4), result.Total) 686 687 // verify that repos with more stars rank higher than those with fewer 688 popularIdx := -1 689 smallIdx := -1 690 for i, hit := range result.Hits { 691 if hit == 1 { // 5000 stars 692 popularIdx = i 693 } 694 if hit == 3 { // 5 stars 695 smallIdx = i 696 } 697 } 698 assert.True(t, popularIdx < smallIdx, "repo with 5000 stars should rank above repo with 5 stars") 699} 700 701func TestStarBoostingWithForkPenalty(t *testing.T) { 702 ix, cleanup := setupTestIndexer(t) 703 defer cleanup() 704 705 ctx := context.Background() 706 707 err := ix.Index(ctx, 708 models.Repo{ 709 Id: 1, 710 Did: "did:plc:alice", 711 Name: "original-popular", 712 Description: "test project", 713 Source: "", 714 RepoStats: &models.RepoStats{StarCount: 100}, 715 }, 716 models.Repo{ 717 Id: 2, 718 Did: "did:plc:bob", 719 Name: "fork-very-popular", 720 Description: "test project", 721 Source: "did:plc:someone/original", 722 RepoStats: &models.RepoStats{StarCount: 1000}, 723 }, 724 ) 725 require.NoError(t, err) 726 727 result, err := ix.Search(ctx, models.RepoSearchOptions{ 728 Keywords: []string{"project"}, 729 Page: pagination.Page{Limit: 10}, 730 }) 731 require.NoError(t, err) 732 733 // fork with 1000 stars (4.0x) vs non-fork with 100 stars (2.0 * 2.5 = 5.0x) 734 assert.Equal(t, int64(1), result.Hits[0], "non-fork with fewer stars can still rank higher due to combined boost") 735}