11import * as Sentry from '@sentry/node' ;
22import { PrismaClient , Repo , RepoIndexingJobStatus , RepoIndexingJobType } from "@sourcebot/db" ;
33import { createLogger , Logger } from "@sourcebot/logger" ;
4+ import { repoMetadataSchema , RepoIndexingJobMetadata , repoIndexingJobMetadataSchema , RepoMetadata } from '@sourcebot/shared' ;
45import { existsSync } from 'fs' ;
56import { readdir , rm } from 'fs/promises' ;
67import { Job , Queue , ReservedJob , Worker } from "groupmq" ;
78import { Redis } from 'ioredis' ;
9+ import micromatch from 'micromatch' ;
810import { INDEX_CACHE_DIR } from './constants.js' ;
911import { env } from './env.js' ;
10- import { cloneRepository , fetchRepository , isPathAValidGitRepoRoot , unsetGitConfig , upsertGitConfig } from './git.js' ;
12+ import { cloneRepository , fetchRepository , getBranches , getCommitHashForRefName , getTags , isPathAValidGitRepoRoot , unsetGitConfig , upsertGitConfig } from './git.js' ;
13+ import { captureEvent } from './posthog.js' ;
1114import { PromClient } from './promClient.js' ;
12- import { repoMetadataSchema , RepoWithConnections , Settings } from "./types.js" ;
15+ import { RepoWithConnections , Settings } from "./types.js" ;
1316import { getAuthCredentialsForRepo , getRepoPath , getShardPrefix , groupmqLifecycleExceptionWrapper , measure } from './utils.js' ;
1417import { indexGitRepository } from './zoekt.js' ;
1518
@@ -61,7 +64,7 @@ export class RepoIndexManager {
6164 concurrency : this . settings . maxRepoIndexingJobConcurrency ,
6265 ...( env . DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
6366 logger : true ,
64- } : { } ) ,
67+ } : { } ) ,
6568 } ) ;
6669
6770 this . worker . on ( 'completed' , this . onJobCompleted . bind ( this ) ) ;
@@ -126,7 +129,7 @@ export class RepoIndexManager {
126129 {
127130 AND : [
128131 { status : RepoIndexingJobStatus . FAILED } ,
129- { completedAt : { gt : timeoutDate } } ,
132+ { completedAt : { gt : thresholdDate } } ,
130133 ]
131134 }
132135 ]
@@ -263,7 +266,16 @@ export class RepoIndexManager {
263266
264267 try {
265268 if ( jobType === RepoIndexingJobType . INDEX ) {
266- await this . indexRepository ( repo , logger , abortController . signal ) ;
269+ const revisions = await this . indexRepository ( repo , logger , abortController . signal ) ;
270+
271+ await this . db . repoIndexingJob . update ( {
272+ where : { id } ,
273+ data : {
274+ metadata : {
275+ indexedRevisions : revisions ,
276+ } satisfies RepoIndexingJobMetadata ,
277+ } ,
278+ } ) ;
267279 } else if ( jobType === RepoIndexingJobType . CLEANUP ) {
268280 await this . cleanupRepository ( repo , logger ) ;
269281 }
@@ -285,7 +297,7 @@ export class RepoIndexManager {
285297 // If the repo path exists but it is not a valid git repository root, this indicates
286298 // that the repository is in a bad state. To fix, we remove the directory and perform
287299 // a fresh clone.
288- if ( existsSync ( repoPath ) && ! ( await isPathAValidGitRepoRoot ( { path : repoPath } ) ) ) {
300+ if ( existsSync ( repoPath ) && ! ( await isPathAValidGitRepoRoot ( { path : repoPath } ) ) ) {
289301 const isValidGitRepo = await isPathAValidGitRepoRoot ( {
290302 path : repoPath ,
291303 signal,
@@ -354,10 +366,54 @@ export class RepoIndexManager {
354366 } ) ;
355367 }
356368
369+ let revisions = [
370+ 'HEAD'
371+ ] ;
372+
373+ if ( metadata . branches ) {
374+ const branchGlobs = metadata . branches
375+ const allBranches = await getBranches ( repoPath ) ;
376+ const matchingBranches =
377+ allBranches
378+ . filter ( ( branch ) => micromatch . isMatch ( branch , branchGlobs ) )
379+ . map ( ( branch ) => `refs/heads/${ branch } ` ) ;
380+
381+ revisions = [
382+ ...revisions ,
383+ ...matchingBranches
384+ ] ;
385+ }
386+
387+ if ( metadata . tags ) {
388+ const tagGlobs = metadata . tags ;
389+ const allTags = await getTags ( repoPath ) ;
390+ const matchingTags =
391+ allTags
392+ . filter ( ( tag ) => micromatch . isMatch ( tag , tagGlobs ) )
393+ . map ( ( tag ) => `refs/tags/${ tag } ` ) ;
394+
395+ revisions = [
396+ ...revisions ,
397+ ...matchingTags
398+ ] ;
399+ }
400+
401+ // zoekt has a limit of 64 branches/tags to index.
402+ if ( revisions . length > 64 ) {
403+ logger . warn ( `Too many revisions (${ revisions . length } ) for repo ${ repo . id } , truncating to 64` ) ;
404+ captureEvent ( 'backend_revisions_truncated' , {
405+ repoId : repo . id ,
406+ revisionCount : revisions . length ,
407+ } ) ;
408+ revisions = revisions . slice ( 0 , 64 ) ;
409+ }
410+
357411 logger . info ( `Indexing ${ repo . name } (id: ${ repo . id } )...` ) ;
358- const { durationMs } = await measure ( ( ) => indexGitRepository ( repo , this . settings , signal ) ) ;
412+ const { durationMs } = await measure ( ( ) => indexGitRepository ( repo , this . settings , revisions , signal ) ) ;
359413 const indexDuration_s = durationMs / 1000 ;
360414 logger . info ( `Indexed ${ repo . name } (id: ${ repo . id } ) in ${ indexDuration_s } s` ) ;
415+
416+ return revisions ;
361417 }
362418
363419 private async cleanupRepository ( repo : Repo , logger : Logger ) {
@@ -384,16 +440,32 @@ export class RepoIndexManager {
384440 data : {
385441 status : RepoIndexingJobStatus . COMPLETED ,
386442 completedAt : new Date ( ) ,
443+ } ,
444+ include : {
445+ repo : true ,
387446 }
388447 } ) ;
389448
390449 const jobTypeLabel = getJobTypePrometheusLabel ( jobData . type ) ;
391450
392451 if ( jobData . type === RepoIndexingJobType . INDEX ) {
452+ const { path : repoPath } = getRepoPath ( jobData . repo ) ;
453+ const commitHash = await getCommitHashForRefName ( {
454+ path : repoPath ,
455+ refName : 'HEAD' ,
456+ } ) ;
457+
458+ const jobMetadata = repoIndexingJobMetadataSchema . parse ( jobData . metadata ) ;
459+
393460 const repo = await this . db . repo . update ( {
394461 where : { id : jobData . repoId } ,
395462 data : {
396463 indexedAt : new Date ( ) ,
464+ indexedCommitHash : commitHash ,
465+ metadata : {
466+ ...( jobData . repo . metadata as RepoMetadata ) ,
467+ indexedRevisions : jobMetadata . indexedRevisions ,
468+ } satisfies RepoMetadata ,
397469 }
398470 } ) ;
399471
0 commit comments