Skip to content

Commit 2d3b03b

Browse files
feat(web): Improved repository table (#572)
1 parent 4b86bcd commit 2d3b03b

File tree

29 files changed

+1456
-547
lines changed

29 files changed

+1456
-547
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2525
- Improved homepage performance by removing client side polling. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
2626
- Changed navbar indexing indicator to only report progress for first time indexing jobs. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
2727
- Improved repo indexing job stability and robustness. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
28+
- Improved repositories table. [#572](https://github.com/sourcebot-dev/sourcebot/pull/572)
2829

2930
### Removed
3031
- Removed spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)

packages/backend/src/constants.ts

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,6 @@
11
import { env } from "./env.js";
2-
import { Settings } from "./types.js";
32
import path from "path";
43

5-
/**
6-
* Default settings.
7-
*/
8-
export const DEFAULT_SETTINGS: Settings = {
9-
maxFileSize: 2 * 1024 * 1024, // 2MB in bytes
10-
maxTrigramCount: 20000,
11-
reindexIntervalMs: 1000 * 60 * 60, // 1 hour
12-
resyncConnectionIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
13-
resyncConnectionPollingIntervalMs: 1000 * 1, // 1 second
14-
reindexRepoPollingIntervalMs: 1000 * 1, // 1 second
15-
maxConnectionSyncJobConcurrency: 8,
16-
maxRepoIndexingJobConcurrency: 8,
17-
maxRepoGarbageCollectionJobConcurrency: 8,
18-
repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
19-
repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
20-
enablePublicAccess: false, // deprecated, use FORCE_ENABLE_ANONYMOUS_ACCESS instead
21-
experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
22-
experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
23-
}
24-
254
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [
265
'github',
276
];

packages/backend/src/git.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,4 +268,26 @@ export const getTags = async (path: string) => {
268268
const git = createGitClientForPath(path);
269269
const tags = await git.tags();
270270
return tags.all;
271+
}
272+
273+
/**
 * Resolves a ref name to the hash of the commit it points at, using the git
 * repository located at `path`.
 *
 * @param path - Repository path handed to `createGitClientForPath` to obtain the git client.
 * @param refName - The ref to resolve (for example `HEAD`). `^{commit}` is appended to it
 * before it is passed to `git.revparse`.
 * @returns Whatever `git.revparse` resolves to, or `undefined` when the call throws.
 * This function does not rethrow: a failure is logged with `console.error` and
 * reported to the caller only through the `undefined` return value.
 */
export const getCommitHashForRefName = async ({
274+
path,
275+
refName,
276+
}: {
277+
path: string,
278+
refName: string,
279+
}) => {
280+
const git = createGitClientForPath(path);
281+
282+
try {
283+
// The `^{commit}` suffix is used to fully dereference the ref to a commit hash
// (so a ref that points at an annotated tag still yields the underlying commit).
284+
const rev = await git.revparse(`${refName}^{commit}`);
285+
return rev;
286+
287+
// @note: `revparse` was observed to throw when the repository is empty,
288+
// so the error is caught here and `undefined` is returned instead of propagating it.
// NOTE(review): every other failure is also collapsed into `undefined` by this catch.
289+
} catch (error: unknown) {
290+
console.error(error);
291+
return undefined;
292+
}
271293
}

packages/backend/src/index.ts

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ import "./instrument.js";
22

33
import { PrismaClient } from "@sourcebot/db";
44
import { createLogger } from "@sourcebot/logger";
5-
import { hasEntitlement, loadConfig } from '@sourcebot/shared';
5+
import { getConfigSettings, hasEntitlement } from '@sourcebot/shared';
66
import { existsSync } from 'fs';
77
import { mkdir } from 'fs/promises';
88
import { Redis } from 'ioredis';
99
import { ConnectionManager } from './connectionManager.js';
10-
import { DEFAULT_SETTINGS, INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js';
10+
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js';
1111
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
1212
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js";
1313
import { GithubAppManager } from "./ee/githubAppManager.js";
@@ -18,20 +18,6 @@ import { PromClient } from './promClient.js';
1818

1919
const logger = createLogger('backend-entrypoint');
2020

21-
const getSettings = async (configPath?: string) => {
22-
if (!configPath) {
23-
return DEFAULT_SETTINGS;
24-
}
25-
26-
const config = await loadConfig(configPath);
27-
28-
return {
29-
...DEFAULT_SETTINGS,
30-
...config.settings,
31-
}
32-
}
33-
34-
3521
const reposPath = REPOS_CACHE_DIR;
3622
const indexPath = INDEX_CACHE_DIR;
3723

@@ -57,8 +43,7 @@ redis.ping().then(() => {
5743

5844
const promClient = new PromClient();
5945

60-
const settings = await getSettings(env.CONFIG_PATH);
61-
46+
const settings = await getConfigSettings(env.CONFIG_PATH);
6247

6348
if (hasEntitlement('github-app')) {
6449
await GithubAppManager.getInstance().init(prisma);

packages/backend/src/repoCompileUtils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ import { marshalBool } from "./utils.js";
1313
import { createLogger } from '@sourcebot/logger';
1414
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
1515
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
16-
import { RepoMetadata } from './types.js';
1716
import path from 'path';
1817
import { glob } from 'glob';
1918
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
2019
import assert from 'assert';
2120
import GitUrlParse from 'git-url-parse';
21+
import { RepoMetadata } from '@sourcebot/shared';
2222

2323
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
2424

packages/backend/src/repoIndexManager.ts

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import * as Sentry from '@sentry/node';
22
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
33
import { createLogger, Logger } from "@sourcebot/logger";
4+
import { repoMetadataSchema, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata } from '@sourcebot/shared';
45
import { existsSync } from 'fs';
56
import { readdir, rm } from 'fs/promises';
67
import { Job, Queue, ReservedJob, Worker } from "groupmq";
78
import { Redis } from 'ioredis';
9+
import micromatch from 'micromatch';
810
import { INDEX_CACHE_DIR } from './constants.js';
911
import { env } from './env.js';
10-
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
12+
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
13+
import { captureEvent } from './posthog.js';
1114
import { PromClient } from './promClient.js';
12-
import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
15+
import { RepoWithConnections, Settings } from "./types.js";
1316
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
1417
import { indexGitRepository } from './zoekt.js';
1518

@@ -61,7 +64,7 @@ export class RepoIndexManager {
6164
concurrency: this.settings.maxRepoIndexingJobConcurrency,
6265
...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
6366
logger: true,
64-
}: {}),
67+
} : {}),
6568
});
6669

6770
this.worker.on('completed', this.onJobCompleted.bind(this));
@@ -126,7 +129,7 @@ export class RepoIndexManager {
126129
{
127130
AND: [
128131
{ status: RepoIndexingJobStatus.FAILED },
129-
{ completedAt: { gt: timeoutDate } },
132+
{ completedAt: { gt: thresholdDate } },
130133
]
131134
}
132135
]
@@ -263,7 +266,16 @@ export class RepoIndexManager {
263266

264267
try {
265268
if (jobType === RepoIndexingJobType.INDEX) {
266-
await this.indexRepository(repo, logger, abortController.signal);
269+
const revisions = await this.indexRepository(repo, logger, abortController.signal);
270+
271+
await this.db.repoIndexingJob.update({
272+
where: { id },
273+
data: {
274+
metadata: {
275+
indexedRevisions: revisions,
276+
} satisfies RepoIndexingJobMetadata,
277+
},
278+
});
267279
} else if (jobType === RepoIndexingJobType.CLEANUP) {
268280
await this.cleanupRepository(repo, logger);
269281
}
@@ -285,7 +297,7 @@ export class RepoIndexManager {
285297
// If the repo path exists but it is not a valid git repository root, this indicates
286298
// that the repository is in a bad state. To fix, we remove the directory and perform
287299
// a fresh clone.
288-
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot( { path: repoPath } ))) {
300+
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot({ path: repoPath }))) {
289301
const isValidGitRepo = await isPathAValidGitRepoRoot({
290302
path: repoPath,
291303
signal,
@@ -354,10 +366,54 @@ export class RepoIndexManager {
354366
});
355367
}
356368

369+
let revisions = [
370+
'HEAD'
371+
];
372+
373+
if (metadata.branches) {
374+
const branchGlobs = metadata.branches
375+
const allBranches = await getBranches(repoPath);
376+
const matchingBranches =
377+
allBranches
378+
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
379+
.map((branch) => `refs/heads/${branch}`);
380+
381+
revisions = [
382+
...revisions,
383+
...matchingBranches
384+
];
385+
}
386+
387+
if (metadata.tags) {
388+
const tagGlobs = metadata.tags;
389+
const allTags = await getTags(repoPath);
390+
const matchingTags =
391+
allTags
392+
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
393+
.map((tag) => `refs/tags/${tag}`);
394+
395+
revisions = [
396+
...revisions,
397+
...matchingTags
398+
];
399+
}
400+
401+
// zoekt has a limit of 64 branches/tags to index.
402+
if (revisions.length > 64) {
403+
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
404+
captureEvent('backend_revisions_truncated', {
405+
repoId: repo.id,
406+
revisionCount: revisions.length,
407+
});
408+
revisions = revisions.slice(0, 64);
409+
}
410+
357411
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
358-
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, signal));
412+
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
359413
const indexDuration_s = durationMs / 1000;
360414
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
415+
416+
return revisions;
361417
}
362418

363419
private async cleanupRepository(repo: Repo, logger: Logger) {
@@ -384,16 +440,32 @@ export class RepoIndexManager {
384440
data: {
385441
status: RepoIndexingJobStatus.COMPLETED,
386442
completedAt: new Date(),
443+
},
444+
include: {
445+
repo: true,
387446
}
388447
});
389448

390449
const jobTypeLabel = getJobTypePrometheusLabel(jobData.type);
391450

392451
if (jobData.type === RepoIndexingJobType.INDEX) {
452+
const { path: repoPath } = getRepoPath(jobData.repo);
453+
const commitHash = await getCommitHashForRefName({
454+
path: repoPath,
455+
refName: 'HEAD',
456+
});
457+
458+
const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata);
459+
393460
const repo = await this.db.repo.update({
394461
where: { id: jobData.repoId },
395462
data: {
396463
indexedAt: new Date(),
464+
indexedCommitHash: commitHash,
465+
metadata: {
466+
...(jobData.repo.metadata as RepoMetadata),
467+
indexedRevisions: jobMetadata.indexedRevisions,
468+
} satisfies RepoMetadata,
397469
}
398470
});
399471

packages/backend/src/types.ts

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,8 @@
11
import { Connection, Repo, RepoToConnection } from "@sourcebot/db";
22
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
3-
import { z } from "zod";
43

54
export type Settings = Required<SettingsSchema>;
65

7-
// Structure of the `metadata` field in the `Repo` table.
8-
//
9-
// @WARNING: If you modify this schema, please make sure it is backwards
10-
// compatible with any prior versions of the schema!!
11-
// @NOTE: If you move this schema, please update the comment in schema.prisma
12-
// to point to the new location.
13-
export const repoMetadataSchema = z.object({
14-
/**
15-
* A set of key-value pairs that will be used as git config
16-
* variables when cloning the repo.
17-
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
18-
*/
19-
gitConfig: z.record(z.string(), z.string()).optional(),
20-
21-
/**
22-
* A list of branches to index. Glob patterns are supported.
23-
*/
24-
branches: z.array(z.string()).optional(),
25-
26-
/**
27-
* A list of tags to index. Glob patterns are supported.
28-
*/
29-
tags: z.array(z.string()).optional(),
30-
});
31-
32-
export type RepoMetadata = z.infer<typeof repoMetadataSchema>;
33-
346
// @see : https://stackoverflow.com/a/61132308
357
export type DeepPartial<T> = T extends object ? {
368
[P in keyof T]?: DeepPartial<T[P]>;

packages/backend/src/zoekt.ts

Lines changed: 4 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,16 @@
11
import { Repo } from "@sourcebot/db";
22
import { createLogger } from "@sourcebot/logger";
33
import { exec } from "child_process";
4-
import micromatch from "micromatch";
54
import { INDEX_CACHE_DIR } from "./constants.js";
6-
import { getBranches, getTags } from "./git.js";
7-
import { captureEvent } from "./posthog.js";
8-
import { repoMetadataSchema, Settings } from "./types.js";
5+
import { Settings } from "./types.js";
96
import { getRepoPath, getShardPrefix } from "./utils.js";
107

118
const logger = createLogger('zoekt');
129

13-
export const indexGitRepository = async (repo: Repo, settings: Settings, signal?: AbortSignal) => {
14-
let revisions = [
15-
'HEAD'
16-
];
17-
10+
export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => {
1811
const { path: repoPath } = getRepoPath(repo);
1912
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
20-
const metadata = repoMetadataSchema.parse(repo.metadata);
21-
22-
if (metadata.branches) {
23-
const branchGlobs = metadata.branches
24-
const allBranches = await getBranches(repoPath);
25-
const matchingBranches =
26-
allBranches
27-
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
28-
.map((branch) => `refs/heads/${branch}`);
29-
30-
revisions = [
31-
...revisions,
32-
...matchingBranches
33-
];
34-
}
35-
36-
if (metadata.tags) {
37-
const tagGlobs = metadata.tags;
38-
const allTags = await getTags(repoPath);
39-
const matchingTags =
40-
allTags
41-
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
42-
.map((tag) => `refs/tags/${tag}`);
4313

44-
revisions = [
45-
...revisions,
46-
...matchingTags
47-
];
48-
}
49-
50-
// zoekt has a limit of 64 branches/tags to index.
51-
if (revisions.length > 64) {
52-
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
53-
captureEvent('backend_revisions_truncated', {
54-
repoId: repo.id,
55-
revisionCount: revisions.length,
56-
});
57-
revisions = revisions.slice(0, 64);
58-
}
59-
6014
const command = [
6115
'zoekt-git-index',
6216
'-allow_missing_branches',
@@ -76,7 +30,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal?
7630
reject(error);
7731
return;
7832
}
79-
33+
8034
if (stdout) {
8135
stdout.split('\n').filter(line => line.trim()).forEach(line => {
8236
logger.info(line);
@@ -89,7 +43,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal?
8943
logger.info(line);
9044
});
9145
}
92-
46+
9347
resolve({
9448
stdout,
9549
stderr

0 commit comments

Comments
 (0)