From d697cb2bc9238b932cddc60518f6ac0412cc3c7f Mon Sep 17 00:00:00 2001 From: ARUNAVO RAY Date: Wed, 18 Mar 2026 15:27:20 +0530 Subject: [PATCH] fix: prevent starred repo name collisions during concurrent mirroring (#236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: prevent starred repo name collisions during concurrent mirroring (#95) When multiple starred repos share the same short name (e.g. alice/dotfiles and bob/dotfiles), concurrent batch mirroring could cause 409 Conflict errors because generateUniqueRepoName only checked Gitea via HTTP, missing repos that were claimed in the local DB but not yet created remotely. Three fixes: - Add DB-level check in generateUniqueRepoName so it queries the local repositories table for existing mirroredLocation claims, preventing two concurrent jobs from picking the same target name. - Clear mirroredLocation on failed mirror so a failed repo doesn't falsely hold a location that was never successfully created, which would block retries and confuse the uniqueness check. - Extract isMirroredLocationClaimedInDb helper for the DB lookup, using ne() to exclude the current repo's own record from the collision check. * fix: address review findings for starred repo name collision fix - Make generateUniqueRepoName immediately claim name by writing mirroredLocation to DB, closing the TOCTOU race window between name selection and the later status="mirroring" DB update - Add fullName validation guard (must contain "/") - Make isMirroredLocationClaimedInDb fail-closed (return true on DB error) to be conservative about preventing collisions - Scope mirroredLocation clear on failure to starred repos only, preserving it for non-starred repos that may have partially created in Gitea and need the location for recovery * fix: address P1/P2 review findings for starred repo name collision P1a: Remove early name claiming from generateUniqueRepoName to prevent stale claims on early return paths. The function now only checks availability — the actual claim happens at the status="mirroring" DB write (after both idempotency checks), which is protected by a new unique partial index. P1b: Add unique partial index on (userId, mirroredLocation) WHERE mirroredLocation != '' via migration 0010. This enforces atomicity at the DB level: if two concurrent workers try to claim the same name, the second gets a constraint violation rather than silently colliding. P2: Only clear mirroredLocation on failure if the Gitea migrate call itself failed (migrateSucceeded flag). If migrate succeeded but metadata mirroring failed, preserve the location since the repo physically exists in Gitea and we need it for recovery/retry. --- drizzle/0010_mirrored_location_index.sql | 9 ++ drizzle/meta/_journal.json | 7 ++ scripts/validate-migrations.ts | 27 +++++ src/lib/db/schema.ts | 1 + src/lib/gitea.ts | 146 +++++++++++++++++++---- 5 files changed, 166 insertions(+), 24 deletions(-) create mode 100644 drizzle/0010_mirrored_location_index.sql diff --git a/drizzle/0010_mirrored_location_index.sql b/drizzle/0010_mirrored_location_index.sql new file mode 100644 index 0000000..82ddbfb --- /dev/null +++ b/drizzle/0010_mirrored_location_index.sql @@ -0,0 +1,9 @@ +-- Add index for mirroredLocation lookups (used by name collision detection) +CREATE INDEX IF NOT EXISTS `idx_repositories_mirrored_location` ON `repositories` (`user_id`, `mirrored_location`); + +-- Add unique partial index to enforce that no two repos for the same user +-- can claim the same non-empty mirroredLocation. This prevents race conditions +-- during concurrent batch mirroring of starred repos with duplicate names. +CREATE UNIQUE INDEX IF NOT EXISTS `uniq_repositories_user_mirrored_location` + ON `repositories` (`user_id`, `mirrored_location`) + WHERE `mirrored_location` != ''; diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 787ca46..9c3766f 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -71,6 +71,13 @@ "when": 1773542995732, "tag": "0009_nervous_tyger_tiger", "breakpoints": true + }, + { + "idx": 10, + "version": "6", + "when": 1774054800000, + "tag": "0010_mirrored_location_index", + "breakpoints": true } ] } \ No newline at end of file diff --git a/scripts/validate-migrations.ts b/scripts/validate-migrations.ts index 8432407..ceac3da 100644 --- a/scripts/validate-migrations.ts +++ b/scripts/validate-migrations.ts @@ -143,11 +143,38 @@ function verify0009Migration(db: Database) { assert(importedAtIndex?.name === "idx_repositories_user_imported_at", "Expected repositories imported_at index to exist after migration"); } +function seedPre0010Database(db: any) { + // Seed a repo row to verify index creation doesn't break existing data + seedPre0009Database(db); +} + +function verify0010Migration(db: any) { + // Verify the unique partial index exists by checking that two repos + // with the same non-empty mirroredLocation would conflict + const indexes = db.prepare( + "SELECT name FROM sqlite_master WHERE type='index' AND name='uniq_repositories_user_mirrored_location'" + ).all(); + if (indexes.length === 0) { + throw new Error("Missing unique partial index uniq_repositories_user_mirrored_location"); + } + + const lookupIdx = db.prepare( + "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_repositories_mirrored_location'" + ).all(); + if (lookupIdx.length === 0) { + throw new Error("Missing lookup index idx_repositories_mirrored_location"); + } +} + const latestUpgradeFixtures: Record = { "0009_nervous_tyger_tiger": { seed: seedPre0009Database, verify: verify0009Migration, }, + "0010_mirrored_location_index": { + seed: seedPre0010Database, + verify: verify0010Migration, + }, }; function lintMigrations(selectedMigrations: Migration[]) { diff --git a/src/lib/db/schema.ts b/src/lib/db/schema.ts index fd64254..0637aab 100644 --- a/src/lib/db/schema.ts +++ b/src/lib/db/schema.ts @@ -418,6 +418,7 @@ export const repositories = sqliteTable("repositories", { index("idx_repositories_user_imported_at").on(table.userId, table.importedAt), uniqueIndex("uniq_repositories_user_full_name").on(table.userId, table.fullName), uniqueIndex("uniq_repositories_user_normalized_full_name").on(table.userId, table.normalizedFullName), + index("idx_repositories_mirrored_location").on(table.userId, table.mirroredLocation), ]); export const mirrorJobs = sqliteTable("mirror_jobs", { diff --git a/src/lib/gitea.ts b/src/lib/gitea.ts index ce4fa19..475315c 100644 --- a/src/lib/gitea.ts +++ b/src/lib/gitea.ts @@ -10,7 +10,7 @@ import type { Organization, Repository } from "./db/schema"; import { httpPost, httpGet, httpDelete, httpPut, httpPatch } from "./http-client"; import { createMirrorJob } from "./helpers"; import { db, organizations, repositories } from "./db"; -import { eq, and } from "drizzle-orm"; +import { eq, and, ne } from "drizzle-orm"; import { decryptConfigTokens } from "./utils/config-encryption"; import { formatDateShort } from "./utils"; import { @@ -586,6 +586,7 @@ export const mirrorGithubRepoToGitea = async ({ orgName: repoOwner, baseName: repository.name, githubOwner, + fullName: repository.fullName, strategy: config.githubConfig.starredDuplicateStrategy, }); @@ -825,6 +826,10 @@ export const mirrorGithubRepoToGitea = async ({ migratePayload.auth_token = decryptedConfig.githubConfig.token; } + // Track whether the Gitea migrate call succeeded so the catch block + // knows whether to clear mirroredLocation (only safe before migrate succeeds) + let migrateSucceeded = false; + const response = await httpPost( apiUrl, migratePayload, @@ -833,6 +838,8 @@ export const mirrorGithubRepoToGitea = async ({ } ); + migrateSucceeded = true; + await syncRepositoryMetadataToGitea({ config, octokit, @@ -1075,14 +1082,21 @@ export const mirrorGithubRepoToGitea = async ({ }` ); - // Mark repos as "failed" in DB + // Mark repos as "failed" in DB. Only clear mirroredLocation if the Gitea + // migrate call itself failed (repo doesn't exist in Gitea). If migrate + // succeeded but metadata mirroring failed, preserve the location since + // the repo physically exists and we need the location for recovery/retry. + const failureUpdate: Record = { + status: repoStatusEnum.parse("failed"), + updatedAt: new Date(), + errorMessage: error instanceof Error ? error.message : "Unknown error", + }; + if (!migrateSucceeded) { + failureUpdate.mirroredLocation = ""; + } await db .update(repositories) - .set({ - status: repoStatusEnum.parse("failed"), - updatedAt: new Date(), - errorMessage: error instanceof Error ? error.message : "Unknown error", - }) + .set(failureUpdate) .where(eq(repositories.id, repository.id!)); // Append log for failure @@ -1133,29 +1147,103 @@ export async function getOrCreateGiteaOrg({ } /** - * Generate a unique repository name for starred repos with duplicate names + * Check if a candidate mirroredLocation is already claimed by another repository + * in the local database. This prevents race conditions during concurrent batch + * mirroring where two repos could both claim the same name before either + * finishes creating in Gitea. + */ +async function isMirroredLocationClaimedInDb({ + userId, + candidateLocation, + excludeFullName, +}: { + userId: string; + candidateLocation: string; + excludeFullName: string; +}): Promise { + try { + const existing = await db + .select({ id: repositories.id }) + .from(repositories) + .where( + and( + eq(repositories.userId, userId), + eq(repositories.mirroredLocation, candidateLocation), + ne(repositories.fullName, excludeFullName) + ) + ) + .limit(1); + + return existing.length > 0; + } catch (error) { + console.error( + `Error checking DB for mirroredLocation "${candidateLocation}":`, + error + ); + // Fail-closed: assume claimed to be conservative and prevent collisions + return true; + } +} + +/** + * Generate a unique repository name for starred repos with duplicate names. + * Checks both the Gitea instance (HTTP) and the local DB (mirroredLocation) + * to reduce collisions during concurrent batch mirroring. + * + * NOTE: This function only checks availability — it does NOT claim the name. + * The actual claim happens later when mirroredLocation is written at the + * status="mirroring" DB update, which is protected by a unique partial index + * on (userId, mirroredLocation) WHERE mirroredLocation != ''. */ async function generateUniqueRepoName({ config, orgName, baseName, githubOwner, + fullName, strategy, }: { config: Partial; orgName: string; baseName: string; githubOwner: string; + fullName: string; strategy?: string; }): Promise { + if (!fullName?.includes("/")) { + throw new Error( + `Invalid fullName "${fullName}" for starred repo dedup — expected "owner/repo" format` + ); + } + const duplicateStrategy = strategy || "suffix"; + const userId = config.userId || ""; + + // Helper: check both Gitea and local DB for a candidate name + const isNameTaken = async (candidateName: string): Promise => { + const existsInGitea = await isRepoPresentInGitea({ + config, + owner: orgName, + repoName: candidateName, + }); + if (existsInGitea) return true; + + // Also check local DB to catch concurrent batch operations + // where another repo claimed this location but hasn't created it in Gitea yet + if (userId) { + const claimedInDb = await isMirroredLocationClaimedInDb({ + userId, + candidateLocation: `${orgName}/${candidateName}`, + excludeFullName: fullName, + }); + if (claimedInDb) return true; + } + + return false; + }; // First check if base name is available - const baseExists = await isRepoPresentInGitea({ - config, - owner: orgName, - repoName: baseName, - }); + const baseExists = await isNameTaken(baseName); if (!baseExists) { return baseName; @@ -1187,11 +1275,7 @@ async function generateUniqueRepoName({ break; } - const exists = await isRepoPresentInGitea({ - config, - owner: orgName, - repoName: candidateName, - }); + const exists = await isNameTaken(candidateName); if (!exists) { console.log(`Found unique name for duplicate starred repo: ${candidateName}`); @@ -1254,6 +1338,7 @@ export async function mirrorGitHubRepoToGiteaOrg({ orgName, baseName: repository.name, githubOwner, + fullName: repository.fullName, strategy: config.githubConfig.starredDuplicateStrategy, }); @@ -1421,6 +1506,8 @@ export async function mirrorGitHubRepoToGiteaOrg({ migratePayload.auth_token = decryptedConfig.githubConfig.token; } + let migrateSucceeded = false; + const migrateRes = await httpPost( apiUrl, migratePayload, @@ -1429,6 +1516,8 @@ export async function mirrorGitHubRepoToGiteaOrg({ } ); + migrateSucceeded = true; + await syncRepositoryMetadataToGitea({ config, octokit, @@ -1676,14 +1765,23 @@ export async function mirrorGitHubRepoToGiteaOrg({ error instanceof Error ? error.message : String(error) }` ); - // Mark repos as "failed" in DB + // Mark repos as "failed" in DB. For starred repos, clear mirroredLocation + // to release the name claim for retry. For non-starred repos, preserve it + // since the Gitea repo may partially exist and we need the location for recovery. + const failureUpdate2: Record = { + status: repoStatusEnum.parse("failed"), + updatedAt: new Date(), + errorMessage: error instanceof Error ? error.message : "Unknown error", + }; + // Only clear mirroredLocation if the Gitea migrate call itself failed. + // If migrate succeeded but metadata mirroring failed, preserve the + // location since the repo physically exists in Gitea. + if (!migrateSucceeded) { + failureUpdate2.mirroredLocation = ""; + } await db .update(repositories) - .set({ - status: repoStatusEnum.parse("failed"), - updatedAt: new Date(), - errorMessage: error instanceof Error ? error.message : "Unknown error", - }) + .set(failureUpdate2) .where(eq(repositories.id, repository.id!)); // Append log for failure