diff --git a/drizzle/0010_mirrored_location_index.sql b/drizzle/0010_mirrored_location_index.sql new file mode 100644 index 0000000..82ddbfb --- /dev/null +++ b/drizzle/0010_mirrored_location_index.sql @@ -0,0 +1,9 @@ +-- Add index for mirroredLocation lookups (used by name collision detection) +CREATE INDEX IF NOT EXISTS `idx_repositories_mirrored_location` ON `repositories` (`user_id`, `mirrored_location`); + +-- Add unique partial index to enforce that no two repos for the same user +-- can claim the same non-empty mirroredLocation. This prevents race conditions +-- during concurrent batch mirroring of starred repos with duplicate names. +CREATE UNIQUE INDEX IF NOT EXISTS `uniq_repositories_user_mirrored_location` + ON `repositories` (`user_id`, `mirrored_location`) + WHERE `mirrored_location` != ''; diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 787ca46..9c3766f 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -71,6 +71,13 @@ "when": 1773542995732, "tag": "0009_nervous_tyger_tiger", "breakpoints": true + }, + { + "idx": 10, + "version": "6", + "when": 1774054800000, + "tag": "0010_mirrored_location_index", + "breakpoints": true } ] } \ No newline at end of file diff --git a/scripts/validate-migrations.ts b/scripts/validate-migrations.ts index 8432407..ceac3da 100644 --- a/scripts/validate-migrations.ts +++ b/scripts/validate-migrations.ts @@ -143,11 +143,38 @@ function verify0009Migration(db: Database) { assert(importedAtIndex?.name === "idx_repositories_user_imported_at", "Expected repositories imported_at index to exist after migration"); } +function seedPre0010Database(db: any) { + // Seed a repo row to verify index creation doesn't break existing data + seedPre0009Database(db); +} + +function verify0010Migration(db: any) { + // Verify the unique partial index exists by checking that two repos + // with the same non-empty mirroredLocation would conflict + const indexes = db.prepare( + "SELECT name FROM sqlite_master WHERE type='index' AND name='uniq_repositories_user_mirrored_location'" + ).all(); + if (indexes.length === 0) { + throw new Error("Missing unique partial index uniq_repositories_user_mirrored_location"); + } + + const lookupIdx = db.prepare( + "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_repositories_mirrored_location'" + ).all(); + if (lookupIdx.length === 0) { + throw new Error("Missing lookup index idx_repositories_mirrored_location"); + } +} + const latestUpgradeFixtures: Record = { "0009_nervous_tyger_tiger": { seed: seedPre0009Database, verify: verify0009Migration, }, + "0010_mirrored_location_index": { + seed: seedPre0010Database, + verify: verify0010Migration, + }, }; function lintMigrations(selectedMigrations: Migration[]) { diff --git a/src/lib/db/schema.ts b/src/lib/db/schema.ts index fd64254..0637aab 100644 --- a/src/lib/db/schema.ts +++ b/src/lib/db/schema.ts @@ -418,6 +418,7 @@ export const repositories = sqliteTable("repositories", { index("idx_repositories_user_imported_at").on(table.userId, table.importedAt), uniqueIndex("uniq_repositories_user_full_name").on(table.userId, table.fullName), uniqueIndex("uniq_repositories_user_normalized_full_name").on(table.userId, table.normalizedFullName), + index("idx_repositories_mirrored_location").on(table.userId, table.mirroredLocation), ]); export const mirrorJobs = sqliteTable("mirror_jobs", { diff --git a/src/lib/gitea.ts b/src/lib/gitea.ts index ce4fa19..475315c 100644 --- a/src/lib/gitea.ts +++ b/src/lib/gitea.ts @@ -10,7 +10,7 @@ import type { Organization, Repository } from "./db/schema"; import { httpPost, httpGet, httpDelete, httpPut, httpPatch } from "./http-client"; import { createMirrorJob } from "./helpers"; import { db, organizations, repositories } from "./db"; -import { eq, and } from "drizzle-orm"; +import { eq, and, ne } from "drizzle-orm"; import { decryptConfigTokens } from "./utils/config-encryption"; import { formatDateShort } from "./utils"; import { @@ -586,6 +586,7 @@ export const mirrorGithubRepoToGitea = async ({ orgName: repoOwner, baseName: repository.name, githubOwner, + fullName: repository.fullName, strategy: config.githubConfig.starredDuplicateStrategy, }); @@ -825,6 +826,10 @@ export const mirrorGithubRepoToGitea = async ({ migratePayload.auth_token = decryptedConfig.githubConfig.token; } + // Track whether the Gitea migrate call succeeded so the catch block + // knows whether to clear mirroredLocation (only safe before migrate succeeds) + let migrateSucceeded = false; + const response = await httpPost( apiUrl, migratePayload, @@ -833,6 +838,8 @@ export const mirrorGithubRepoToGitea = async ({ } ); + migrateSucceeded = true; + await syncRepositoryMetadataToGitea({ config, octokit, @@ -1075,14 +1082,21 @@ export const mirrorGithubRepoToGitea = async ({ }` ); - // Mark repos as "failed" in DB + // Mark repos as "failed" in DB. Only clear mirroredLocation if the Gitea + // migrate call itself failed (repo doesn't exist in Gitea). If migrate + // succeeded but metadata mirroring failed, preserve the location since + // the repo physically exists and we need the location for recovery/retry. + const failureUpdate: Record = { + status: repoStatusEnum.parse("failed"), + updatedAt: new Date(), + errorMessage: error instanceof Error ? error.message : "Unknown error", + }; + if (!migrateSucceeded) { + failureUpdate.mirroredLocation = ""; + } await db .update(repositories) - .set({ - status: repoStatusEnum.parse("failed"), - updatedAt: new Date(), - errorMessage: error instanceof Error ? error.message : "Unknown error", - }) + .set(failureUpdate) .where(eq(repositories.id, repository.id!)); // Append log for failure @@ -1133,29 +1147,103 @@ export async function getOrCreateGiteaOrg({ } /** - * Generate a unique repository name for starred repos with duplicate names + * Check if a candidate mirroredLocation is already claimed by another repository + * in the local database. This prevents race conditions during concurrent batch + * mirroring where two repos could both claim the same name before either + * finishes creating in Gitea. + */ +async function isMirroredLocationClaimedInDb({ + userId, + candidateLocation, + excludeFullName, +}: { + userId: string; + candidateLocation: string; + excludeFullName: string; +}): Promise { + try { + const existing = await db + .select({ id: repositories.id }) + .from(repositories) + .where( + and( + eq(repositories.userId, userId), + eq(repositories.mirroredLocation, candidateLocation), + ne(repositories.fullName, excludeFullName) + ) + ) + .limit(1); + + return existing.length > 0; + } catch (error) { + console.error( + `Error checking DB for mirroredLocation "${candidateLocation}":`, + error + ); + // Fail-closed: assume claimed to be conservative and prevent collisions + return true; + } +} + +/** + * Generate a unique repository name for starred repos with duplicate names. + * Checks both the Gitea instance (HTTP) and the local DB (mirroredLocation) + * to reduce collisions during concurrent batch mirroring. + * + * NOTE: This function only checks availability — it does NOT claim the name. + * The actual claim happens later when mirroredLocation is written at the + * status="mirroring" DB update, which is protected by a unique partial index + * on (userId, mirroredLocation) WHERE mirroredLocation != ''. */ async function generateUniqueRepoName({ config, orgName, baseName, githubOwner, + fullName, strategy, }: { config: Partial; orgName: string; baseName: string; githubOwner: string; + fullName: string; strategy?: string; }): Promise { + if (!fullName?.includes("/")) { + throw new Error( + `Invalid fullName "${fullName}" for starred repo dedup — expected "owner/repo" format` + ); + } + const duplicateStrategy = strategy || "suffix"; + const userId = config.userId || ""; + + // Helper: check both Gitea and local DB for a candidate name + const isNameTaken = async (candidateName: string): Promise => { + const existsInGitea = await isRepoPresentInGitea({ + config, + owner: orgName, + repoName: candidateName, + }); + if (existsInGitea) return true; + + // Also check local DB to catch concurrent batch operations + // where another repo claimed this location but hasn't created it in Gitea yet + if (userId) { + const claimedInDb = await isMirroredLocationClaimedInDb({ + userId, + candidateLocation: `${orgName}/${candidateName}`, + excludeFullName: fullName, + }); + if (claimedInDb) return true; + } + + return false; + }; // First check if base name is available - const baseExists = await isRepoPresentInGitea({ - config, - owner: orgName, - repoName: baseName, - }); + const baseExists = await isNameTaken(baseName); if (!baseExists) { return baseName; @@ -1187,11 +1275,7 @@ async function generateUniqueRepoName({ break; } - const exists = await isRepoPresentInGitea({ - config, - owner: orgName, - repoName: candidateName, - }); + const exists = await isNameTaken(candidateName); if (!exists) { console.log(`Found unique name for duplicate starred repo: ${candidateName}`); @@ -1254,6 +1338,7 @@ export async function mirrorGitHubRepoToGiteaOrg({ orgName, baseName: repository.name, githubOwner, + fullName: repository.fullName, strategy: config.githubConfig.starredDuplicateStrategy, }); @@ -1421,6 +1506,8 @@ export async function mirrorGitHubRepoToGiteaOrg({ migratePayload.auth_token = decryptedConfig.githubConfig.token; } + let migrateSucceeded = false; + const migrateRes = await httpPost( apiUrl, migratePayload, @@ -1429,6 +1516,8 @@ export async function mirrorGitHubRepoToGiteaOrg({ } ); + migrateSucceeded = true; + await syncRepositoryMetadataToGitea({ config, octokit, @@ -1676,14 +1765,23 @@ export async function mirrorGitHubRepoToGiteaOrg({ error instanceof Error ? error.message : String(error) }` ); - // Mark repos as "failed" in DB + // Mark repos as "failed" in DB. For starred repos, clear mirroredLocation + // to release the name claim for retry. For non-starred repos, preserve it + // since the Gitea repo may partially exist and we need the location for recovery. + const failureUpdate2: Record = { + status: repoStatusEnum.parse("failed"), + updatedAt: new Date(), + errorMessage: error instanceof Error ? error.message : "Unknown error", + }; + // Only clear mirroredLocation if the Gitea migrate call itself failed. + // If migrate succeeded but metadata mirroring failed, preserve the + // location since the repo physically exists in Gitea. + if (!migrateSucceeded) { + failureUpdate2.mirroredLocation = ""; + } await db .update(repositories) - .set({ - status: repoStatusEnum.parse("failed"), - updatedAt: new Date(), - errorMessage: error instanceof Error ? error.message : "Unknown error", - }) + .set(failureUpdate2) .where(eq(repositories.id, repository.id!)); // Append log for failure