fix: prevent starred repo name collisions during concurrent mirroring (#236)

* fix: prevent starred repo name collisions during concurrent mirroring (#95)

When multiple starred repos share the same short name (e.g. alice/dotfiles
and bob/dotfiles), concurrent batch mirroring could cause 409 Conflict
errors because generateUniqueRepoName only checked Gitea via HTTP, missing
repos that were claimed in the local DB but not yet created remotely.

Three fixes:
- Add DB-level check in generateUniqueRepoName so it queries the local
  repositories table for existing mirroredLocation claims, preventing two
  concurrent jobs from picking the same target name.
- Clear mirroredLocation on failed mirror so a failed repo doesn't falsely
  hold a location that was never successfully created, which would block
  retries and confuse the uniqueness check.
- Extract isMirroredLocationClaimedInDb helper for the DB lookup, using
  ne() to exclude the current repo's own record from the collision check.

* fix: address review findings for starred repo name collision fix

- Make generateUniqueRepoName immediately claim name by writing
  mirroredLocation to DB, closing the TOCTOU race window between
  name selection and the later status="mirroring" DB update
- Add fullName validation guard (must contain "/")
- Make isMirroredLocationClaimedInDb fail-closed (return true on
  DB error) to be conservative about preventing collisions
- Scope mirroredLocation clear on failure to starred repos only,
  preserving it for non-starred repos that may have partially
  created in Gitea and need the location for recovery

* fix: address P1/P2 review findings for starred repo name collision

P1a: Remove early name claiming from generateUniqueRepoName to prevent
stale claims on early return paths. The function now only checks
availability — the actual claim happens at the status="mirroring" DB
write (after both idempotency checks), which is protected by a new
unique partial index.

P1b: Add unique partial index on (userId, mirroredLocation) WHERE
mirroredLocation != '' via migration 0010. This enforces atomicity at
the DB level: if two concurrent workers try to claim the same name,
the second gets a constraint violation rather than silently colliding.

P2: Only clear mirroredLocation on failure if the Gitea migrate call
itself failed (migrateSucceeded flag). If migrate succeeded but
metadata mirroring failed, preserve the location since the repo
physically exists in Gitea and we need it for recovery/retry.
This commit is contained in:
ARUNAVO RAY
2026-03-18 15:27:20 +05:30
committed by GitHub
parent ddd071f7e5
commit d697cb2bc9
5 changed files with 166 additions and 24 deletions

View File

@@ -0,0 +1,9 @@
-- Add index for mirroredLocation lookups (used by name collision detection)
CREATE INDEX IF NOT EXISTS `idx_repositories_mirrored_location` ON `repositories` (`user_id`, `mirrored_location`);
-- Add unique partial index to enforce that no two repos for the same user
-- can claim the same non-empty mirroredLocation. This prevents race conditions
-- during concurrent batch mirroring of starred repos with duplicate names.
CREATE UNIQUE INDEX IF NOT EXISTS `uniq_repositories_user_mirrored_location`
ON `repositories` (`user_id`, `mirrored_location`)
WHERE `mirrored_location` != '';

View File

@@ -71,6 +71,13 @@
"when": 1773542995732,
"tag": "0009_nervous_tyger_tiger",
"breakpoints": true
},
{
"idx": 10,
"version": "6",
"when": 1774054800000,
"tag": "0010_mirrored_location_index",
"breakpoints": true
}
]
}

View File

@@ -143,11 +143,38 @@ function verify0009Migration(db: Database) {
assert(importedAtIndex?.name === "idx_repositories_user_imported_at", "Expected repositories imported_at index to exist after migration");
}
function seedPre0010Database(db: any) {
// Seed a repo row to verify index creation doesn't break existing data
seedPre0009Database(db);
}
function verify0010Migration(db: any) {
// Verify the unique partial index exists by checking that two repos
// with the same non-empty mirroredLocation would conflict
const indexes = db.prepare(
"SELECT name FROM sqlite_master WHERE type='index' AND name='uniq_repositories_user_mirrored_location'"
).all();
if (indexes.length === 0) {
throw new Error("Missing unique partial index uniq_repositories_user_mirrored_location");
}
const lookupIdx = db.prepare(
"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_repositories_mirrored_location'"
).all();
if (lookupIdx.length === 0) {
throw new Error("Missing lookup index idx_repositories_mirrored_location");
}
}
const latestUpgradeFixtures: Record<string, UpgradeFixture> = {
"0009_nervous_tyger_tiger": {
seed: seedPre0009Database,
verify: verify0009Migration,
},
"0010_mirrored_location_index": {
seed: seedPre0010Database,
verify: verify0010Migration,
},
};
function lintMigrations(selectedMigrations: Migration[]) {

View File

@@ -418,6 +418,7 @@ export const repositories = sqliteTable("repositories", {
index("idx_repositories_user_imported_at").on(table.userId, table.importedAt),
uniqueIndex("uniq_repositories_user_full_name").on(table.userId, table.fullName),
uniqueIndex("uniq_repositories_user_normalized_full_name").on(table.userId, table.normalizedFullName),
index("idx_repositories_mirrored_location").on(table.userId, table.mirroredLocation),
]);
export const mirrorJobs = sqliteTable("mirror_jobs", {

View File

@@ -10,7 +10,7 @@ import type { Organization, Repository } from "./db/schema";
import { httpPost, httpGet, httpDelete, httpPut, httpPatch } from "./http-client";
import { createMirrorJob } from "./helpers";
import { db, organizations, repositories } from "./db";
import { eq, and } from "drizzle-orm";
import { eq, and, ne } from "drizzle-orm";
import { decryptConfigTokens } from "./utils/config-encryption";
import { formatDateShort } from "./utils";
import {
@@ -586,6 +586,7 @@ export const mirrorGithubRepoToGitea = async ({
orgName: repoOwner,
baseName: repository.name,
githubOwner,
fullName: repository.fullName,
strategy: config.githubConfig.starredDuplicateStrategy,
});
@@ -825,6 +826,10 @@ export const mirrorGithubRepoToGitea = async ({
migratePayload.auth_token = decryptedConfig.githubConfig.token;
}
// Track whether the Gitea migrate call succeeded so the catch block
// knows whether to clear mirroredLocation (only safe before migrate succeeds)
let migrateSucceeded = false;
const response = await httpPost(
apiUrl,
migratePayload,
@@ -833,6 +838,8 @@ export const mirrorGithubRepoToGitea = async ({
}
);
migrateSucceeded = true;
await syncRepositoryMetadataToGitea({
config,
octokit,
@@ -1075,14 +1082,21 @@ export const mirrorGithubRepoToGitea = async ({
}`
);
// Mark repos as "failed" in DB
// Mark repos as "failed" in DB. Only clear mirroredLocation if the Gitea
// migrate call itself failed (repo doesn't exist in Gitea). If migrate
// succeeded but metadata mirroring failed, preserve the location since
// the repo physically exists and we need the location for recovery/retry.
const failureUpdate: Record<string, any> = {
status: repoStatusEnum.parse("failed"),
updatedAt: new Date(),
errorMessage: error instanceof Error ? error.message : "Unknown error",
};
if (!migrateSucceeded) {
failureUpdate.mirroredLocation = "";
}
await db
.update(repositories)
.set({
status: repoStatusEnum.parse("failed"),
updatedAt: new Date(),
errorMessage: error instanceof Error ? error.message : "Unknown error",
})
.set(failureUpdate)
.where(eq(repositories.id, repository.id!));
// Append log for failure
@@ -1133,29 +1147,103 @@ export async function getOrCreateGiteaOrg({
}
/**
* Generate a unique repository name for starred repos with duplicate names
* Check if a candidate mirroredLocation is already claimed by another repository
* in the local database. This prevents race conditions during concurrent batch
* mirroring where two repos could both claim the same name before either
* finishes creating in Gitea.
*/
async function isMirroredLocationClaimedInDb({
userId,
candidateLocation,
excludeFullName,
}: {
userId: string;
candidateLocation: string;
excludeFullName: string;
}): Promise<boolean> {
try {
const existing = await db
.select({ id: repositories.id })
.from(repositories)
.where(
and(
eq(repositories.userId, userId),
eq(repositories.mirroredLocation, candidateLocation),
ne(repositories.fullName, excludeFullName)
)
)
.limit(1);
return existing.length > 0;
} catch (error) {
console.error(
`Error checking DB for mirroredLocation "${candidateLocation}":`,
error
);
// Fail-closed: assume claimed to be conservative and prevent collisions
return true;
}
}
/**
* Generate a unique repository name for starred repos with duplicate names.
* Checks both the Gitea instance (HTTP) and the local DB (mirroredLocation)
* to reduce collisions during concurrent batch mirroring.
*
* NOTE: This function only checks availability — it does NOT claim the name.
* The actual claim happens later when mirroredLocation is written at the
* status="mirroring" DB update, which is protected by a unique partial index
* on (userId, mirroredLocation) WHERE mirroredLocation != ''.
*/
async function generateUniqueRepoName({
config,
orgName,
baseName,
githubOwner,
fullName,
strategy,
}: {
config: Partial<Config>;
orgName: string;
baseName: string;
githubOwner: string;
fullName: string;
strategy?: string;
}): Promise<string> {
if (!fullName?.includes("/")) {
throw new Error(
`Invalid fullName "${fullName}" for starred repo dedup — expected "owner/repo" format`
);
}
const duplicateStrategy = strategy || "suffix";
const userId = config.userId || "";
// Helper: check both Gitea and local DB for a candidate name
const isNameTaken = async (candidateName: string): Promise<boolean> => {
const existsInGitea = await isRepoPresentInGitea({
config,
owner: orgName,
repoName: candidateName,
});
if (existsInGitea) return true;
// Also check local DB to catch concurrent batch operations
// where another repo claimed this location but hasn't created it in Gitea yet
if (userId) {
const claimedInDb = await isMirroredLocationClaimedInDb({
userId,
candidateLocation: `${orgName}/${candidateName}`,
excludeFullName: fullName,
});
if (claimedInDb) return true;
}
return false;
};
// First check if base name is available
const baseExists = await isRepoPresentInGitea({
config,
owner: orgName,
repoName: baseName,
});
const baseExists = await isNameTaken(baseName);
if (!baseExists) {
return baseName;
@@ -1187,11 +1275,7 @@ async function generateUniqueRepoName({
break;
}
const exists = await isRepoPresentInGitea({
config,
owner: orgName,
repoName: candidateName,
});
const exists = await isNameTaken(candidateName);
if (!exists) {
console.log(`Found unique name for duplicate starred repo: ${candidateName}`);
@@ -1254,6 +1338,7 @@ export async function mirrorGitHubRepoToGiteaOrg({
orgName,
baseName: repository.name,
githubOwner,
fullName: repository.fullName,
strategy: config.githubConfig.starredDuplicateStrategy,
});
@@ -1421,6 +1506,8 @@ export async function mirrorGitHubRepoToGiteaOrg({
migratePayload.auth_token = decryptedConfig.githubConfig.token;
}
let migrateSucceeded = false;
const migrateRes = await httpPost(
apiUrl,
migratePayload,
@@ -1429,6 +1516,8 @@ export async function mirrorGitHubRepoToGiteaOrg({
}
);
migrateSucceeded = true;
await syncRepositoryMetadataToGitea({
config,
octokit,
@@ -1676,14 +1765,23 @@ export async function mirrorGitHubRepoToGiteaOrg({
error instanceof Error ? error.message : String(error)
}`
);
// Mark repos as "failed" in DB
// Mark repos as "failed" in DB. For starred repos, clear mirroredLocation
// to release the name claim for retry. For non-starred repos, preserve it
// since the Gitea repo may partially exist and we need the location for recovery.
const failureUpdate2: Record<string, any> = {
status: repoStatusEnum.parse("failed"),
updatedAt: new Date(),
errorMessage: error instanceof Error ? error.message : "Unknown error",
};
// Only clear mirroredLocation if the Gitea migrate call itself failed.
// If migrate succeeded but metadata mirroring failed, preserve the
// location since the repo physically exists in Gitea.
if (!migrateSucceeded) {
failureUpdate2.mirroredLocation = "";
}
await db
.update(repositories)
.set({
status: repoStatusEnum.parse("failed"),
updatedAt: new Date(),
errorMessage: error instanceof Error ? error.message : "Unknown error",
})
.set(failureUpdate2)
.where(eq(repositories.id, repository.id!));
// Append log for failure