Merge pull request #119 from RayLabsHQ/fix/duplicate-repos-issue-115

Fix/duplicate repos issue 115
This commit is contained in:
ARUNAVO RAY
2025-10-22 08:59:07 +05:30
committed by GitHub

View File

@@ -200,6 +200,96 @@ export const isRepoPresentInGitea = async ({
}
};
/**
 * Reports whether the database already records an in-progress mirror/sync
 * operation for the named repository belonging to `config.userId`.
 *
 * Callers use this as an idempotency guard so that concurrent operations do
 * not start mirroring the same repository twice.
 *
 * Rows whose `updatedAt` is more than two hours old are treated as stale and
 * are ignored (a warning is logged for each); rows without an `updatedAt`
 * value are treated as still active.
 *
 * @param config - Configuration object; only `userId` is read. When it is
 *   missing the function returns `false` without querying the database.
 * @param repoName - Repository name matched against `repositories.name`.
 * @param expectedLocation - Optional "owner/repo" target. When provided, an
 *   active row only counts if its `mirroredLocation` equals this value. When
 *   omitted (or empty), any active row counts.
 * @returns `true` when a non-stale "mirroring"/"syncing" row qualifies,
 *   otherwise `false`. Any error raised while checking is logged and
 *   reported as `false`, so a failed check never blocks the caller.
 */
export const isRepoCurrentlyMirroring = async ({
  config,
  repoName,
  expectedLocation,
}: {
  config: Partial<Config>;
  repoName: string;
  expectedLocation?: string; // Format: "owner/repo"
}): Promise<boolean> => {
  try {
    if (!config.userId) {
      return false;
    }

    const { or } = await import("drizzle-orm");

    // Rows for this user + repo name whose status marks an operation in flight.
    const candidates = await db
      .select()
      .from(repositories)
      .where(
        and(
          eq(repositories.userId, config.userId),
          eq(repositories.name, repoName),
          or(
            eq(repositories.status, "mirroring"),
            eq(repositories.status, "syncing")
          )
        )
      );

    if (candidates.length === 0) {
      return false;
    }

    // Drop rows that have been "in progress" for longer than two hours.
    const STALE_AFTER_MS = 2 * 60 * 60 * 1000;
    const checkedAt = Date.now();
    const live: typeof candidates = [];

    for (const row of candidates) {
      if (!row.updatedAt) {
        // No timestamp to judge by: keep the row and assume it is active.
        live.push(row);
        continue;
      }

      const ageMs = checkedAt - new Date(row.updatedAt).getTime();
      if (ageMs > STALE_AFTER_MS) {
        console.warn(
          `[Idempotency] Repository ${row.name} has been in "${row.status}" status for over 2 hours. ` +
            `Considering it stale and allowing retry.`
        );
        continue;
      }

      live.push(row);
    }

    if (live.length === 0) {
      console.log(
        `[Idempotency] All in-progress operations for ${repoName} are stale (>2h). Allowing retry.`
      );
      return false;
    }

    // Without a target location, any active row is enough.
    if (!expectedLocation) {
      console.log(
        `[Idempotency] Repository ${repoName} is already being mirrored (${live.length} in-progress operations found)`
      );
      return true;
    }

    // With a target location, only an active row at that exact location counts.
    const isAtExpectedLocation = live.some(
      (row) => row.mirroredLocation === expectedLocation
    );
    if (isAtExpectedLocation) {
      console.log(
        `[Idempotency] Repository ${repoName} is already being mirrored at ${expectedLocation}`
      );
      return true;
    }

    return false;
  } catch (error) {
    console.error("Error checking if repo is currently mirroring:", error);
    console.error("Error details:", error);
    return false;
  }
};
/**
* Helper function to check if a repository exists in Gitea.
* First checks the recorded mirroredLocation, then falls back to the expected location.
@@ -296,6 +386,23 @@ export const mirrorGithubRepoToGitea = async ({
}
}
// IDEMPOTENCY CHECK: Check if this repo is already being mirrored
const expectedLocation = `${repoOwner}/${targetRepoName}`;
const isCurrentlyMirroring = await isRepoCurrentlyMirroring({
config,
repoName: targetRepoName,
expectedLocation,
});
if (isCurrentlyMirroring) {
console.log(
`[Idempotency] Skipping ${repository.fullName} - already being mirrored to ${expectedLocation}`
);
// Don't throw an error, just return to allow other repos to continue
return;
}
const isExisting = await isRepoPresentInGitea({
config,
owner: repoOwner,
@@ -337,11 +444,30 @@ export const mirrorGithubRepoToGitea = async ({
console.log(`Mirroring repository ${repository.name}`);
// DOUBLE-CHECK: Final idempotency check right before updating status
// This catches race conditions in the small window between first check and status update
const finalCheck = await isRepoCurrentlyMirroring({
config,
repoName: targetRepoName,
expectedLocation,
});
if (finalCheck) {
console.log(
`[Idempotency] Race condition detected - ${repository.fullName} is now being mirrored by another process. Skipping.`
);
return;
}
// Mark repos as "mirroring" in DB
// CRITICAL: Set mirroredLocation NOW (not after success) so idempotency checks work
// This becomes the "target location" - where we intend to mirror to
// Without this, the idempotency check can't detect concurrent operations on first mirror
await db
.update(repositories)
.set({
status: repoStatusEnum.parse("mirroring"),
mirroredLocation: expectedLocation,
updatedAt: new Date(),
})
.where(eq(repositories.id, repository.id!));
@@ -733,9 +859,16 @@ async function generateUniqueRepoName({
attempt++;
}
// If all attempts failed, use timestamp as last resort
const timestamp = Date.now();
return `${baseName}-${githubOwner}-${timestamp}`;
// SECURITY FIX: Prevent infinite duplicate creation
// Instead of falling back to timestamp (which creates infinite duplicates),
// throw an error to prevent hundreds of duplicate repos
console.error(`Failed to find unique name for ${baseName} after ${maxAttempts} attempts`);
console.error(`Organization: ${orgName}, GitHub Owner: ${githubOwner}, Strategy: ${duplicateStrategy}`);
throw new Error(
`Unable to generate unique repository name for "${baseName}". ` +
`All ${maxAttempts} naming attempts resulted in conflicts. ` +
`Please manually resolve the naming conflict or adjust your duplicate strategy.`
);
}
export async function mirrorGitHubRepoToGiteaOrg({
@@ -785,6 +918,23 @@ export async function mirrorGitHubRepoToGiteaOrg({
}
}
// IDEMPOTENCY CHECK: Check if this repo is already being mirrored
const expectedLocation = `${orgName}/${targetRepoName}`;
const isCurrentlyMirroring = await isRepoCurrentlyMirroring({
config,
repoName: targetRepoName,
expectedLocation,
});
if (isCurrentlyMirroring) {
console.log(
`[Idempotency] Skipping ${repository.fullName} - already being mirrored to ${expectedLocation}`
);
// Don't throw an error, just return to allow other repos to continue
return;
}
const isExisting = await isRepoPresentInGitea({
config,
owner: orgName,
@@ -831,11 +981,30 @@ export async function mirrorGitHubRepoToGiteaOrg({
// Use clean clone URL without embedded credentials (Forgejo 12+ security requirement)
const cloneAddress = repository.cloneUrl;
// DOUBLE-CHECK: Final idempotency check right before updating status
// This catches race conditions in the small window between first check and status update
const finalCheck = await isRepoCurrentlyMirroring({
config,
repoName: targetRepoName,
expectedLocation,
});
if (finalCheck) {
console.log(
`[Idempotency] Race condition detected - ${repository.fullName} is now being mirrored by another process. Skipping.`
);
return;
}
// Mark repos as "mirroring" in DB
// CRITICAL: Set mirroredLocation NOW (not after success) so idempotency checks work
// This becomes the "target location" - where we intend to mirror to
// Without this, the idempotency check can't detect concurrent operations on first mirror
await db
.update(repositories)
.set({
status: repoStatusEnum.parse("mirroring"),
mirroredLocation: expectedLocation,
updatedAt: new Date(),
})
.where(eq(repositories.id, repository.id!));