- Implemented comprehensive GitHub API rate limit handling:
  - Integrated @octokit/plugin-throttling for automatic retry with exponential backoff (see the sketch after this list)
  - Added RateLimitManager service to track and enforce rate limits
  - Store rate limit status in the database for persistence across restarts
  - Automatic pause and resume when limits are exceeded
  - Proper user identification for the 5,000 req/hr authenticated limit (vs. 60 req/hr unauthenticated)
- Improved rate limit UI/UX:
  - Removed the intrusive rate limit card from the dashboard
  - Toast notifications only at critical thresholds (80% and 100% usage)
  - All rate limit events logged for debugging
- Optimized for GitHub's API constraints:
  - Reduced the default batch size from 10 to 5 repositories
  - Added documentation about GitHub's 100 concurrent request limit
  - Better handling of repositories with many issues/PRs
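
The throttling item above is, in broad strokes, the standard @octokit/plugin-throttling setup. The sketch below is illustrative only: the token source and handler bodies are assumptions, and the project's real wiring (createGitHubClient plus the RateLimitManager service) may differ in detail.

import { Octokit } from "@octokit/rest";
import { throttling } from "@octokit/plugin-throttling";

const ThrottledOctokit = Octokit.plugin(throttling);

// Placeholder token source; the real client is built in createGitHubClient
// and also records usage through the RateLimitManager service.
const token = process.env.GITHUB_TOKEN ?? "";

const octokit = new ThrottledOctokit({
  auth: token,
  throttle: {
    // Called when the primary limit (5,000 req/hr authenticated) is exhausted.
    onRateLimit: (retryAfter, options, client, retryCount) => {
      client.log.warn(`Rate limit hit for ${options.method} ${options.url}; retry in ${retryAfter}s`);
      return retryCount < 2; // retry at most twice, then give up
    },
    // Called on secondary/abuse limits (e.g. too many concurrent requests).
    onSecondaryRateLimit: (retryAfter, options, client) => {
      client.log.warn(`Secondary rate limit for ${options.method} ${options.url}`);
      return true; // retry after the advised delay
    },
  },
});

With a client like this, requests are paused and retried automatically when GitHub reports a limit. The module below is separate from that wiring: it recovers mirror and sync jobs that were interrupted by a container restart.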
/**
 * Recovery mechanism for interrupted jobs
 * This module handles detecting and resuming jobs that were interrupted by container restarts
 */

import { findInterruptedJobs, resumeInterruptedJob } from './helpers';
import { db, repositories, organizations, mirrorJobs, configs } from './db';
import { eq, and, lt, inArray } from 'drizzle-orm';
import { mirrorGithubRepoToGitea, mirrorGitHubOrgRepoToGiteaOrg, syncGiteaRepo } from './gitea';
import { createGitHubClient } from './github';
import { processWithResilience } from './utils/concurrency';
import { repositoryVisibilityEnum, repoStatusEnum } from '@/types/Repository';
import type { Repository } from './db/schema';
import { getDecryptedGitHubToken } from './utils/config-encryption';

// Recovery state tracking
let recoveryInProgress = false;
let lastRecoveryAttempt: Date | null = null;

/**
 * Validates database connection before attempting recovery
 */
async function validateDatabaseConnection(): Promise<boolean> {
  try {
    // Simple query to test database connectivity
    await db.select().from(mirrorJobs).limit(1);
    return true;
  } catch (error) {
    console.error('Database connection validation failed:', error);
    return false;
  }
}

/**
 * Cleans up stale jobs that are too old to recover
 */
async function cleanupStaleJobs(): Promise<void> {
  try {
    const staleThreshold = new Date();
    staleThreshold.setHours(staleThreshold.getHours() - 24); // Jobs older than 24 hours

    const staleJobs = await db
      .select()
      .from(mirrorJobs)
      .where(
        and(
          eq(mirrorJobs.inProgress, true),
          lt(mirrorJobs.startedAt, staleThreshold)
        )
      );

    if (staleJobs.length > 0) {
      console.log(`Found ${staleJobs.length} stale jobs to clean up`);

      // Mark stale jobs as failed
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "failed",
          message: "Job marked as failed due to being stale (older than 24 hours)"
        })
        .where(
          and(
            eq(mirrorJobs.inProgress, true),
            lt(mirrorJobs.startedAt, staleThreshold)
          )
        );

      console.log(`Cleaned up ${staleJobs.length} stale jobs`);
    }
  } catch (error) {
    console.error('Error cleaning up stale jobs:', error);
  }
}

/**
 * Initialize the recovery system with enhanced error handling and resilience
 * This should be called when the application starts
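 *
 * Typical startup call (illustrative; option values shown are this function's defaults):
 *   if (await hasJobsNeedingRecovery()) {
 *     await initializeRecovery({ maxRetries: 3, retryDelay: 5000 });
 *   }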
 */
export async function initializeRecovery(options: {
  maxRetries?: number;
  retryDelay?: number;
  skipIfRecentAttempt?: boolean;
} = {}): Promise<boolean> {
  const { maxRetries = 3, retryDelay = 5000, skipIfRecentAttempt = true } = options;

  // Prevent concurrent recovery attempts
  if (recoveryInProgress) {
    console.log('Recovery already in progress, skipping...');
    return false;
  }

  // Skip if recent attempt (within last 5 minutes) unless forced
  if (skipIfRecentAttempt && lastRecoveryAttempt) {
    const timeSinceLastAttempt = Date.now() - lastRecoveryAttempt.getTime();
    if (timeSinceLastAttempt < 5 * 60 * 1000) {
      console.log('Recent recovery attempt detected, skipping...');
      return false;
    }
  }

  recoveryInProgress = true;
  lastRecoveryAttempt = new Date();

  console.log('Initializing recovery system...');

  let attempt = 0;
  while (attempt < maxRetries) {
    try {
      attempt++;
      console.log(`Recovery attempt ${attempt}/${maxRetries}`);

      // Validate database connection first
      const dbConnected = await validateDatabaseConnection();
      if (!dbConnected) {
        throw new Error('Database connection validation failed');
      }

      // Clean up stale jobs first
      await cleanupStaleJobs();

      // Find interrupted jobs
      const interruptedJobs = await findInterruptedJobs();

      if (interruptedJobs.length === 0) {
        console.log('No interrupted jobs found.');
        recoveryInProgress = false;
        return true;
      }

      console.log(`Found ${interruptedJobs.length} interrupted jobs. Starting recovery...`);

      // Process each interrupted job with individual error handling
      let successCount = 0;
      let failureCount = 0;

      for (const job of interruptedJobs) {
        try {
          const resumeData = await resumeInterruptedJob(job);

          if (!resumeData) {
            console.log(`Job ${job.id} could not be resumed.`);
            failureCount++;
            continue;
          }

          const { job: updatedJob, remainingItemIds } = resumeData;

          // Handle different job types
          switch (updatedJob.jobType) {
            case 'mirror':
              await recoverMirrorJob(updatedJob, remainingItemIds);
              break;
            case 'sync':
              await recoverSyncJob(updatedJob, remainingItemIds);
              break;
            case 'retry':
              await recoverRetryJob(updatedJob, remainingItemIds);
              break;
            default:
              console.log(`Unknown job type: ${updatedJob.jobType}`);
              failureCount++;
              continue;
          }

          successCount++;
        } catch (jobError) {
          console.error(`Error recovering individual job ${job.id}:`, jobError);
          failureCount++;

          // Mark the job as failed if recovery fails
          try {
            await db
              .update(mirrorJobs)
              .set({
                inProgress: false,
                completedAt: new Date(),
                status: "failed",
                message: `Job recovery failed: ${jobError instanceof Error ? jobError.message : String(jobError)}`
              })
              .where(eq(mirrorJobs.id, job.id));
          } catch (updateError) {
            console.error(`Failed to mark job ${job.id} as failed:`, updateError);
          }
        }
      }

      console.log(`Recovery process completed. Success: ${successCount}, Failures: ${failureCount}`);
      recoveryInProgress = false;
      return true;

    } catch (error) {
      console.error(`Recovery attempt ${attempt} failed:`, error);

      if (attempt < maxRetries) {
        console.log(`Retrying in ${retryDelay}ms...`);
        await new Promise(resolve => setTimeout(resolve, retryDelay));
      } else {
        console.error('All recovery attempts failed');
        recoveryInProgress = false;
        return false;
      }
    }
  }

  recoveryInProgress = false;
  return false;
}

/**
 * Recover a mirror job with enhanced error handling
 */
async function recoverMirrorJob(job: any, remainingItemIds: string[]) {
  console.log(`Recovering mirror job ${job.id} with ${remainingItemIds.length} remaining items`);

  try {
    // Get the config for this user with better error handling
    const userConfigs = await db
      .select()
      .from(configs)
      .where(eq(configs.userId, job.userId))
      .limit(1);

    if (userConfigs.length === 0) {
      throw new Error(`No configuration found for user ${job.userId}`);
    }

    const config = userConfigs[0];
    if (!config.id) {
      throw new Error(`Configuration missing id for user ${job.userId}`);
    }

    // Get repositories to process with validation
    const repos = await db
      .select()
      .from(repositories)
      .where(inArray(repositories.id, remainingItemIds));

    if (repos.length === 0) {
      console.warn(`No repositories found for remaining item IDs: ${remainingItemIds.join(', ')}`);
      // Mark job as completed since there's nothing to process
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "mirrored",
          message: "Job completed - no repositories found to process"
        })
        .where(eq(mirrorJobs.id, job.id));
      return;
    }

    console.log(`Found ${repos.length} repositories to process for recovery`);

    // Validate GitHub configuration before creating client
    if (!config.githubConfig?.token) {
      throw new Error('GitHub token not found in configuration');
    }

    // Create GitHub client with error handling and rate limit tracking
    let octokit;
    try {
      const decryptedToken = getDecryptedGitHubToken(config);
      const githubUsername = config.githubConfig?.owner || undefined;
      const userId = config.userId || undefined;
      octokit = createGitHubClient(decryptedToken, userId, githubUsername);
    } catch (error) {
      throw new Error(`Failed to create GitHub client: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Process repositories with resilience and reduced concurrency for recovery
    await processWithResilience(
      repos,
      async (repo) => {
        // Prepare repository data with validation
        const repoData = {
          ...repo,
          status: repoStatusEnum.parse("imported"),
          organization: repo.organization ?? undefined,
          lastMirrored: repo.lastMirrored ?? undefined,
          errorMessage: repo.errorMessage ?? undefined,
          forkedFrom: repo.forkedFrom ?? undefined,
          visibility: repositoryVisibilityEnum.parse(repo.visibility || "public"),
          mirroredLocation: repo.mirroredLocation || "",
        };

        // Mirror the repository based on whether it's in an organization
        if (repo.organization && config.giteaConfig.preserveOrgStructure) {
          await mirrorGitHubOrgRepoToGiteaOrg({
            config,
            octokit,
            orgName: repo.organization,
            repository: repoData,
          });
        } else {
          await mirrorGithubRepoToGitea({
            octokit,
            repository: repoData,
            config,
          });
        }

        return repo;
      },
      {
        userId: job.userId,
        jobType: 'mirror',
        getItemId: (repo) => repo.id,
        getItemName: (repo) => repo.name,
        resumeFromJobId: job.id,
        concurrencyLimit: 2, // Reduced concurrency for recovery to be more stable
        maxRetries: 3, // Increased retries for recovery
        retryDelay: 3000, // Longer delay for recovery
      }
    );

    console.log(`Successfully recovered mirror job ${job.id}`);
  } catch (error) {
    console.error(`Error recovering mirror job ${job.id}:`, error);

    // Mark the job as failed
    try {
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "failed",
          message: `Mirror job recovery failed: ${error instanceof Error ? error.message : String(error)}`
        })
        .where(eq(mirrorJobs.id, job.id));
    } catch (updateError) {
      console.error(`Failed to mark mirror job ${job.id} as failed:`, updateError);
    }

    throw error; // Re-throw to be handled by the caller
  }
}

/**
 * Recover a sync job with enhanced error handling
 */
async function recoverSyncJob(job: any, remainingItemIds: string[]) {
  console.log(`Recovering sync job ${job.id} with ${remainingItemIds.length} remaining items`);

  try {
    // Get the config for this user with better error handling
    const userConfigs = await db
      .select()
      .from(configs)
      .where(eq(configs.userId, job.userId))
      .limit(1);

    if (userConfigs.length === 0) {
      throw new Error(`No configuration found for user ${job.userId}`);
    }

    const config = userConfigs[0];
    if (!config.id) {
      throw new Error(`Configuration missing id for user ${job.userId}`);
    }

    // Get repositories to process with validation
    const repos = await db
      .select()
      .from(repositories)
      .where(inArray(repositories.id, remainingItemIds));

    if (repos.length === 0) {
      console.warn(`No repositories found for remaining item IDs: ${remainingItemIds.join(', ')}`);
      // Mark job as completed since there's nothing to process
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "mirrored",
          message: "Job completed - no repositories found to process"
        })
        .where(eq(mirrorJobs.id, job.id));
      return;
    }

    console.log(`Found ${repos.length} repositories to process for sync recovery`);

    // Process repositories with resilience and reduced concurrency for recovery
    await processWithResilience(
      repos,
      async (repo) => {
        // Prepare repository data with validation
        const repoData = {
          ...repo,
          status: repoStatusEnum.parse(repo.status || "imported"),
          organization: repo.organization ?? undefined,
          lastMirrored: repo.lastMirrored ?? undefined,
          errorMessage: repo.errorMessage ?? undefined,
          forkedFrom: repo.forkedFrom ?? undefined,
          visibility: repositoryVisibilityEnum.parse(repo.visibility || "public"),
          mirroredLocation: repo.mirroredLocation || "",
        };

        // Sync the repository
        await syncGiteaRepo({
          config,
          repository: repoData,
        });

        return repo;
      },
      {
        userId: job.userId,
        jobType: 'sync',
        getItemId: (repo) => repo.id,
        getItemName: (repo) => repo.name,
        resumeFromJobId: job.id,
        concurrencyLimit: 3, // Reduced concurrency for recovery
        maxRetries: 3, // Increased retries for recovery
        retryDelay: 3000, // Longer delay for recovery
      }
    );

    console.log(`Successfully recovered sync job ${job.id}`);
  } catch (error) {
    console.error(`Error recovering sync job ${job.id}:`, error);

    // Mark the job as failed
    try {
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "failed",
          message: `Sync job recovery failed: ${error instanceof Error ? error.message : String(error)}`
        })
        .where(eq(mirrorJobs.id, job.id));
    } catch (updateError) {
      console.error(`Failed to mark sync job ${job.id} as failed:`, updateError);
    }

    throw error; // Re-throw to be handled by the caller
  }
}

/**
 * Recover a retry job with enhanced error handling
 */
async function recoverRetryJob(job: any, remainingItemIds: string[]) {
  console.log(`Recovering retry job ${job.id} with ${remainingItemIds.length} remaining items`);

  try {
    // For now, retry jobs are treated similarly to mirror jobs
    // In the future, this could have specific retry logic
    await recoverMirrorJob(job, remainingItemIds);
    console.log(`Successfully recovered retry job ${job.id}`);
  } catch (error) {
    console.error(`Error recovering retry job ${job.id}:`, error);

    // Mark the job as failed
    try {
      await db
        .update(mirrorJobs)
        .set({
          inProgress: false,
          completedAt: new Date(),
          status: "failed",
          message: `Retry job recovery failed: ${error instanceof Error ? error.message : String(error)}`
        })
        .where(eq(mirrorJobs.id, job.id));
    } catch (updateError) {
      console.error(`Failed to mark retry job ${job.id} as failed:`, updateError);
    }

    throw error; // Re-throw to be handled by the caller
  }
}

/**
 * Get recovery system status
 */
export function getRecoveryStatus() {
  return {
    inProgress: recoveryInProgress,
    lastAttempt: lastRecoveryAttempt,
  };
}

/**
 * Force recovery to run (bypassing recent attempt check)
 */
export async function forceRecovery(): Promise<boolean> {
  return initializeRecovery({ skipIfRecentAttempt: false });
}

/**
 * Check if there are any jobs that need recovery
 */
export async function hasJobsNeedingRecovery(): Promise<boolean> {
  try {
    const interruptedJobs = await findInterruptedJobs();
    return interruptedJobs.length > 0;
  } catch (error) {
    console.error('Error checking for jobs needing recovery:', error);
    return false;
  }
}