diff --git a/.env.example b/.env.example index 4763dd6..74df142 100644 --- a/.env.example +++ b/.env.example @@ -71,7 +71,7 @@ DOCKER_TAG=latest # Repository Settings # GITEA_ORG_VISIBILITY=public # Options: public, private, limited, default -# GITEA_MIRROR_INTERVAL=8h # Mirror sync interval (e.g., 30m, 1h, 8h, 24h) +# GITEA_MIRROR_INTERVAL=8h # Mirror sync interval (e.g., 30m, 1h, 8h, 24h) - automatically enables scheduler # GITEA_LFS=false # Enable LFS support # GITEA_CREATE_ORG=true # Auto-create organizations # GITEA_PRESERVE_VISIBILITY=false # Preserve GitHub repo visibility in Gitea @@ -150,7 +150,7 @@ DOCKER_TAG=latest # Repository Cleanup # CLEANUP_DELETE_FROM_GITEA=false # Delete repos from Gitea -# CLEANUP_DELETE_IF_NOT_IN_GITHUB=true # Delete if not in GitHub +# CLEANUP_DELETE_IF_NOT_IN_GITHUB=true # Delete if not in GitHub - automatically enables cleanup # CLEANUP_ORPHANED_REPO_ACTION=archive # Options: skip, archive, delete # CLEANUP_DRY_RUN=true # Test mode without actual deletion diff --git a/docs/ENVIRONMENT_VARIABLES.md b/docs/ENVIRONMENT_VARIABLES.md index 689f9a8..52ac8dc 100644 --- a/docs/ENVIRONMENT_VARIABLES.md +++ b/docs/ENVIRONMENT_VARIABLES.md @@ -83,7 +83,7 @@ Settings for the destination Gitea instance. | Variable | Description | Default | Options | |----------|-------------|---------|---------| | `GITEA_ORG_VISIBILITY` | Default organization visibility | `public` | `public`, `private`, `limited`, `default` | -| `GITEA_MIRROR_INTERVAL` | Mirror sync interval | `8h` | Duration string (e.g., `30m`, `1h`, `8h`, `24h`) | +| `GITEA_MIRROR_INTERVAL` | Mirror sync interval (automatically enables scheduler) | `8h` | Duration string (e.g., `30m`, `1h`, `8h`, `24h`) | | `GITEA_LFS` | Enable LFS support | `false` | `true`, `false` | | `GITEA_CREATE_ORG` | Auto-create organizations | `true` | `true`, `false` | | `GITEA_PRESERVE_VISIBILITY` | Preserve GitHub repo visibility in Gitea | `false` | `true`, `false` | @@ -192,7 +192,7 @@ Configure automatic cleanup of old events and data. | Variable | Description | Default | Options | |----------|-------------|---------|---------| | `CLEANUP_DELETE_FROM_GITEA` | Delete repositories from Gitea | `false` | `true`, `false` | -| `CLEANUP_DELETE_IF_NOT_IN_GITHUB` | Delete repos not found in GitHub | `true` | `true`, `false` | +| `CLEANUP_DELETE_IF_NOT_IN_GITHUB` | Delete repos not found in GitHub (automatically enables cleanup) | `true` | `true`, `false` | | `CLEANUP_ORPHANED_REPO_ACTION` | Action for orphaned repositories | `archive` | `skip`, `archive`, `delete` | | `CLEANUP_DRY_RUN` | Test mode without actual deletion | `true` | `true`, `false` | | `CLEANUP_PROTECTED_REPOS` | Comma-separated list of protected repository names | - | Comma-separated strings | diff --git a/src/lib/env-config-loader.ts b/src/lib/env-config-loader.ts index 6840aa4..c7ac34e 100644 --- a/src/lib/env-config-loader.ts +++ b/src/lib/env-config-loader.ts @@ -135,8 +135,11 @@ function parseEnvConfig(): EnvConfig { mirrorMetadata: process.env.MIRROR_METADATA === 'true', }, schedule: { - enabled: process.env.SCHEDULE_ENABLED === 'true', - interval: process.env.SCHEDULE_INTERVAL || process.env.DELAY, // Support both old DELAY and new SCHEDULE_INTERVAL + enabled: process.env.SCHEDULE_ENABLED === 'true' || + !!process.env.GITEA_MIRROR_INTERVAL || + !!process.env.SCHEDULE_INTERVAL || + !!process.env.DELAY, // Auto-enable if any interval is specified + interval: process.env.SCHEDULE_INTERVAL || process.env.GITEA_MIRROR_INTERVAL || process.env.DELAY, // Support GITEA_MIRROR_INTERVAL, SCHEDULE_INTERVAL, and old DELAY concurrent: process.env.SCHEDULE_CONCURRENT === 'true', batchSize: process.env.SCHEDULE_BATCH_SIZE ? parseInt(process.env.SCHEDULE_BATCH_SIZE, 10) : undefined, pauseBetweenBatches: process.env.SCHEDULE_PAUSE_BETWEEN_BATCHES ? parseInt(process.env.SCHEDULE_PAUSE_BETWEEN_BATCHES, 10) : undefined, @@ -155,7 +158,8 @@ function parseEnvConfig(): EnvConfig { recentThreshold: process.env.SCHEDULE_RECENT_THRESHOLD ? parseInt(process.env.SCHEDULE_RECENT_THRESHOLD, 10) : undefined, }, cleanup: { - enabled: process.env.CLEANUP_ENABLED === 'true', + enabled: process.env.CLEANUP_ENABLED === 'true' || + process.env.CLEANUP_DELETE_IF_NOT_IN_GITHUB === 'true', // Auto-enable if deleteIfNotInGitHub is enabled retentionDays: process.env.CLEANUP_RETENTION_DAYS ? parseInt(process.env.CLEANUP_RETENTION_DAYS, 10) : undefined, deleteFromGitea: process.env.CLEANUP_DELETE_FROM_GITEA === 'true', deleteIfNotInGitHub: process.env.CLEANUP_DELETE_IF_NOT_IN_GITHUB === 'true', diff --git a/src/lib/gitea.ts b/src/lib/gitea.ts index c5265bd..3f41a53 100644 --- a/src/lib/gitea.ts +++ b/src/lib/gitea.ts @@ -7,7 +7,7 @@ import { membershipRoleEnum } from "@/types/organizations"; import { Octokit } from "@octokit/rest"; import type { Config } from "@/types/config"; import type { Organization, Repository } from "./db/schema"; -import { httpPost, httpGet } from "./http-client"; +import { httpPost, httpGet, httpDelete, httpPut } from "./http-client"; import { createMirrorJob } from "./helpers"; import { db, organizations, repositories } from "./db"; import { eq, and } from "drizzle-orm"; @@ -1739,4 +1739,69 @@ export async function mirrorGitRepoMilestonesToGitea({ } console.log(`✅ Mirrored ${mirroredCount} new milestones to Gitea`); -} \ No newline at end of file +} + +/** + * Create a simple Gitea client object with base URL and token + */ +export function createGiteaClient(url: string, token: string) { + return { url, token }; +} + +/** + * Delete a repository from Gitea + */ +export async function deleteGiteaRepo( + client: { url: string; token: string }, + owner: string, + repo: string +): Promise { + try { + const response = await httpDelete( + `${client.url}/api/v1/repos/${owner}/${repo}`, + { + Authorization: `token ${client.token}`, + } + ); + + if (!response.success) { + throw new Error(`Failed to delete repository ${owner}/${repo}: ${response.statusCode}`); + } + + console.log(`Successfully deleted repository ${owner}/${repo} from Gitea`); + } catch (error) { + console.error(`Error deleting repository ${owner}/${repo}:`, error); + throw error; + } +} + +/** + * Archive a repository in Gitea + */ +export async function archiveGiteaRepo( + client: { url: string; token: string }, + owner: string, + repo: string +): Promise { + try { + const response = await httpPut( + `${client.url}/api/v1/repos/${owner}/${repo}`, + { + archived: true, + }, + { + Authorization: `token ${client.token}`, + 'Content-Type': 'application/json', + } + ); + + if (!response.success) { + throw new Error(`Failed to archive repository ${owner}/${repo}: ${response.statusCode}`); + } + + console.log(`Successfully archived repository ${owner}/${repo} in Gitea`); + } catch (error) { + console.error(`Error archiving repository ${owner}/${repo}:`, error); + throw error; + } +} diff --git a/src/lib/repository-cleanup-service.ts b/src/lib/repository-cleanup-service.ts new file mode 100644 index 0000000..f21e279 --- /dev/null +++ b/src/lib/repository-cleanup-service.ts @@ -0,0 +1,373 @@ +/** + * Repository cleanup service for handling orphaned repositories + * This service identifies and handles repositories that exist in Gitea + * but are no longer present in GitHub (e.g., unstarred repositories) + */ + +import { db, configs, repositories } from '@/lib/db'; +import { eq, and, or, sql, not, inArray } from 'drizzle-orm'; +import { createGitHubClient, getGithubRepositories, getGithubStarredRepositories } from '@/lib/github'; +import { createGiteaClient, deleteGiteaRepo, archiveGiteaRepo } from '@/lib/gitea'; +import { getDecryptedGitHubToken, getDecryptedGiteaToken } from '@/lib/utils/config-encryption'; +import { publishEvent } from '@/lib/events'; + +let cleanupInterval: NodeJS.Timeout | null = null; +let isCleanupRunning = false; + +/** + * Identify orphaned repositories for a user + * These are repositories that exist in our database (and likely in Gitea) + * but are no longer in GitHub based on current criteria + */ +async function identifyOrphanedRepositories(config: any): Promise { + const userId = config.userId; + + try { + // Get current GitHub repositories + const decryptedToken = getDecryptedGitHubToken(config); + const octokit = createGitHubClient(decryptedToken); + + // Fetch GitHub data + const [basicAndForkedRepos, starredRepos] = await Promise.all([ + getGithubRepositories({ octokit, config }), + config.githubConfig?.includeStarred + ? getGithubStarredRepositories({ octokit, config }) + : Promise.resolve([]), + ]); + + const allGithubRepos = [...basicAndForkedRepos, ...starredRepos]; + const githubRepoFullNames = new Set(allGithubRepos.map(repo => repo.fullName)); + + // Get all repositories from our database + const dbRepos = await db + .select() + .from(repositories) + .where(eq(repositories.userId, userId)); + + // Identify orphaned repositories + const orphanedRepos = dbRepos.filter(repo => !githubRepoFullNames.has(repo.fullName)); + + return orphanedRepos; + } catch (error) { + console.error(`[Repository Cleanup] Error identifying orphaned repositories for user ${userId}:`, error); + throw error; + } +} + +/** + * Handle an orphaned repository based on configuration + */ +async function handleOrphanedRepository( + config: any, + repo: any, + action: 'skip' | 'archive' | 'delete', + dryRun: boolean +): Promise { + const repoFullName = repo.fullName; + + if (action === 'skip') { + console.log(`[Repository Cleanup] Skipping orphaned repository ${repoFullName}`); + return; + } + + if (dryRun) { + console.log(`[Repository Cleanup] DRY RUN: Would ${action} orphaned repository ${repoFullName}`); + return; + } + + try { + // Get Gitea client + const giteaToken = getDecryptedGiteaToken(config); + const giteaClient = createGiteaClient(config.giteaConfig.url, giteaToken); + + // Determine the Gitea owner and repo name + const mirroredLocation = repo.mirroredLocation || ''; + let giteaOwner = repo.owner; + let giteaRepoName = repo.name; + + if (mirroredLocation) { + const parts = mirroredLocation.split('/'); + if (parts.length >= 2) { + giteaOwner = parts[parts.length - 2]; + giteaRepoName = parts[parts.length - 1]; + } + } + + if (action === 'archive') { + console.log(`[Repository Cleanup] Archiving orphaned repository ${repoFullName} in Gitea`); + await archiveGiteaRepo(giteaClient, giteaOwner, giteaRepoName); + + // Update database status + await db.update(repositories).set({ + status: 'archived', + errorMessage: 'Repository archived - no longer in GitHub', + updatedAt: new Date(), + }).where(eq(repositories.id, repo.id)); + + // Create event + await publishEvent({ + userId: config.userId, + channel: 'repository', + payload: { + type: 'repository.archived', + message: `Repository ${repoFullName} archived (no longer in GitHub)`, + metadata: { + repositoryId: repo.id, + repositoryName: repo.name, + action: 'archive', + reason: 'orphaned', + }, + }, + }); + } else if (action === 'delete') { + console.log(`[Repository Cleanup] Deleting orphaned repository ${repoFullName} from Gitea`); + await deleteGiteaRepo(giteaClient, giteaOwner, giteaRepoName); + + // Delete from database + await db.delete(repositories).where(eq(repositories.id, repo.id)); + + // Create event + await publishEvent({ + userId: config.userId, + channel: 'repository', + payload: { + type: 'repository.deleted', + message: `Repository ${repoFullName} deleted (no longer in GitHub)`, + metadata: { + repositoryId: repo.id, + repositoryName: repo.name, + action: 'delete', + reason: 'orphaned', + }, + }, + }); + } + } catch (error) { + console.error(`[Repository Cleanup] Error handling orphaned repository ${repoFullName}:`, error); + + // Update repository with error status + await db.update(repositories).set({ + status: 'failed', + errorMessage: `Cleanup failed: ${error instanceof Error ? error.message : 'Unknown error'}`, + updatedAt: new Date(), + }).where(eq(repositories.id, repo.id)); + + throw error; + } +} + +/** + * Run repository cleanup for a single configuration + */ +async function runRepositoryCleanup(config: any): Promise<{ + orphanedCount: number; + processedCount: number; + errors: string[]; +}> { + const userId = config.userId; + const cleanupConfig = config.cleanupConfig || {}; + + console.log(`[Repository Cleanup] Starting repository cleanup for user ${userId}`); + + const results = { + orphanedCount: 0, + processedCount: 0, + errors: [] as string[], + }; + + try { + // Check if repository cleanup is enabled - either through the main toggle or the specific feature + const isCleanupEnabled = cleanupConfig.enabled || cleanupConfig.deleteIfNotInGitHub; + + if (!isCleanupEnabled) { + console.log(`[Repository Cleanup] Repository cleanup disabled for user ${userId} (enabled=${cleanupConfig.enabled}, deleteIfNotInGitHub=${cleanupConfig.deleteIfNotInGitHub})`); + return results; + } + + // Only process if deleteIfNotInGitHub is enabled (this is the main feature flag) + if (!cleanupConfig.deleteIfNotInGitHub) { + console.log(`[Repository Cleanup] Delete if not in GitHub disabled for user ${userId}`); + return results; + } + + // Warn if deleteFromGitea is explicitly disabled but deleteIfNotInGitHub is enabled + if (cleanupConfig.deleteFromGitea === false && cleanupConfig.deleteIfNotInGitHub) { + console.warn(`[Repository Cleanup] Warning: CLEANUP_DELETE_FROM_GITEA is false but CLEANUP_DELETE_IF_NOT_IN_GITHUB is true. Proceeding with cleanup.`); + } + + // Identify orphaned repositories + const orphanedRepos = await identifyOrphanedRepositories(config); + results.orphanedCount = orphanedRepos.length; + + if (orphanedRepos.length === 0) { + console.log(`[Repository Cleanup] No orphaned repositories found for user ${userId}`); + return results; + } + + console.log(`[Repository Cleanup] Found ${orphanedRepos.length} orphaned repositories for user ${userId}`); + + // Get protected repositories + const protectedRepos = new Set(cleanupConfig.protectedRepos || []); + + // Process orphaned repositories + const action = cleanupConfig.orphanedRepoAction || 'archive'; + const dryRun = cleanupConfig.dryRun ?? true; + const batchSize = cleanupConfig.batchSize || 10; + const pauseBetweenDeletes = cleanupConfig.pauseBetweenDeletes || 2000; + + for (let i = 0; i < orphanedRepos.length; i += batchSize) { + const batch = orphanedRepos.slice(i, i + batchSize); + + for (const repo of batch) { + // Skip protected repositories + if (protectedRepos.has(repo.name) || protectedRepos.has(repo.fullName)) { + console.log(`[Repository Cleanup] Skipping protected repository ${repo.fullName}`); + continue; + } + + try { + await handleOrphanedRepository(config, repo, action, dryRun); + results.processedCount++; + } catch (error) { + const errorMsg = `Failed to ${action} ${repo.fullName}: ${error instanceof Error ? error.message : 'Unknown error'}`; + console.error(`[Repository Cleanup] ${errorMsg}`); + results.errors.push(errorMsg); + } + + // Pause between operations to avoid rate limiting + if (i < orphanedRepos.length - 1) { + await new Promise(resolve => setTimeout(resolve, pauseBetweenDeletes)); + } + } + } + + // Update cleanup timestamps + const currentTime = new Date(); + await db.update(configs).set({ + cleanupConfig: { + ...cleanupConfig, + lastRun: currentTime, + nextRun: new Date(currentTime.getTime() + 24 * 60 * 60 * 1000), // Next run in 24 hours + }, + updatedAt: currentTime, + }).where(eq(configs.id, config.id)); + + console.log(`[Repository Cleanup] Completed cleanup for user ${userId}: ${results.processedCount}/${results.orphanedCount} processed`); + } catch (error) { + console.error(`[Repository Cleanup] Error during cleanup for user ${userId}:`, error); + results.errors.push(`General cleanup error: ${error instanceof Error ? error.message : 'Unknown error'}`); + } + + return results; +} + +/** + * Main repository cleanup loop + */ +async function repositoryCleanupLoop(): Promise { + if (isCleanupRunning) { + console.log('[Repository Cleanup] Cleanup is already running, skipping this cycle'); + return; + } + + isCleanupRunning = true; + + try { + // Get all active configurations with repository cleanup enabled + const activeConfigs = await db + .select() + .from(configs) + .where(eq(configs.isActive, true)); + + const enabledConfigs = activeConfigs.filter(config => { + const cleanupConfig = config.cleanupConfig || {}; + // Enable cleanup if either the main toggle is on OR deleteIfNotInGitHub is enabled + return cleanupConfig.enabled === true || cleanupConfig.deleteIfNotInGitHub === true; + }); + + if (enabledConfigs.length === 0) { + console.log('[Repository Cleanup] No configurations with repository cleanup enabled'); + return; + } + + console.log(`[Repository Cleanup] Processing ${enabledConfigs.length} configurations`); + + // Process each configuration + for (const config of enabledConfigs) { + await runRepositoryCleanup(config); + } + } catch (error) { + console.error('[Repository Cleanup] Error in cleanup loop:', error); + } finally { + isCleanupRunning = false; + } +} + +/** + * Start the repository cleanup service + */ +export function startRepositoryCleanupService(): void { + if (cleanupInterval) { + console.log('[Repository Cleanup] Service is already running'); + return; + } + + console.log('[Repository Cleanup] Starting repository cleanup service'); + + // Run immediately on start + repositoryCleanupLoop().catch(error => { + console.error('[Repository Cleanup] Error during initial cleanup run:', error); + }); + + // Run every 6 hours to check for orphaned repositories + const checkInterval = 6 * 60 * 60 * 1000; // 6 hours + cleanupInterval = setInterval(() => { + repositoryCleanupLoop().catch(error => { + console.error('[Repository Cleanup] Error during cleanup run:', error); + }); + }, checkInterval); + + console.log('[Repository Cleanup] Service started, checking every 6 hours'); +} + +/** + * Stop the repository cleanup service + */ +export function stopRepositoryCleanupService(): void { + if (cleanupInterval) { + clearInterval(cleanupInterval); + cleanupInterval = null; + console.log('[Repository Cleanup] Service stopped'); + } +} + +/** + * Check if the repository cleanup service is running + */ +export function isRepositoryCleanupServiceRunning(): boolean { + return cleanupInterval !== null; +} + +/** + * Manually trigger repository cleanup for a specific user + */ +export async function triggerRepositoryCleanup(userId: string): Promise<{ + orphanedCount: number; + processedCount: number; + errors: string[]; +}> { + const [config] = await db + .select() + .from(configs) + .where(and( + eq(configs.userId, userId), + eq(configs.isActive, true) + )) + .limit(1); + + if (!config) { + throw new Error('No active configuration found for user'); + } + + return runRepositoryCleanup(config); +} \ No newline at end of file diff --git a/src/lib/scheduler-service.ts b/src/lib/scheduler-service.ts new file mode 100644 index 0000000..95664ff --- /dev/null +++ b/src/lib/scheduler-service.ts @@ -0,0 +1,286 @@ +/** + * Scheduler service for automatic repository mirroring + * This service runs in the background and automatically mirrors repositories + * based on the configured schedule + */ + +import { db, configs, repositories } from '@/lib/db'; +import { eq, and, or, lt, gte } from 'drizzle-orm'; +import { syncGiteaRepo } from '@/lib/gitea'; +import { createGitHubClient } from '@/lib/github'; +import { getDecryptedGitHubToken } from '@/lib/utils/config-encryption'; +import { parseInterval, formatDuration } from '@/lib/utils/duration-parser'; +import type { Repository } from '@/types'; +import { repoStatusEnum, repositoryVisibilityEnum } from '@/types/Repository'; + +let schedulerInterval: NodeJS.Timeout | null = null; +let isSchedulerRunning = false; + +/** + * Parse schedule interval with enhanced support for duration strings, cron, and numbers + * Supports formats like: "8h", "30m", "24h", "0 */2 * * *", or plain numbers (seconds) + */ +function parseScheduleInterval(interval: string | number): number { + try { + const milliseconds = parseInterval(interval); + console.log(`[Scheduler] Parsed interval "${interval}" as ${formatDuration(milliseconds)}`); + return milliseconds; + } catch (error) { + console.error(`[Scheduler] Failed to parse interval "${interval}": ${error instanceof Error ? error.message : 'Unknown error'}`); + const defaultInterval = 60 * 60 * 1000; // 1 hour + console.log(`[Scheduler] Using default interval: ${formatDuration(defaultInterval)}`); + return defaultInterval; + } +} + +/** + * Run scheduled mirror sync for a single user configuration + */ +async function runScheduledSync(config: any): Promise { + const userId = config.userId; + console.log(`[Scheduler] Running scheduled sync for user ${userId}`); + + try { + // Update lastRun timestamp + const currentTime = new Date(); + const scheduleConfig = config.scheduleConfig || {}; + + // Priority order: scheduleConfig.interval > giteaConfig.mirrorInterval > default + const intervalSource = scheduleConfig.interval || + config.giteaConfig?.mirrorInterval || + '1h'; // Default to 1 hour instead of 3600 seconds + + console.log(`[Scheduler] Using interval source for user ${userId}: ${intervalSource}`); + const interval = parseScheduleInterval(intervalSource); + + // Note: The interval timing is calculated from the LAST RUN time, not from container startup + // This means if GITEA_MIRROR_INTERVAL=8h, the next sync will be 8 hours from the last completed sync + const nextRun = new Date(currentTime.getTime() + interval); + + console.log(`[Scheduler] Next sync for user ${userId} scheduled for: ${nextRun.toISOString()} (in ${formatDuration(interval)})`); + + await db.update(configs).set({ + scheduleConfig: { + ...scheduleConfig, + lastRun: currentTime, + nextRun: nextRun, + }, + updatedAt: currentTime, + }).where(eq(configs.id, config.id)); + + // Get repositories to sync + let reposToSync = await db + .select() + .from(repositories) + .where( + and( + eq(repositories.userId, userId), + or( + eq(repositories.status, 'mirrored'), + eq(repositories.status, 'synced'), + eq(repositories.status, 'failed'), + eq(repositories.status, 'pending') + ) + ) + ); + + // Filter based on schedule configuration + if (scheduleConfig.skipRecentlyMirrored) { + const recentThreshold = scheduleConfig.recentThreshold || 3600000; // Default 1 hour + const thresholdTime = new Date(currentTime.getTime() - recentThreshold); + + reposToSync = reposToSync.filter(repo => { + if (!repo.lastMirrored) return true; // Never mirrored + return repo.lastMirrored < thresholdTime; + }); + } + + if (scheduleConfig.onlyMirrorUpdated) { + const updateInterval = scheduleConfig.updateInterval || 86400000; // Default 24 hours + const updateThreshold = new Date(currentTime.getTime() - updateInterval); + + // Check GitHub for updates (this would need to be implemented) + // For now, we'll sync repos that haven't been synced in the update interval + reposToSync = reposToSync.filter(repo => { + if (!repo.lastMirrored) return true; + return repo.lastMirrored < updateThreshold; + }); + } + + if (reposToSync.length === 0) { + console.log(`[Scheduler] No repositories to sync for user ${userId}`); + return; + } + + console.log(`[Scheduler] Syncing ${reposToSync.length} repositories for user ${userId}`); + + // Process repositories in batches + const batchSize = scheduleConfig.batchSize || 10; + const pauseBetweenBatches = scheduleConfig.pauseBetweenBatches || 5000; + const concurrent = scheduleConfig.concurrent ?? false; + + for (let i = 0; i < reposToSync.length; i += batchSize) { + const batch = reposToSync.slice(i, i + batchSize); + + if (concurrent) { + // Process batch concurrently + await Promise.allSettled( + batch.map(repo => syncSingleRepository(config, repo)) + ); + } else { + // Process batch sequentially + for (const repo of batch) { + await syncSingleRepository(config, repo); + } + } + + // Pause between batches if not the last batch + if (i + batchSize < reposToSync.length) { + await new Promise(resolve => setTimeout(resolve, pauseBetweenBatches)); + } + } + + console.log(`[Scheduler] Completed scheduled sync for user ${userId}`); + } catch (error) { + console.error(`[Scheduler] Error during scheduled sync for user ${userId}:`, error); + } +} + +/** + * Sync a single repository + */ +async function syncSingleRepository(config: any, repo: any): Promise { + try { + const repository: Repository = { + ...repo, + status: repoStatusEnum.parse(repo.status), + organization: repo.organization ?? undefined, + lastMirrored: repo.lastMirrored ?? undefined, + errorMessage: repo.errorMessage ?? undefined, + mirroredLocation: repo.mirroredLocation || '', + forkedFrom: repo.forkedFrom ?? undefined, + visibility: repositoryVisibilityEnum.parse(repo.visibility), + }; + + await syncGiteaRepo({ config, repository }); + console.log(`[Scheduler] Successfully synced repository ${repo.fullName}`); + } catch (error) { + console.error(`[Scheduler] Failed to sync repository ${repo.fullName}:`, error); + + // Update repository status to failed + await db.update(repositories).set({ + status: 'failed', + errorMessage: error instanceof Error ? error.message : 'Unknown error', + updatedAt: new Date(), + }).where(eq(repositories.id, repo.id)); + } +} + +/** + * Main scheduler loop + */ +async function schedulerLoop(): Promise { + if (isSchedulerRunning) { + console.log('[Scheduler] Scheduler is already running, skipping this cycle'); + return; + } + + isSchedulerRunning = true; + + try { + // Get all active configurations with scheduling enabled + const activeConfigs = await db + .select() + .from(configs) + .where( + and( + eq(configs.isActive, true) + ) + ); + + const enabledConfigs = activeConfigs.filter(config => + config.scheduleConfig?.enabled === true + ); + + if (enabledConfigs.length === 0) { + console.log(`[Scheduler] No configurations with scheduling enabled (found ${activeConfigs.length} active configs)`); + + // Show details about why configs are not enabled + activeConfigs.forEach(config => { + const scheduleEnabled = config.scheduleConfig?.enabled; + const mirrorInterval = config.giteaConfig?.mirrorInterval; + console.log(`[Scheduler] User ${config.userId}: scheduleEnabled=${scheduleEnabled}, mirrorInterval=${mirrorInterval}`); + }); + + return; + } + + console.log(`[Scheduler] Processing ${enabledConfigs.length} configurations with scheduling enabled (out of ${activeConfigs.length} total active configs)`); + + // Check each configuration to see if it's time to run + const currentTime = new Date(); + + for (const config of enabledConfigs) { + const scheduleConfig = config.scheduleConfig || {}; + + // Check if it's time to run based on nextRun + if (scheduleConfig.nextRun && new Date(scheduleConfig.nextRun) > currentTime) { + console.log(`[Scheduler] Skipping user ${config.userId} - next run at ${scheduleConfig.nextRun}`); + continue; + } + + // If no nextRun is set, or it's past due, run the sync + await runScheduledSync(config); + } + } catch (error) { + console.error('[Scheduler] Error in scheduler loop:', error); + } finally { + isSchedulerRunning = false; + } +} + +/** + * Start the scheduler service + */ +export function startSchedulerService(): void { + if (schedulerInterval) { + console.log('[Scheduler] Scheduler service is already running'); + return; + } + + console.log('[Scheduler] Starting scheduler service'); + + // Run immediately on start + schedulerLoop().catch(error => { + console.error('[Scheduler] Error during initial scheduler run:', error); + }); + + // Run every minute to check for scheduled tasks + const checkInterval = 60 * 1000; // 1 minute + schedulerInterval = setInterval(() => { + schedulerLoop().catch(error => { + console.error('[Scheduler] Error during scheduler run:', error); + }); + }, checkInterval); + + console.log(`[Scheduler] Scheduler service started, checking every ${formatDuration(checkInterval)} for scheduled tasks`); + console.log('[Scheduler] To trigger manual sync, check your configuration intervals and ensure SCHEDULE_ENABLED=true or use GITEA_MIRROR_INTERVAL'); +} + +/** + * Stop the scheduler service + */ +export function stopSchedulerService(): void { + if (schedulerInterval) { + clearInterval(schedulerInterval); + schedulerInterval = null; + console.log('[Scheduler] Scheduler service stopped'); + } +} + +/** + * Check if the scheduler service is running + */ +export function isSchedulerServiceRunning(): boolean { + return schedulerInterval !== null; +} \ No newline at end of file diff --git a/src/lib/utils/duration-parser.test.ts b/src/lib/utils/duration-parser.test.ts new file mode 100644 index 0000000..fda4a43 --- /dev/null +++ b/src/lib/utils/duration-parser.test.ts @@ -0,0 +1,94 @@ +import { test, expect } from 'bun:test'; +import { parseDuration, parseInterval, formatDuration, parseCronInterval } from './duration-parser'; + +test('parseDuration - handles duration strings correctly', () => { + // Hours + expect(parseDuration('8h')).toBe(8 * 60 * 60 * 1000); + expect(parseDuration('1h')).toBe(60 * 60 * 1000); + expect(parseDuration('24h')).toBe(24 * 60 * 60 * 1000); + + // Minutes + expect(parseDuration('30m')).toBe(30 * 60 * 1000); + expect(parseDuration('5m')).toBe(5 * 60 * 1000); + + // Seconds + expect(parseDuration('45s')).toBe(45 * 1000); + expect(parseDuration('1s')).toBe(1000); + + // Days + expect(parseDuration('1d')).toBe(24 * 60 * 60 * 1000); + expect(parseDuration('7d')).toBe(7 * 24 * 60 * 60 * 1000); + + // Numbers (treated as seconds) + expect(parseDuration(3600)).toBe(3600 * 1000); + expect(parseDuration('3600')).toBe(3600 * 1000); +}); + +test('parseDuration - handles edge cases', () => { + // Case insensitive + expect(parseDuration('8H')).toBe(8 * 60 * 60 * 1000); + expect(parseDuration('30M')).toBe(30 * 60 * 1000); + + // With spaces + expect(parseDuration('8 h')).toBe(8 * 60 * 60 * 1000); + expect(parseDuration('30 minutes')).toBe(30 * 60 * 1000); + + // Fractional values + expect(parseDuration('1.5h')).toBe(1.5 * 60 * 60 * 1000); + expect(parseDuration('2.5m')).toBe(2.5 * 60 * 1000); +}); + +test('parseDuration - throws on invalid input', () => { + expect(() => parseDuration('')).toThrow(); + expect(() => parseDuration('invalid')).toThrow(); + expect(() => parseDuration('8x')).toThrow(); + expect(() => parseDuration('-1h')).toThrow(); +}); + +test('parseInterval - handles cron expressions', () => { + // Every 2 hours + expect(parseInterval('0 */2 * * *')).toBe(2 * 60 * 60 * 1000); + + // Every 15 minutes + expect(parseInterval('*/15 * * * *')).toBe(15 * 60 * 1000); + + // Daily at 2 AM + expect(parseInterval('0 2 * * *')).toBe(24 * 60 * 60 * 1000); +}); + +test('parseInterval - prioritizes duration strings over cron', () => { + expect(parseInterval('8h')).toBe(8 * 60 * 60 * 1000); + expect(parseInterval('30m')).toBe(30 * 60 * 1000); + expect(parseInterval(3600)).toBe(3600 * 1000); +}); + +test('formatDuration - converts milliseconds back to readable format', () => { + expect(formatDuration(1000)).toBe('1s'); + expect(formatDuration(60 * 1000)).toBe('1m'); + expect(formatDuration(60 * 60 * 1000)).toBe('1h'); + expect(formatDuration(24 * 60 * 60 * 1000)).toBe('1d'); + expect(formatDuration(8 * 60 * 60 * 1000)).toBe('8h'); + expect(formatDuration(500)).toBe('500ms'); +}); + +test('parseCronInterval - handles common cron patterns', () => { + expect(parseCronInterval('0 */8 * * *')).toBe(8 * 60 * 60 * 1000); + expect(parseCronInterval('*/30 * * * *')).toBe(30 * 60 * 1000); + expect(parseCronInterval('0 2 * * *')).toBe(24 * 60 * 60 * 1000); + expect(parseCronInterval('0 0 * * 0')).toBe(7 * 24 * 60 * 60 * 1000); // Weekly +}); + +test('Integration test - Issue #72 scenario', () => { + // User sets GITEA_MIRROR_INTERVAL=8h + const userInterval = '8h'; + const parsedMs = parseInterval(userInterval); + + expect(parsedMs).toBe(8 * 60 * 60 * 1000); // 8 hours in milliseconds + expect(formatDuration(parsedMs)).toBe('8h'); + + // Should work from container startup time + const startTime = new Date(); + const nextRun = new Date(startTime.getTime() + parsedMs); + + expect(nextRun.getTime() - startTime.getTime()).toBe(8 * 60 * 60 * 1000); +}); \ No newline at end of file diff --git a/src/lib/utils/duration-parser.ts b/src/lib/utils/duration-parser.ts new file mode 100644 index 0000000..d724be1 --- /dev/null +++ b/src/lib/utils/duration-parser.ts @@ -0,0 +1,251 @@ +/** + * Duration parser utility for converting human-readable duration strings to milliseconds + * Supports formats like: 8h, 30m, 24h, 1d, 5s, etc. + */ + +export interface ParsedDuration { + value: number; + unit: string; + milliseconds: number; +} + +/** + * Parse a duration string into milliseconds + * @param duration - Duration string (e.g., "8h", "30m", "1d", "5s") or number in seconds + * @returns Duration in milliseconds + */ +export function parseDuration(duration: string | number): number { + if (typeof duration === 'number') { + return duration * 1000; // Convert seconds to milliseconds + } + + if (!duration || typeof duration !== 'string') { + throw new Error('Invalid duration: must be a string or number'); + } + + // Try to parse as number first (assume seconds) + const parsed = parseInt(duration, 10); + if (!isNaN(parsed) && duration === parsed.toString()) { + return parsed * 1000; // Convert seconds to milliseconds + } + + // Parse duration string with unit + const match = duration.trim().match(/^(\d+(?:\.\d+)?)\s*([a-zA-Z]+)$/); + if (!match) { + throw new Error(`Invalid duration format: "${duration}". Expected format like "8h", "30m", "1d"`); + } + + const [, valueStr, unit] = match; + const value = parseFloat(valueStr); + + if (isNaN(value) || value < 0) { + throw new Error(`Invalid duration value: "${valueStr}". Must be a positive number`); + } + + const unitLower = unit.toLowerCase(); + let multiplier: number; + + switch (unitLower) { + case 'ms': + case 'millisecond': + case 'milliseconds': + multiplier = 1; + break; + case 's': + case 'sec': + case 'second': + case 'seconds': + multiplier = 1000; + break; + case 'm': + case 'min': + case 'minute': + case 'minutes': + multiplier = 60 * 1000; + break; + case 'h': + case 'hr': + case 'hour': + case 'hours': + multiplier = 60 * 60 * 1000; + break; + case 'd': + case 'day': + case 'days': + multiplier = 24 * 60 * 60 * 1000; + break; + case 'w': + case 'week': + case 'weeks': + multiplier = 7 * 24 * 60 * 60 * 1000; + break; + default: + throw new Error(`Unsupported duration unit: "${unit}". Supported units: ms, s, m, h, d, w`); + } + + return Math.floor(value * multiplier); +} + +/** + * Parse a duration string and return detailed information + * @param duration - Duration string + * @returns Parsed duration with value, unit, and milliseconds + */ +export function parseDurationDetailed(duration: string | number): ParsedDuration { + const milliseconds = parseDuration(duration); + + if (typeof duration === 'number') { + return { + value: duration, + unit: 's', + milliseconds + }; + } + + const match = duration.trim().match(/^(\d+(?:\.\d+)?)\s*([a-zA-Z]+)$/); + if (!match) { + // If it's just a number as string + const value = parseFloat(duration); + if (!isNaN(value)) { + return { + value, + unit: 's', + milliseconds + }; + } + throw new Error(`Invalid duration format: "${duration}"`); + } + + const [, valueStr, unit] = match; + return { + value: parseFloat(valueStr), + unit: unit.toLowerCase(), + milliseconds + }; +} + +/** + * Format milliseconds back to human-readable duration + * @param milliseconds - Duration in milliseconds + * @returns Human-readable duration string + */ +export function formatDuration(milliseconds: number): string { + if (milliseconds < 1000) { + return `${milliseconds}ms`; + } + + const seconds = Math.floor(milliseconds / 1000); + if (seconds < 60) { + return `${seconds}s`; + } + + const minutes = Math.floor(seconds / 60); + if (minutes < 60) { + return `${minutes}m`; + } + + const hours = Math.floor(minutes / 60); + if (hours < 24) { + return `${hours}h`; + } + + const days = Math.floor(hours / 24); + return `${days}d`; +} + +/** + * Parse cron expression to approximate milliseconds interval + * This is a simplified parser for common cron patterns + * @param cron - Cron expression + * @returns Approximate interval in milliseconds + */ +export function parseCronInterval(cron: string): number { + if (!cron || typeof cron !== 'string') { + throw new Error('Invalid cron expression'); + } + + const parts = cron.trim().split(/\s+/); + if (parts.length !== 5) { + throw new Error('Cron expression must have 5 parts (minute hour day month weekday)'); + } + + const [minute, hour, day, month, weekday] = parts; + + // Extract hour interval from patterns like "*/2" (every 2 hours) + if (hour.includes('*/')) { + const everyMatch = hour.match(/\*\/(\d+)/); + if (everyMatch) { + const hours = parseInt(everyMatch[1], 10); + return hours * 60 * 60 * 1000; // Convert hours to milliseconds + } + } + + // Extract minute interval from patterns like "*/15" (every 15 minutes) + if (minute.includes('*/')) { + const everyMatch = minute.match(/\*\/(\d+)/); + if (everyMatch) { + const minutes = parseInt(everyMatch[1], 10); + return minutes * 60 * 1000; // Convert minutes to milliseconds + } + } + + // Daily patterns like "0 2 * * *" (daily at 2 AM) + if (hour !== '*' && minute !== '*' && day === '*' && month === '*' && weekday === '*') { + return 24 * 60 * 60 * 1000; // 24 hours in milliseconds + } + + // Weekly patterns + if (weekday !== '*') { + return 7 * 24 * 60 * 60 * 1000; // 7 days in milliseconds + } + + // Monthly patterns + if (day !== '*') { + return 30 * 24 * 60 * 60 * 1000; // Approximate month (30 days) + } + + // Default to 1 hour if unable to parse + return 60 * 60 * 1000; +} + +/** + * Enhanced interval parser that handles duration strings, cron expressions, and numbers + * @param interval - Interval specification (duration string, cron, or number) + * @returns Interval in milliseconds + */ +export function parseInterval(interval: string | number): number { + if (typeof interval === 'number') { + return interval * 1000; // Convert seconds to milliseconds + } + + if (!interval || typeof interval !== 'string') { + throw new Error('Invalid interval: must be a string or number'); + } + + const trimmed = interval.trim(); + + // Check if it's a cron expression (contains spaces and specific patterns) + if (trimmed.includes(' ') && trimmed.split(/\s+/).length === 5) { + try { + return parseCronInterval(trimmed); + } catch (error) { + console.warn(`Failed to parse as cron expression: ${error instanceof Error ? error.message : 'Unknown error'}`); + // Fall through to duration parsing + } + } + + // Try to parse as duration string + try { + return parseDuration(trimmed); + } catch (error) { + console.warn(`Failed to parse as duration: ${error instanceof Error ? error.message : 'Unknown error'}`); + + // Last resort: try as plain number (seconds) + const parsed = parseInt(trimmed, 10); + if (!isNaN(parsed)) { + return parsed * 1000; + } + + throw new Error(`Unable to parse interval: "${interval}". Expected duration (e.g., "8h"), cron expression (e.g., "0 */2 * * *"), or number of seconds`); + } +} \ No newline at end of file diff --git a/src/middleware.ts b/src/middleware.ts index fe1f66e..0a186f9 100644 --- a/src/middleware.ts +++ b/src/middleware.ts @@ -1,6 +1,8 @@ import { defineMiddleware } from 'astro:middleware'; import { initializeRecovery, hasJobsNeedingRecovery, getRecoveryStatus } from './lib/recovery'; import { startCleanupService, stopCleanupService } from './lib/cleanup-service'; +import { startSchedulerService, stopSchedulerService } from './lib/scheduler-service'; +import { startRepositoryCleanupService, stopRepositoryCleanupService } from './lib/repository-cleanup-service'; import { initializeShutdownManager, registerShutdownCallback } from './lib/shutdown-manager'; import { setupSignalHandlers } from './lib/signal-handlers'; import { auth } from './lib/auth'; @@ -11,6 +13,8 @@ import { initializeConfigFromEnv } from './lib/env-config-loader'; let recoveryInitialized = false; let recoveryAttempted = false; let cleanupServiceStarted = false; +let schedulerServiceStarted = false; +let repositoryCleanupServiceStarted = false; let shutdownManagerInitialized = false; let envConfigInitialized = false; @@ -152,6 +156,44 @@ export const onRequest = defineMiddleware(async (context, next) => { } } + // Start scheduler service only once after recovery is complete + if (recoveryInitialized && !schedulerServiceStarted) { + try { + console.log('Starting automatic mirror scheduler service...'); + startSchedulerService(); + + // Register scheduler service shutdown callback + registerShutdownCallback(async () => { + console.log('🛑 Shutting down scheduler service...'); + stopSchedulerService(); + }); + + schedulerServiceStarted = true; + } catch (error) { + console.error('Failed to start scheduler service:', error); + // Don't fail the request if scheduler service fails to start + } + } + + // Start repository cleanup service only once after recovery is complete + if (recoveryInitialized && !repositoryCleanupServiceStarted) { + try { + console.log('Starting repository cleanup service...'); + startRepositoryCleanupService(); + + // Register repository cleanup service shutdown callback + registerShutdownCallback(async () => { + console.log('🛑 Shutting down repository cleanup service...'); + stopRepositoryCleanupService(); + }); + + repositoryCleanupServiceStarted = true; + } catch (error) { + console.error('Failed to start repository cleanup service:', error); + // Don't fail the request if repository cleanup service fails to start + } + } + // Continue with the request return next(); }); diff --git a/src/pages/api/cleanup/trigger.ts b/src/pages/api/cleanup/trigger.ts new file mode 100644 index 0000000..57d830d --- /dev/null +++ b/src/pages/api/cleanup/trigger.ts @@ -0,0 +1,130 @@ +import type { APIRoute } from 'astro'; +import { auth } from '@/lib/auth'; +import { createSecureErrorResponse } from '@/lib/utils/error-handler'; +import { triggerRepositoryCleanup } from '@/lib/repository-cleanup-service'; + +/** + * Manually trigger repository cleanup for the current user + * This can be called when repositories are updated or when immediate cleanup is needed + */ +export const POST: APIRoute = async ({ request }) => { + try { + // Get user session + const session = await auth.api.getSession({ + headers: request.headers, + }); + + if (!session?.user?.id) { + return new Response( + JSON.stringify({ error: 'Unauthorized' }), + { + status: 401, + headers: { 'Content-Type': 'application/json' }, + } + ); + } + + console.log(`[Cleanup API] Manual cleanup triggered for user ${session.user.id}`); + + // Trigger immediate cleanup for this user + const results = await triggerRepositoryCleanup(session.user.id); + + console.log(`[Cleanup API] Cleanup completed: ${results.processedCount}/${results.orphanedCount} repositories processed, ${results.errors.length} errors`); + + return new Response( + JSON.stringify({ + success: true, + message: 'Repository cleanup completed', + results: { + orphanedCount: results.orphanedCount, + processedCount: results.processedCount, + errorCount: results.errors.length, + errors: results.errors, + }, + }), + { + status: 200, + headers: { 'Content-Type': 'application/json' }, + } + ); + } catch (error) { + console.error('[Cleanup API] Error during manual cleanup:', error); + return createSecureErrorResponse(error); + } +}; + +/** + * Get cleanup status and configuration for the current user + */ +export const GET: APIRoute = async ({ request }) => { + try { + // Get user session + const session = await auth.api.getSession({ + headers: request.headers, + }); + + if (!session?.user?.id) { + return new Response( + JSON.stringify({ error: 'Unauthorized' }), + { + status: 401, + headers: { 'Content-Type': 'application/json' }, + } + ); + } + + // Import inside the function to avoid import issues + const { db, configs } = await import('@/lib/db'); + const { eq, and } = await import('drizzle-orm'); + + // Get user's cleanup configuration + const [config] = await db + .select() + .from(configs) + .where(and( + eq(configs.userId, session.user.id), + eq(configs.isActive, true) + )) + .limit(1); + + if (!config) { + return new Response( + JSON.stringify({ + success: false, + message: 'No active configuration found', + cleanupEnabled: false, + }), + { + status: 200, + headers: { 'Content-Type': 'application/json' }, + } + ); + } + + const cleanupConfig = config.cleanupConfig || {}; + const isCleanupEnabled = cleanupConfig.enabled || cleanupConfig.deleteIfNotInGitHub; + + return new Response( + JSON.stringify({ + success: true, + cleanupEnabled: isCleanupEnabled, + configuration: { + enabled: cleanupConfig.enabled, + deleteFromGitea: cleanupConfig.deleteFromGitea, + deleteIfNotInGitHub: cleanupConfig.deleteIfNotInGitHub, + dryRun: cleanupConfig.dryRun, + orphanedRepoAction: cleanupConfig.orphanedRepoAction || 'archive', + lastRun: cleanupConfig.lastRun, + nextRun: cleanupConfig.nextRun, + }, + }), + { + status: 200, + headers: { 'Content-Type': 'application/json' }, + } + ); + } catch (error) { + console.error('[Cleanup API] Error getting cleanup status:', error); + return createSecureErrorResponse(error); + } +}; \ No newline at end of file