Compare commits

...

2 Commits

Author SHA1 Message Date
Arunavo Ray
5add8766a4 fix(scheduler,config): preserve ENV schedule; add AUTO_MIRROR_REPOS auto-mirroring
- Prevent Automation UI from overriding schedule:
      - mapDbScheduleToUi now parses intervals robustly (cron/duration/seconds) via parseInterval
      - mapUiScheduleToDb merges with existing config and stores interval as seconds (no lossy cron conversion)
      - /api/config passes existing scheduleConfig to preserve ENV-sourced values
      - schedule-sync endpoint uses parseInterval for nextRun calculation
  - Add AUTO_MIRROR_REPOS support and scheduled auto-mirror phase:
      - scheduleConfig schema includes autoImport and autoMirror
      - env-config-loader reads AUTO_MIRROR_REPOS and carries through to DB
      - scheduler auto-mirrors imported/pending/failed repos when autoMirror is enabled before regular sync
      - docker-compose and ENV docs updated with AUTO_MIRROR_REPOS
  - Tests pass and build succeeds
2025-09-14 08:31:31 +05:30
Arunavo Ray
6ce70bb5bf chore(version): bump to 3.7.1\n\ncleanup: attempt fix for orphaned repo archiving (refs #84)\n- Sanitize mirror rename to satisfy AlphaDashDot; timestamped fallback\n- Resolve Gitea owner robustly via mirroredLocation/strategy; verify presence\n- Add 'archived' status to Zod enums; set isArchived on archive\n- Update CHANGELOG entry without closing keyword 2025-09-14 07:53:36 +05:30
14 changed files with 232 additions and 74 deletions

View File

@@ -58,6 +58,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Updated README with new features
- Enhanced CLAUDE.md with repository status definitions
## [3.7.1] - 2025-09-14
### Fixed
- Cleanup archiving for mirror repositories now works reliably (refs #84; awaiting user confirmation).
- Gitea rejects names violating the AlphaDashDot rule; archiving a mirror now uses a sanitized rename strategy (`archived-<name>`), with a timestamped fallback on conflicts or validation errors.
- Owner resolution during cleanup no longer uses the GitHub owner by mistake. It prefers `mirroredLocation`, falls back to computed Gitea owner via configuration, and verifies location with a presence check to avoid `GetUserByName` 404s.
- Repositories UI crash resolved when cleanup marked repos as archived.
- Added `"archived"` to repository/job status enums, fixing Zod validation errors on the Repositories page.
### Changed
- Archiving logic for mirror repos is non-destructive by design: data is preserved, repo is renamed with an archive marker, and mirror interval is reduced (besteffort) to minimize sync attempts.
- Cleanup service updates DB to `status: "archived"` and `isArchived: true` on successful archive path.
### Notes
- This release addresses the scenario where a GitHub source disappears (deleted/banned), ensuring Gitea backups are preserved even when using `CLEANUP_DELETE_IF_NOT_IN_GITHUB=true` with `CLEANUP_ORPHANED_REPO_ACTION=archive`.
- No database migration required.
## [3.2.6] - 2025-08-09
### Fixed

View File

@@ -57,6 +57,7 @@ services:
- SCHEDULE_ENABLED=${SCHEDULE_ENABLED:-false}
- GITEA_MIRROR_INTERVAL=${GITEA_MIRROR_INTERVAL:-8h}
- AUTO_IMPORT_REPOS=${AUTO_IMPORT_REPOS:-true}
- AUTO_MIRROR_REPOS=${AUTO_MIRROR_REPOS:-false}
# Repository Cleanup Configuration
- CLEANUP_DELETE_IF_NOT_IN_GITHUB=${CLEANUP_DELETE_IF_NOT_IN_GITHUB:-false}
- CLEANUP_ORPHANED_REPO_ACTION=${CLEANUP_ORPHANED_REPO_ACTION:-archive}

View File

@@ -195,6 +195,7 @@ Configure automatic scheduled mirroring.
| Variable | Description | Default | Options |
|----------|-------------|---------|---------|
| `AUTO_IMPORT_REPOS` | Automatically discover and import new GitHub repositories during scheduled syncs | `true` | `true`, `false` |
| `AUTO_MIRROR_REPOS` | Automatically mirror newly imported repositories during scheduled syncs (no manual “Mirror All” required) | `false` | `true`, `false` |
| `SCHEDULE_ONLY_MIRROR_UPDATED` | Only mirror repos with updates | `false` | `true`, `false` |
| `SCHEDULE_UPDATE_INTERVAL` | Check for updates interval (milliseconds) | `86400000` | Number |
| `SCHEDULE_SKIP_RECENTLY_MIRRORED` | Skip recently mirrored repos | `true` | `true`, `false` |
@@ -407,4 +408,4 @@ BETTER_AUTH_TRUSTED_ORIGINS=http://localhost:3000,http://192.168.1.100:3000
- `admin:org` (read organization data)
- Additional scopes may be required for specific features
For more examples and detailed configuration, see the `.env.example` file in the repository.
For more examples and detailed configuration, see the `.env.example` file in the repository.

View File

@@ -1,7 +1,7 @@
{
"name": "gitea-mirror",
"type": "module",
"version": "3.7.0",
"version": "3.7.1",
"engines": {
"bun": ">=1.2.9"
},

View File

@@ -81,6 +81,8 @@ export const scheduleConfigSchema = z.object({
updateInterval: z.number().default(86400000),
skipRecentlyMirrored: z.boolean().default(true),
recentThreshold: z.number().default(3600000),
autoImport: z.boolean().default(true),
autoMirror: z.boolean().default(false),
lastRun: z.coerce.date().optional(),
nextRun: z.coerce.date().optional(),
});
@@ -152,6 +154,7 @@ export const repositorySchema = z.object({
"deleted",
"syncing",
"synced",
"archived",
])
.default("imported"),
lastMirrored: z.coerce.date().optional().nullable(),
@@ -181,6 +184,7 @@ export const mirrorJobSchema = z.object({
"deleted",
"syncing",
"synced",
"archived",
])
.default("imported"),
message: z.string(),

View File

@@ -69,6 +69,8 @@ interface EnvConfig {
updateInterval?: number;
skipRecentlyMirrored?: boolean;
recentThreshold?: number;
autoImport?: boolean;
autoMirror?: boolean;
};
cleanup: {
enabled?: boolean;
@@ -157,6 +159,8 @@ function parseEnvConfig(): EnvConfig {
updateInterval: process.env.SCHEDULE_UPDATE_INTERVAL ? parseInt(process.env.SCHEDULE_UPDATE_INTERVAL, 10) : undefined,
skipRecentlyMirrored: process.env.SCHEDULE_SKIP_RECENTLY_MIRRORED === 'true',
recentThreshold: process.env.SCHEDULE_RECENT_THRESHOLD ? parseInt(process.env.SCHEDULE_RECENT_THRESHOLD, 10) : undefined,
autoImport: process.env.AUTO_IMPORT_REPOS !== 'false',
autoMirror: process.env.AUTO_MIRROR_REPOS === 'true',
},
cleanup: {
enabled: process.env.CLEANUP_ENABLED === 'true' ||
@@ -301,7 +305,8 @@ export async function initializeConfigFromEnv(): Promise<void> {
updateInterval: envConfig.schedule.updateInterval ?? existingConfig?.[0]?.scheduleConfig?.updateInterval ?? 86400000,
skipRecentlyMirrored: envConfig.schedule.skipRecentlyMirrored ?? existingConfig?.[0]?.scheduleConfig?.skipRecentlyMirrored ?? true,
recentThreshold: envConfig.schedule.recentThreshold ?? existingConfig?.[0]?.scheduleConfig?.recentThreshold ?? 3600000,
autoImport: process.env.AUTO_IMPORT_REPOS !== 'false', // New field for auto-importing new repositories
autoImport: envConfig.schedule.autoImport ?? existingConfig?.[0]?.scheduleConfig?.autoImport ?? true,
autoMirror: envConfig.schedule.autoMirror ?? existingConfig?.[0]?.scheduleConfig?.autoMirror ?? false,
lastRun: existingConfig?.[0]?.scheduleConfig?.lastRun || undefined,
nextRun: existingConfig?.[0]?.scheduleConfig?.nextRun || undefined,
};
@@ -359,4 +364,4 @@ export async function initializeConfigFromEnv(): Promise<void> {
console.error('[ENV Config Loader] Failed to initialize configuration from environment:', error);
// Don't throw - this is a non-critical initialization
}
}
}

View File

@@ -2176,6 +2176,14 @@ export async function archiveGiteaRepo(
repo: string
): Promise<void> {
try {
// Helper: sanitize to Gitea's AlphaDashDot rule
const sanitizeRepoNameAlphaDashDot = (name: string): string => {
// Replace anything that's not [A-Za-z0-9.-] with '-'
const base = name.replace(/[^A-Za-z0-9.-]+/g, "-").replace(/-+/g, "-");
// Trim leading/trailing separators and dots for safety
return base.replace(/^[.-]+/, "").replace(/[.-]+$/, "");
};
// First, check if this is a mirror repository
const repoResponse = await httpGet(
`${client.url}/api/v1/repos/${owner}/${repo}`,
@@ -2207,7 +2215,8 @@ export async function archiveGiteaRepo(
return;
}
const archivedName = `[ARCHIVED] ${currentName}`;
// Use a safe prefix and sanitize the name to satisfy AlphaDashDot rule
let archivedName = `archived-${sanitizeRepoNameAlphaDashDot(currentName)}`;
const currentDesc = repoResponse.data.description || '';
const archiveNotice = `\n\n⚠ ARCHIVED: Original GitHub repository no longer exists. Preserved as backup on ${new Date().toISOString()}`;
@@ -2216,23 +2225,40 @@ export async function archiveGiteaRepo(
? currentDesc
: currentDesc + archiveNotice;
const renameResponse = await httpPatch(
`${client.url}/api/v1/repos/${owner}/${repo}`,
{
name: archivedName,
description: newDescription,
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
try {
await httpPatch(
`${client.url}/api/v1/repos/${owner}/${repo}`,
{
name: archivedName,
description: newDescription,
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
}
);
} catch (e: any) {
// If rename fails (e.g., 422 AlphaDashDot or name conflict), attempt a timestamped fallback
const ts = new Date().toISOString().replace(/[-:T.]/g, "").slice(0, 14);
archivedName = `archived-${ts}-${sanitizeRepoNameAlphaDashDot(currentName)}`;
try {
await httpPatch(
`${client.url}/api/v1/repos/${owner}/${repo}`,
{
name: archivedName,
description: newDescription,
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
}
);
} catch (e2) {
// If this also fails, log but don't throw - data remains preserved
console.error(`[Archive] Failed to rename mirror repository ${owner}/${repo}:`, e2);
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
return;
}
);
if (renameResponse.status >= 400) {
// If rename fails, log but don't throw - data is still preserved
console.error(`[Archive] Failed to rename mirror repository ${owner}/${repo}: ${renameResponse.status}`);
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
return;
}
console.log(`[Archive] Successfully marked mirror repository ${owner}/${repo} as archived (renamed to ${archivedName})`);

View File

@@ -7,7 +7,7 @@
import { db, configs, repositories } from '@/lib/db';
import { eq, and, or, sql, not, inArray } from 'drizzle-orm';
import { createGitHubClient, getGithubRepositories, getGithubStarredRepositories } from '@/lib/github';
import { createGiteaClient, deleteGiteaRepo, archiveGiteaRepo } from '@/lib/gitea';
import { createGiteaClient, deleteGiteaRepo, archiveGiteaRepo, getGiteaRepoOwnerAsync, checkRepoLocation } from '@/lib/gitea';
import { getDecryptedGitHubToken, getDecryptedGiteaToken } from '@/lib/utils/config-encryption';
import { publishEvent } from '@/lib/events';
@@ -109,26 +109,46 @@ async function handleOrphanedRepository(
const giteaToken = getDecryptedGiteaToken(config);
const giteaClient = createGiteaClient(config.giteaConfig.url, giteaToken);
// Determine the Gitea owner and repo name
const mirroredLocation = repo.mirroredLocation || '';
let giteaOwner = repo.owner;
let giteaRepoName = repo.name;
if (mirroredLocation) {
const parts = mirroredLocation.split('/');
if (parts.length >= 2) {
giteaOwner = parts[parts.length - 2];
giteaRepoName = parts[parts.length - 1];
}
// Determine the Gitea owner and repo name more robustly
const mirroredLocation = (repo.mirroredLocation || '').trim();
let giteaOwner: string;
let giteaRepoName: string;
if (mirroredLocation && mirroredLocation.includes('/')) {
const [ownerPart, namePart] = mirroredLocation.split('/');
giteaOwner = ownerPart;
giteaRepoName = namePart;
} else {
// Fall back to expected owner based on config and repo flags (starred/org overrides)
giteaOwner = await getGiteaRepoOwnerAsync({ config, repository: repo });
giteaRepoName = repo.name;
}
// Normalize owner casing to avoid GetUserByName issues on some Gitea setups
giteaOwner = giteaOwner.trim();
if (action === 'archive') {
console.log(`[Repository Cleanup] Archiving orphaned repository ${repoFullName} in Gitea`);
// Best-effort check to validate actual location; falls back gracefully
try {
const { present, actualOwner } = await checkRepoLocation({
config,
repository: repo,
expectedOwner: giteaOwner,
});
if (present) {
giteaOwner = actualOwner;
}
} catch {
// Non-fatal; continue with best guess
}
await archiveGiteaRepo(giteaClient, giteaOwner, giteaRepoName);
// Update database status
await db.update(repositories).set({
status: 'archived',
isArchived: true,
errorMessage: 'Repository archived - no longer in GitHub',
updatedAt: new Date(),
}).where(eq(repositories.id, repo.id));
@@ -402,4 +422,4 @@ export async function triggerRepositoryCleanup(userId: string): Promise<{
}
return runRepositoryCleanup(config);
}
}

View File

@@ -166,6 +166,75 @@ async function runScheduledSync(config: any): Promise<void> {
}
}
// Auto-mirror: Mirror imported/pending/failed repositories if enabled
if (scheduleConfig.autoMirror) {
try {
console.log(`[Scheduler] Auto-mirror enabled - checking for repositories to mirror for user ${userId}...`);
const reposNeedingMirror = await db
.select()
.from(repositories)
.where(
and(
eq(repositories.userId, userId),
or(
eq(repositories.status, 'imported'),
eq(repositories.status, 'pending'),
eq(repositories.status, 'failed')
)
)
);
if (reposNeedingMirror.length > 0) {
console.log(`[Scheduler] Found ${reposNeedingMirror.length} repositories that need initial mirroring`);
// Prepare Octokit client
const decryptedToken = getDecryptedGitHubToken(config);
const { Octokit } = await import('@octokit/rest');
const octokit = new Octokit({ auth: decryptedToken });
// Process repositories in batches
const batchSize = scheduleConfig.batchSize || 10;
const pauseBetweenBatches = scheduleConfig.pauseBetweenBatches || 2000;
for (let i = 0; i < reposNeedingMirror.length; i += batchSize) {
const batch = reposNeedingMirror.slice(i, Math.min(i + batchSize, reposNeedingMirror.length));
console.log(`[Scheduler] Auto-mirror batch ${Math.floor(i / batchSize) + 1} of ${Math.ceil(reposNeedingMirror.length / batchSize)} (${batch.length} repos)`);
await Promise.all(
batch.map(async (repo) => {
try {
const repository: Repository = {
...repo,
status: repoStatusEnum.parse(repo.status),
organization: repo.organization ?? undefined,
lastMirrored: repo.lastMirrored ?? undefined,
errorMessage: repo.errorMessage ?? undefined,
mirroredLocation: repo.mirroredLocation || '',
forkedFrom: repo.forkedFrom ?? undefined,
visibility: repositoryVisibilityEnum.parse(repo.visibility),
};
await mirrorGithubRepoToGitea({ octokit, repository, config });
console.log(`[Scheduler] Auto-mirrored repository: ${repo.fullName}`);
} catch (error) {
console.error(`[Scheduler] Failed to auto-mirror repository ${repo.fullName}:`, error);
}
})
);
// Pause between batches if configured
if (i + batchSize < reposNeedingMirror.length) {
console.log(`[Scheduler] Pausing for ${pauseBetweenBatches}ms before next auto-mirror batch...`);
await new Promise(resolve => setTimeout(resolve, pauseBetweenBatches));
}
}
} else {
console.log(`[Scheduler] No repositories need initial mirroring`);
}
} catch (mirrorError) {
console.error(`[Scheduler] Error during auto-mirror phase for user ${userId}:`, mirrorError);
}
}
// Get repositories to sync
let reposToSync = await db
.select()

View File

@@ -11,6 +11,7 @@ import type {
} from "@/types/config";
import { z } from "zod";
import { githubConfigSchema, giteaConfigSchema, scheduleConfigSchema, cleanupConfigSchema } from "@/lib/db/schema";
import { parseInterval } from "@/lib/utils/duration-parser";
// Use the actual database schema types
type DbGitHubConfig = z.infer<typeof githubConfigSchema>;
@@ -165,27 +166,22 @@ export function mapDbToUiConfig(dbConfig: any): {
/**
* Maps UI schedule config to database schema
*/
export function mapUiScheduleToDb(uiSchedule: any): DbScheduleConfig {
export function mapUiScheduleToDb(uiSchedule: any, existing?: DbScheduleConfig): DbScheduleConfig {
// Preserve existing schedule config and only update fields controlled by the UI
const base: DbScheduleConfig = existing
? { ...(existing as unknown as DbScheduleConfig) }
: (scheduleConfigSchema.parse({}) as unknown as DbScheduleConfig);
// Store interval as seconds string to avoid lossy cron conversion
const intervalSeconds = typeof uiSchedule.interval === 'number' && uiSchedule.interval > 0
? String(uiSchedule.interval)
: (typeof base.interval === 'string' ? base.interval : String(86400));
return {
enabled: uiSchedule.enabled || false,
interval: uiSchedule.interval ? `0 */${Math.floor(uiSchedule.interval / 3600)} * * *` : "0 2 * * *", // Convert seconds to cron expression
concurrent: false,
batchSize: 10,
pauseBetweenBatches: 5000,
retryAttempts: 3,
retryDelay: 60000,
timeout: 3600000,
autoRetry: true,
cleanupBeforeMirror: false,
notifyOnFailure: true,
notifyOnSuccess: false,
logLevel: "info",
timezone: "UTC",
onlyMirrorUpdated: false,
updateInterval: 86400000,
skipRecentlyMirrored: true,
recentThreshold: 3600000,
};
...base,
enabled: !!uiSchedule.enabled,
interval: intervalSeconds,
} as DbScheduleConfig;
}
/**
@@ -202,23 +198,18 @@ export function mapDbScheduleToUi(dbSchedule: DbScheduleConfig): any {
};
}
// Extract hours from cron expression if possible
// Parse interval supporting numbers (seconds), duration strings, and cron
let intervalSeconds = 86400; // Default to daily (24 hours)
if (dbSchedule.interval) {
// Check if it's already a number (seconds), use it directly
if (typeof dbSchedule.interval === 'number') {
intervalSeconds = dbSchedule.interval;
} else if (typeof dbSchedule.interval === 'string') {
// Check if it's a cron expression
const cronMatch = dbSchedule.interval.match(/0 \*\/(\d+) \* \* \*/);
if (cronMatch) {
intervalSeconds = parseInt(cronMatch[1]) * 3600;
} else if (dbSchedule.interval === "0 2 * * *") {
// Daily at 2 AM
intervalSeconds = 86400;
}
}
try {
const ms = parseInterval(
typeof dbSchedule.interval === 'number'
? dbSchedule.interval
: (dbSchedule.interval as unknown as string)
);
intervalSeconds = Math.max(1, Math.floor(ms / 1000));
} catch (_e) {
// Fallback to default if unparsable
intervalSeconds = 86400;
}
return {
@@ -266,4 +257,4 @@ export function mapDbCleanupToUi(dbCleanup: DbCleanupConfig): any {
lastRun: dbCleanup.lastRun || null,
nextRun: dbCleanup.nextRun || null,
};
}
}

View File

@@ -87,7 +87,10 @@ export const POST: APIRoute = async ({ request }) => {
}
// Map schedule and cleanup configs to database schema
const processedScheduleConfig = mapUiScheduleToDb(scheduleConfig);
const processedScheduleConfig = mapUiScheduleToDb(
scheduleConfig,
existingConfig ? existingConfig.scheduleConfig : undefined
);
const processedCleanupConfig = mapUiCleanupToDb(cleanupConfig);
if (existingConfig) {

View File

@@ -8,6 +8,7 @@ import type {
ScheduleSyncRepoResponse,
} from "@/types/sync";
import { createSecureErrorResponse } from "@/lib/utils";
import { parseInterval } from "@/lib/utils/duration-parser";
export const POST: APIRoute = async ({ request }) => {
try {
@@ -72,8 +73,17 @@ export const POST: APIRoute = async ({ request }) => {
// Calculate nextRun and update lastRun and nextRun in the config
const currentTime = new Date();
const interval = config.scheduleConfig?.interval ?? 3600;
const nextRun = new Date(currentTime.getTime() + interval * 1000);
let intervalMs = 3600 * 1000;
try {
intervalMs = parseInterval(
typeof config.scheduleConfig?.interval === 'number'
? config.scheduleConfig.interval
: (config.scheduleConfig?.interval as unknown as string) || '3600'
);
} catch {
intervalMs = 3600 * 1000;
}
const nextRun = new Date(currentTime.getTime() + intervalMs);
// Update the full giteaConfig object
await db

View File

@@ -12,6 +12,7 @@ export const repoStatusEnum = z.enum([
"deleted",
"syncing",
"synced",
"archived",
]);
export type RepoStatus = z.infer<typeof repoStatusEnum>;

View File

@@ -0,0 +1,10 @@
import { describe, expect, it } from "bun:test";
import { repoStatusEnum } from "@/types/Repository";
describe("repoStatusEnum", () => {
it("includes archived status", () => {
const res = repoStatusEnum.safeParse("archived");
expect(res.success).toBe(true);
});
});