mirror of
https://github.com/RayLabsHQ/gitea-mirror.git
synced 2026-04-11 13:37:44 +03:00
* fix: prevent excessive disk usage from repo backups (#234) Legacy configs with backupBeforeSync: true but no explicit backupStrategy silently resolved to "always", creating full git bundles on every sync cycle. This caused repo-backups to grow to 17GB+ for users with many repositories. Changes: - Fix resolveBackupStrategy to map backupBeforeSync: true → "on-force-push" instead of "always", so legacy configs only backup when force-push is detected - Fix config mapper to always set backupStrategy explicitly ("on-force-push") preventing the backward-compat fallback from triggering - Lower default backupRetentionCount from 20 to 5 bundles per repo - Add time-based retention (backupRetentionDays, default 30 days) alongside count-based retention, with safety net to always keep at least 1 bundle - Add "high disk usage" warning on "Always Backup" UI option - Update docs and tests to reflect new defaults and behavior * fix: preserve legacy backupBeforeSync:false on UI round-trip and expose retention days P1: mapDbToUiConfig now checks backupBeforeSync === false before defaulting backupStrategy, preventing legacy "disabled" configs from silently becoming "on-force-push" after any auto-save round-trip. P3: Added "Snapshot retention days" input field to the backup settings UI, matching the documented setting in FORCE_PUSH_PROTECTION.md.
314 lines
9.8 KiB
TypeScript
314 lines
9.8 KiB
TypeScript
import { mkdir, mkdtemp, readdir, rm, stat } from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import type { Config, BackupStrategy } from "@/types/config";
|
|
import { decryptConfigTokens } from "./utils/config-encryption";
|
|
|
|
const TRUE_VALUES = new Set(["1", "true", "yes", "on"]);
|
|
|
|
function parseBoolean(value: string | undefined, fallback: boolean): boolean {
|
|
if (value === undefined) return fallback;
|
|
return TRUE_VALUES.has(value.trim().toLowerCase());
|
|
}
|
|
|
|
function parsePositiveInt(value: string | undefined, fallback: number): number {
|
|
if (!value) return fallback;
|
|
const parsed = Number.parseInt(value, 10);
|
|
if (!Number.isFinite(parsed) || parsed <= 0) {
|
|
return fallback;
|
|
}
|
|
return parsed;
|
|
}
|
|
|
|
function sanitizePathSegment(input: string): string {
|
|
return input.replace(/[^a-zA-Z0-9._-]/g, "_");
|
|
}
|
|
|
|
function buildTimestamp(): string {
|
|
// Example: 2026-02-25T18-34-22-123Z
|
|
return new Date().toISOString().replace(/[:.]/g, "-");
|
|
}
|
|
|
|
function buildAuthenticatedCloneUrl(cloneUrl: string, token: string): string {
|
|
const parsed = new URL(cloneUrl);
|
|
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
|
return cloneUrl;
|
|
}
|
|
|
|
parsed.username = process.env.PRE_SYNC_BACKUP_GIT_USERNAME || "oauth2";
|
|
parsed.password = token;
|
|
return parsed.toString();
|
|
}
|
|
|
|
function maskToken(text: string, token: string): string {
|
|
if (!token) return text;
|
|
return text.split(token).join("***");
|
|
}
|
|
|
|
async function runGit(args: string[], tokenToMask: string): Promise<void> {
|
|
const proc = Bun.spawn({
|
|
cmd: ["git", ...args],
|
|
stdout: "pipe",
|
|
stderr: "pipe",
|
|
});
|
|
|
|
const [stdout, stderr, exitCode] = await Promise.all([
|
|
new Response(proc.stdout).text(),
|
|
new Response(proc.stderr).text(),
|
|
proc.exited,
|
|
]);
|
|
|
|
if (exitCode !== 0) {
|
|
const details = [stdout, stderr].filter(Boolean).join("\n").trim();
|
|
const safeDetails = maskToken(details, tokenToMask);
|
|
throw new Error(`git command failed: ${safeDetails || "unknown git error"}`);
|
|
}
|
|
}
|
|
|
|
async function enforceRetention(
|
|
repoBackupDir: string,
|
|
keepCount: number,
|
|
retentionDays: number = 0,
|
|
): Promise<void> {
|
|
const entries = await readdir(repoBackupDir);
|
|
const bundleFiles = entries
|
|
.filter((name) => name.endsWith(".bundle"))
|
|
.map((name) => path.join(repoBackupDir, name));
|
|
|
|
if (bundleFiles.length === 0) return;
|
|
|
|
const filesWithMtime = await Promise.all(
|
|
bundleFiles.map(async (filePath) => ({
|
|
filePath,
|
|
mtimeMs: (await stat(filePath)).mtimeMs,
|
|
}))
|
|
);
|
|
|
|
filesWithMtime.sort((a, b) => b.mtimeMs - a.mtimeMs);
|
|
|
|
const toDelete = new Set<string>();
|
|
|
|
// Count-based retention: keep only the N most recent
|
|
if (filesWithMtime.length > keepCount) {
|
|
for (const entry of filesWithMtime.slice(keepCount)) {
|
|
toDelete.add(entry.filePath);
|
|
}
|
|
}
|
|
|
|
// Time-based retention: delete bundles older than retentionDays
|
|
if (retentionDays > 0) {
|
|
const cutoffMs = Date.now() - retentionDays * 86_400_000;
|
|
for (const entry of filesWithMtime) {
|
|
if (entry.mtimeMs < cutoffMs) {
|
|
toDelete.add(entry.filePath);
|
|
}
|
|
}
|
|
// Always keep at least 1 bundle even if it's old
|
|
if (toDelete.size === filesWithMtime.length && filesWithMtime.length > 0) {
|
|
toDelete.delete(filesWithMtime[0].filePath);
|
|
}
|
|
}
|
|
|
|
if (toDelete.size > 0) {
|
|
await Promise.all([...toDelete].map((fp) => rm(fp, { force: true })));
|
|
}
|
|
}
|
|
|
|
export function isPreSyncBackupEnabled(): boolean {
|
|
return parseBoolean(process.env.PRE_SYNC_BACKUP_ENABLED, true);
|
|
}
|
|
|
|
export function shouldCreatePreSyncBackup(config: Partial<Config>): boolean {
|
|
const configSetting = config.giteaConfig?.backupBeforeSync;
|
|
const fallback = isPreSyncBackupEnabled();
|
|
return configSetting === undefined ? fallback : Boolean(configSetting);
|
|
}
|
|
|
|
export function shouldBlockSyncOnBackupFailure(config: Partial<Config>): boolean {
|
|
const configSetting = config.giteaConfig?.blockSyncOnBackupFailure;
|
|
return configSetting === undefined ? true : Boolean(configSetting);
|
|
}
|
|
|
|
// ---- Backup strategy resolver ----
|
|
|
|
const VALID_STRATEGIES = new Set<BackupStrategy>([
|
|
"disabled",
|
|
"always",
|
|
"on-force-push",
|
|
"block-on-force-push",
|
|
]);
|
|
|
|
/**
|
|
* Resolve the effective backup strategy from config, falling back through:
|
|
* 1. `backupStrategy` field (new)
|
|
* 2. `backupBeforeSync` boolean (deprecated, backward compat)
|
|
* 3. `PRE_SYNC_BACKUP_STRATEGY` env var
|
|
* 4. `PRE_SYNC_BACKUP_ENABLED` env var (legacy)
|
|
* 5. Default: `"on-force-push"`
|
|
*/
|
|
export function resolveBackupStrategy(config: Partial<Config>): BackupStrategy {
|
|
// 1. Explicit backupStrategy field
|
|
const explicit = config.giteaConfig?.backupStrategy;
|
|
if (explicit && VALID_STRATEGIES.has(explicit as BackupStrategy)) {
|
|
return explicit as BackupStrategy;
|
|
}
|
|
|
|
// 2. Legacy backupBeforeSync boolean → map to strategy
|
|
// Note: backupBeforeSync: true now maps to "on-force-push" (not "always")
|
|
// because mappers default backupBeforeSync to true, causing every legacy config
|
|
// to silently resolve to "always" and create full git bundles on every sync.
|
|
const legacy = config.giteaConfig?.backupBeforeSync;
|
|
if (legacy !== undefined) {
|
|
return legacy ? "on-force-push" : "disabled";
|
|
}
|
|
|
|
// 3. Env var (new)
|
|
const envStrategy = process.env.PRE_SYNC_BACKUP_STRATEGY?.trim().toLowerCase();
|
|
if (envStrategy && VALID_STRATEGIES.has(envStrategy as BackupStrategy)) {
|
|
return envStrategy as BackupStrategy;
|
|
}
|
|
|
|
// 4. Env var (legacy)
|
|
const envEnabled = process.env.PRE_SYNC_BACKUP_ENABLED;
|
|
if (envEnabled !== undefined) {
|
|
return parseBoolean(envEnabled, true) ? "always" : "disabled";
|
|
}
|
|
|
|
// 5. Default
|
|
return "on-force-push";
|
|
}
|
|
|
|
/**
|
|
* Determine whether a backup should be created for the given strategy and
|
|
* force-push detection result.
|
|
*/
|
|
export function shouldBackupForStrategy(
|
|
strategy: BackupStrategy,
|
|
forcePushDetected: boolean,
|
|
): boolean {
|
|
switch (strategy) {
|
|
case "disabled":
|
|
return false;
|
|
case "always":
|
|
return true;
|
|
case "on-force-push":
|
|
case "block-on-force-push":
|
|
return forcePushDetected;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Determine whether sync should be blocked (requires manual approval).
|
|
* Only `block-on-force-push` with an actual detection blocks sync.
|
|
*/
|
|
export function shouldBlockSyncForStrategy(
|
|
strategy: BackupStrategy,
|
|
forcePushDetected: boolean,
|
|
): boolean {
|
|
return strategy === "block-on-force-push" && forcePushDetected;
|
|
}
|
|
|
|
/**
|
|
* Returns true when the strategy requires running force-push detection
|
|
* before deciding on backup / block behavior.
|
|
*/
|
|
export function strategyNeedsDetection(strategy: BackupStrategy): boolean {
|
|
return strategy === "on-force-push" || strategy === "block-on-force-push";
|
|
}
|
|
|
|
export function resolveBackupPaths({
|
|
config,
|
|
owner,
|
|
repoName,
|
|
}: {
|
|
config: Partial<Config>;
|
|
owner: string;
|
|
repoName: string;
|
|
}): { backupRoot: string; repoBackupDir: string } {
|
|
let backupRoot =
|
|
config.giteaConfig?.backupDirectory?.trim() ||
|
|
process.env.PRE_SYNC_BACKUP_DIR?.trim() ||
|
|
path.join(process.cwd(), "data", "repo-backups");
|
|
|
|
// Ensure backupRoot is absolute - relative paths break git bundle creation
|
|
// because git runs with -C mirrorClonePath and interprets relative paths from there.
|
|
// Always use path.resolve() which guarantees an absolute path, rather than a
|
|
// conditional check that can miss edge cases (e.g., NixOS systemd services).
|
|
backupRoot = path.resolve(backupRoot);
|
|
|
|
const repoBackupDir = path.join(
|
|
backupRoot,
|
|
sanitizePathSegment(config.userId || "unknown-user"),
|
|
sanitizePathSegment(owner),
|
|
sanitizePathSegment(repoName)
|
|
);
|
|
|
|
return { backupRoot, repoBackupDir };
|
|
}
|
|
|
|
export async function createPreSyncBundleBackup({
|
|
config,
|
|
owner,
|
|
repoName,
|
|
cloneUrl,
|
|
force,
|
|
}: {
|
|
config: Partial<Config>;
|
|
owner: string;
|
|
repoName: string;
|
|
cloneUrl: string;
|
|
/** When true, skip the legacy shouldCreatePreSyncBackup check.
|
|
* Used by the strategy-driven path which has already decided to backup. */
|
|
force?: boolean;
|
|
}): Promise<{ bundlePath: string }> {
|
|
if (!force && !shouldCreatePreSyncBackup(config)) {
|
|
throw new Error("Pre-sync backup is disabled.");
|
|
}
|
|
|
|
if (!config.giteaConfig?.token) {
|
|
throw new Error("Gitea token is required for pre-sync backup.");
|
|
}
|
|
|
|
const decryptedConfig = decryptConfigTokens(config as Config);
|
|
const giteaToken = decryptedConfig.giteaConfig?.token;
|
|
if (!giteaToken) {
|
|
throw new Error("Decrypted Gitea token is required for pre-sync backup.");
|
|
}
|
|
|
|
const { repoBackupDir } = resolveBackupPaths({ config, owner, repoName });
|
|
const retention = Math.max(
|
|
1,
|
|
Number.isFinite(config.giteaConfig?.backupRetentionCount)
|
|
? Number(config.giteaConfig?.backupRetentionCount)
|
|
: parsePositiveInt(process.env.PRE_SYNC_BACKUP_KEEP_COUNT, 5)
|
|
);
|
|
const retentionDays = Math.max(
|
|
0,
|
|
Number.isFinite(config.giteaConfig?.backupRetentionDays)
|
|
? Number(config.giteaConfig?.backupRetentionDays)
|
|
: parsePositiveInt(process.env.PRE_SYNC_BACKUP_RETENTION_DAYS, 30)
|
|
);
|
|
|
|
await mkdir(repoBackupDir, { recursive: true });
|
|
|
|
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "gitea-mirror-backup-"));
|
|
const mirrorClonePath = path.join(tmpDir, "repo.git");
|
|
// path.resolve guarantees an absolute path, critical because git -C changes
|
|
// the working directory and would misinterpret a relative bundlePath
|
|
const bundlePath = path.resolve(repoBackupDir, `${buildTimestamp()}.bundle`);
|
|
|
|
try {
|
|
const authCloneUrl = buildAuthenticatedCloneUrl(cloneUrl, giteaToken);
|
|
|
|
await runGit(["clone", "--mirror", authCloneUrl, mirrorClonePath], giteaToken);
|
|
await runGit(["-C", mirrorClonePath, "bundle", "create", bundlePath, "--all"], giteaToken);
|
|
|
|
await enforceRetention(repoBackupDir, retention, retentionDays);
|
|
return { bundlePath };
|
|
} finally {
|
|
await rm(tmpDir, { recursive: true, force: true });
|
|
}
|
|
}
|