Files
gitea-mirror/src/lib/repo-backup.ts
ARUNAVO RAY ddd071f7e5 fix: prevent excessive disk usage from repo backups (#235)
* fix: prevent excessive disk usage from repo backups (#234)

Legacy configs with backupBeforeSync: true but no explicit backupStrategy
silently resolved to "always", creating full git bundles on every sync
cycle. This caused repo-backups to grow to 17GB+ for users with many
repositories.

Changes:
- Fix resolveBackupStrategy to map backupBeforeSync: true → "on-force-push"
  instead of "always", so legacy configs only back up when a force-push is detected
- Fix config mapper to always set backupStrategy explicitly ("on-force-push")
  preventing the backward-compat fallback from triggering
- Lower default backupRetentionCount from 20 to 5 bundles per repo
- Add time-based retention (backupRetentionDays, default 30 days) alongside
  count-based retention, with a safety net that always keeps at least 1 bundle
- Add "high disk usage" warning on "Always Backup" UI option
- Update docs and tests to reflect new defaults and behavior

* fix: preserve legacy backupBeforeSync:false on UI round-trip and expose retention days

P1: mapDbToUiConfig now checks backupBeforeSync === false before
defaulting backupStrategy, preventing legacy "disabled" configs from
silently becoming "on-force-push" after any auto-save round-trip.

P3: Added "Snapshot retention days" input field to the backup settings
UI, matching the documented setting in FORCE_PUSH_PROTECTION.md.
2026-03-18 15:05:00 +05:30

314 lines
9.8 KiB
TypeScript

import { mkdir, mkdtemp, readdir, rm, stat } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import type { Config, BackupStrategy } from "@/types/config";
import { decryptConfigTokens } from "./utils/config-encryption";
/** Lower-case spellings that are read as boolean `true`. */
const TRUE_VALUES = new Set(["1", "true", "yes", "on"]);

/**
 * Interpret an optional string as a boolean flag.
 *
 * @param value - Raw text; `undefined` means the setting was not provided.
 * @param fallback - Result used only when `value` is `undefined`.
 * @returns `true` when the trimmed, lower-cased text is one of `TRUE_VALUES`;
 *   any other defined text (including the empty string) yields `false`.
 */
function parseBoolean(value: string | undefined, fallback: boolean): boolean {
  if (value !== undefined) {
    const normalized = value.trim().toLowerCase();
    return TRUE_VALUES.has(normalized);
  }
  return fallback;
}
/**
 * Parse a base-10 integer that must be strictly greater than zero.
 *
 * @param value - Raw text; `undefined` or an empty string selects the fallback.
 * @param fallback - Returned when the text is missing, not numeric, or <= 0.
 * @returns The parsed integer, or `fallback`.
 */
function parsePositiveInt(value: string | undefined, fallback: number): number {
  if (!value) return fallback;
  const candidate = Number.parseInt(value, 10);
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? candidate : fallback;
}
/**
 * Turn arbitrary text into a single safe directory name.
 *
 * Every character outside `[a-zA-Z0-9._-]` becomes `_`. Because `.` is an
 * allowed character, the results `"."` and `".."` would otherwise survive
 * and be treated as "current" / "parent" directory when joined into a path,
 * and an empty result would make the segment vanish from the joined path.
 * Those three results are therefore rewritten so the segment always names a
 * real child directory.
 *
 * @param input - Untrusted name (user id, owner, repository name, ...).
 * @returns A non-empty segment that is never `"."` or `".."`.
 */
function sanitizePathSegment(input: string): string {
  const cleaned = input.replace(/[^a-zA-Z0-9._-]/g, "_");
  if (cleaned === "" || cleaned === "." || cleaned === "..") {
    // "." -> "_", ".." -> "__", "" -> "_"
    return cleaned.replace(/\./g, "_") || "_";
  }
  return cleaned;
}
/**
 * Current time as an ISO-8601 string with every `:` and `.` replaced by `-`.
 *
 * Example: 2026-02-25T18-34-22-123Z
 */
function buildTimestamp(): string {
  const iso = new Date().toISOString();
  return iso.split(/[:.]/).join("-");
}
/**
 * Embed credentials into an HTTP(S) clone URL.
 *
 * The username comes from `PRE_SYNC_BACKUP_GIT_USERNAME` (default `"oauth2"`)
 * and the password is the supplied token. URLs with any other protocol are
 * returned exactly as given.
 *
 * @param cloneUrl - Repository clone URL; must be parseable by `URL`.
 * @param token - Secret to place in the password component.
 * @returns The URL string with credentials, or `cloneUrl` untouched for
 *   non-HTTP(S) protocols.
 * @throws TypeError when `cloneUrl` cannot be parsed as a URL.
 */
function buildAuthenticatedCloneUrl(cloneUrl: string, token: string): string {
  const url = new URL(cloneUrl);
  const isHttp = url.protocol === "http:" || url.protocol === "https:";
  if (!isHttp) {
    return cloneUrl;
  }

  const gitUser = process.env.PRE_SYNC_BACKUP_GIT_USERNAME || "oauth2";
  url.username = gitUser;
  url.password = token;
  return url.href;
}
/**
 * Replace every literal occurrence of `token` in `text` with `***`.
 *
 * @param text - Text that may contain the secret.
 * @param token - Secret to redact; an empty token leaves `text` unchanged.
 * @returns The redacted text.
 */
function maskToken(text: string, token: string): string {
  return token ? text.split(token).join("***") : text;
}
/**
 * Run `git` with the given arguments through `Bun.spawn`.
 *
 * stdout and stderr are captured as text while the process exit is awaited.
 *
 * @param args - Arguments placed after `git` on the command line.
 * @param tokenToMask - Secret redacted from the error message on failure.
 * @throws Error when git exits with a non-zero code; the message carries the
 *   combined stdout/stderr with the token masked, or "unknown git error"
 *   when git printed nothing.
 */
async function runGit(args: string[], tokenToMask: string): Promise<void> {
  const child = Bun.spawn({
    cmd: ["git", ...args],
    stdout: "pipe",
    stderr: "pipe",
  });

  // Start reading both pipes before waiting, so all three settle together.
  const stdoutText = new Response(child.stdout).text();
  const stderrText = new Response(child.stderr).text();
  const [out, err, code] = await Promise.all([stdoutText, stderrText, child.exited]);

  if (code === 0) {
    return;
  }

  const combined = [out, err].filter(Boolean).join("\n").trim();
  const redacted = maskToken(combined, tokenToMask);
  throw new Error(`git command failed: ${redacted || "unknown git error"}`);
}
/**
 * Prune old `*.bundle` files in one repository's backup directory.
 *
 * Two independent rules mark bundles for deletion, judged by file mtime:
 * - count-based: everything beyond the `keepCount` most recent bundles;
 * - time-based: everything older than `retentionDays` days (skipped when
 *   `retentionDays` is 0 or negative).
 *
 * Regardless of which rule marked them, the most recent bundle is never
 * deleted, so a directory that contains bundles always keeps at least one.
 * Files that do not end in `.bundle` are ignored.
 *
 * @param repoBackupDir - Directory holding the bundles.
 * @param keepCount - Maximum number of bundles to keep; negative values are
 *   treated as 0.
 * @param retentionDays - Maximum bundle age in days; 0 disables the age rule.
 */
async function enforceRetention(
  repoBackupDir: string,
  keepCount: number,
  retentionDays: number = 0,
): Promise<void> {
  const entries = await readdir(repoBackupDir);
  const bundleFiles = entries
    .filter((name) => name.endsWith(".bundle"))
    .map((name) => path.join(repoBackupDir, name));
  if (bundleFiles.length === 0) return;

  const filesWithMtime = await Promise.all(
    bundleFiles.map(async (filePath) => ({
      filePath,
      mtimeMs: (await stat(filePath)).mtimeMs,
    })),
  );
  // Newest first, so index 0 is the bundle the safety net protects.
  filesWithMtime.sort((a, b) => b.mtimeMs - a.mtimeMs);

  const toDelete = new Set<string>();

  // Count-based retention: keep only the N most recent.
  const effectiveKeep = Math.max(0, keepCount);
  if (filesWithMtime.length > effectiveKeep) {
    for (const entry of filesWithMtime.slice(effectiveKeep)) {
      toDelete.add(entry.filePath);
    }
  }

  // Time-based retention: delete bundles older than retentionDays.
  if (retentionDays > 0) {
    const cutoffMs = Date.now() - retentionDays * 86_400_000;
    for (const entry of filesWithMtime) {
      if (entry.mtimeMs < cutoffMs) {
        toDelete.add(entry.filePath);
      }
    }
  }

  // Safety net for BOTH rules: never wipe the directory completely.
  const [newest] = filesWithMtime;
  if (newest && toDelete.size === filesWithMtime.length) {
    toDelete.delete(newest.filePath);
  }

  if (toDelete.size > 0) {
    await Promise.all([...toDelete].map((fp) => rm(fp, { force: true })));
  }
}
/**
 * Read the `PRE_SYNC_BACKUP_ENABLED` environment flag.
 *
 * @returns The parsed flag; `true` when the variable is not set.
 */
export function isPreSyncBackupEnabled(): boolean {
  const rawFlag = process.env.PRE_SYNC_BACKUP_ENABLED;
  return parseBoolean(rawFlag, true);
}
/**
 * Decide whether a pre-sync backup is wanted for this config.
 *
 * The per-config `giteaConfig.backupBeforeSync` value wins whenever it is
 * defined; otherwise the environment-level default is used.
 *
 * @param config - Possibly partial user configuration.
 * @returns `true` when a backup should be created.
 */
export function shouldCreatePreSyncBackup(config: Partial<Config>): boolean {
  const perConfig = config.giteaConfig?.backupBeforeSync;
  if (perConfig !== undefined) {
    return Boolean(perConfig);
  }
  return isPreSyncBackupEnabled();
}
/**
 * Decide whether a failed backup should stop the sync.
 *
 * @param config - Possibly partial user configuration.
 * @returns The truthiness of `giteaConfig.blockSyncOnBackupFailure`, or
 *   `true` when that setting is undefined.
 */
export function shouldBlockSyncOnBackupFailure(config: Partial<Config>): boolean {
  const flag = config.giteaConfig?.blockSyncOnBackupFailure;
  if (flag === undefined) {
    return true;
  }
  return Boolean(flag);
}
// ---- Backup strategy resolver ----

/** Strategy names accepted when validating config or environment input. */
const VALID_STRATEGIES: Set<BackupStrategy> = new Set<BackupStrategy>([
  "disabled", "always", "on-force-push", "block-on-force-push",
]);
/**
 * Work out which backup strategy applies to a config.
 *
 * Sources are consulted in this order; the first one that yields a value wins:
 * 1. `giteaConfig.backupStrategy` (current setting), if it is a known strategy
 * 2. `giteaConfig.backupBeforeSync` (deprecated boolean, kept for old configs)
 * 3. `PRE_SYNC_BACKUP_STRATEGY` env var, if it is a known strategy
 * 4. `PRE_SYNC_BACKUP_ENABLED` env var (legacy boolean)
 * 5. the default, `"on-force-push"`
 *
 * @param config - Possibly partial user configuration.
 * @returns The effective strategy.
 */
export function resolveBackupStrategy(config: Partial<Config>): BackupStrategy {
  const gitea = config.giteaConfig;

  // 1. Explicit strategy stored in the config.
  const configured = gitea?.backupStrategy;
  if (configured && VALID_STRATEGIES.has(configured as BackupStrategy)) {
    return configured as BackupStrategy;
  }

  // 2. Deprecated boolean. `true` deliberately maps to "on-force-push" rather
  //    than "always": mappers default backupBeforeSync to true, so mapping it
  //    to "always" made every legacy config bundle on every sync.
  const legacyFlag = gitea?.backupBeforeSync;
  if (legacyFlag !== undefined) {
    if (legacyFlag) {
      return "on-force-push";
    }
    return "disabled";
  }

  // 3. Strategy named in the environment (trimmed, case-insensitive).
  const fromEnv = process.env.PRE_SYNC_BACKUP_STRATEGY?.trim().toLowerCase();
  if (fromEnv && VALID_STRATEGIES.has(fromEnv as BackupStrategy)) {
    return fromEnv as BackupStrategy;
  }

  // 4. Legacy on/off switch in the environment.
  const legacyEnv = process.env.PRE_SYNC_BACKUP_ENABLED;
  if (legacyEnv !== undefined) {
    const enabled = parseBoolean(legacyEnv, true);
    return enabled ? "always" : "disabled";
  }

  // 5. Nothing configured anywhere.
  return "on-force-push";
}
/**
 * Decide whether a backup is created, given the strategy and whether a
 * force-push was detected.
 *
 * @param strategy - Effective backup strategy.
 * @param forcePushDetected - Result of force-push detection.
 * @returns `true` for `"always"`; the detection result for the two
 *   force-push strategies; `false` for `"disabled"` and anything else.
 */
export function shouldBackupForStrategy(
  strategy: BackupStrategy,
  forcePushDetected: boolean,
): boolean {
  if (strategy === "always") {
    return true;
  }
  if (strategy === "on-force-push" || strategy === "block-on-force-push") {
    return forcePushDetected;
  }
  // "disabled" and any unrecognised value.
  return false;
}
/**
 * Decide whether the sync must be held back (manual approval required).
 *
 * That is the case only when a force-push was actually detected and the
 * strategy is `"block-on-force-push"`.
 *
 * @param strategy - Effective backup strategy.
 * @param forcePushDetected - Result of force-push detection.
 */
export function shouldBlockSyncForStrategy(
  strategy: BackupStrategy,
  forcePushDetected: boolean,
): boolean {
  if (!forcePushDetected) {
    return false;
  }
  return strategy === "block-on-force-push";
}
/**
 * Tell whether force-push detection has to run before the backup / block
 * decision can be made for this strategy.
 *
 * @param strategy - Effective backup strategy.
 * @returns `true` for `"on-force-push"` and `"block-on-force-push"`.
 */
export function strategyNeedsDetection(strategy: BackupStrategy): boolean {
  switch (strategy) {
    case "on-force-push":
    case "block-on-force-push":
      return true;
    default:
      return false;
  }
}
/**
 * Compute where backups for one repository live.
 *
 * The root is the first non-empty value of `giteaConfig.backupDirectory`,
 * the `PRE_SYNC_BACKUP_DIR` env var, or `<cwd>/data/repo-backups`, made
 * absolute. Below it, one sanitized directory level each is used for the
 * user id (`"unknown-user"` when missing), the owner, and the repository.
 *
 * @returns `backupRoot` (absolute) and `repoBackupDir` (absolute, per repo).
 */
export function resolveBackupPaths({
  config,
  owner,
  repoName,
}: {
  config: Partial<Config>;
  owner: string;
  repoName: string;
}): { backupRoot: string; repoBackupDir: string } {
  const configuredRoot =
    config.giteaConfig?.backupDirectory?.trim() ||
    process.env.PRE_SYNC_BACKUP_DIR?.trim() ||
    path.join(process.cwd(), "data", "repo-backups");

  // The root must be absolute: git bundle creation runs with -C <mirror clone>
  // and would interpret a relative path from there. path.resolve() is applied
  // unconditionally instead of checking first, which can miss edge cases
  // (e.g., NixOS systemd services).
  const backupRoot = path.resolve(configuredRoot);

  const userSegment = sanitizePathSegment(config.userId || "unknown-user");
  const ownerSegment = sanitizePathSegment(owner);
  const repoSegment = sanitizePathSegment(repoName);
  const repoBackupDir = path.join(backupRoot, userSegment, ownerSegment, repoSegment);

  return { backupRoot, repoBackupDir };
}
/**
 * Create a git bundle snapshot of a repository before it is synced.
 *
 * Steps: mirror-clone `cloneUrl` (with the decrypted Gitea token embedded)
 * into a fresh temp directory, write a bundle of all refs into the
 * repository's backup directory, apply count- and age-based retention there,
 * and finally remove the temp directory whether or not the steps succeeded.
 *
 * Retention settings come from the config when they are finite numbers,
 * otherwise from the environment:
 * - count: `PRE_SYNC_BACKUP_KEEP_COUNT` (default 5), never less than 1;
 * - days: `PRE_SYNC_BACKUP_RETENTION_DAYS` (default 30), where 0 turns
 *   age-based pruning off.
 *
 * @param config - User configuration; must contain the (encrypted) Gitea token.
 * @param owner - Repository owner, used for the backup path.
 * @param repoName - Repository name, used for the backup path.
 * @param cloneUrl - URL to clone the repository from.
 * @param force - When true, skip the legacy shouldCreatePreSyncBackup check.
 *   Used by the strategy-driven path which has already decided to backup.
 * @returns The absolute path of the bundle that was written.
 * @throws Error when backups are disabled (and `force` is not set), when the
 *   token is missing before or after decryption, or when a git command fails.
 */
export async function createPreSyncBundleBackup({
  config,
  owner,
  repoName,
  cloneUrl,
  force,
}: {
  config: Partial<Config>;
  owner: string;
  repoName: string;
  cloneUrl: string;
  /** When true, skip the legacy shouldCreatePreSyncBackup check.
   *  Used by the strategy-driven path which has already decided to backup. */
  force?: boolean;
}): Promise<{ bundlePath: string }> {
  if (!force && !shouldCreatePreSyncBackup(config)) {
    throw new Error("Pre-sync backup is disabled.");
  }
  if (!config.giteaConfig?.token) {
    throw new Error("Gitea token is required for pre-sync backup.");
  }

  const decryptedConfig = decryptConfigTokens(config as Config);
  const giteaToken = decryptedConfig.giteaConfig?.token;
  if (!giteaToken) {
    throw new Error("Decrypted Gitea token is required for pre-sync backup.");
  }

  const { repoBackupDir } = resolveBackupPaths({ config, owner, repoName });

  const retention = Math.max(
    1,
    Number.isFinite(config.giteaConfig?.backupRetentionCount)
      ? Number(config.giteaConfig?.backupRetentionCount)
      : parsePositiveInt(process.env.PRE_SYNC_BACKUP_KEEP_COUNT, 5),
  );

  // 0 is a meaningful value here (age-based pruning off), so the env var is
  // parsed as a NON-NEGATIVE integer. A strictly-positive parser would turn
  // an explicit "0" into the 30-day default.
  const envRetentionDays = Number.parseInt(
    process.env.PRE_SYNC_BACKUP_RETENTION_DAYS ?? "",
    10,
  );
  const fallbackRetentionDays =
    Number.isFinite(envRetentionDays) && envRetentionDays >= 0 ? envRetentionDays : 30;
  const retentionDays = Math.max(
    0,
    Number.isFinite(config.giteaConfig?.backupRetentionDays)
      ? Number(config.giteaConfig?.backupRetentionDays)
      : fallbackRetentionDays,
  );

  await mkdir(repoBackupDir, { recursive: true });

  const tmpDir = await mkdtemp(path.join(os.tmpdir(), "gitea-mirror-backup-"));
  const mirrorClonePath = path.join(tmpDir, "repo.git");
  // path.resolve guarantees an absolute path, critical because git -C changes
  // the working directory and would misinterpret a relative bundlePath
  const bundlePath = path.resolve(repoBackupDir, `${buildTimestamp()}.bundle`);

  try {
    const authCloneUrl = buildAuthenticatedCloneUrl(cloneUrl, giteaToken);
    await runGit(["clone", "--mirror", authCloneUrl, mirrorClonePath], giteaToken);
    await runGit(["-C", mirrorClonePath, "bundle", "create", bundlePath, "--all"], giteaToken);
    await enforceRetention(repoBackupDir, retention, retentionDays);
    return { bundlePath };
  } finally {
    // The temp clone is removed on success and on failure alike.
    await rm(tmpDir, { recursive: true, force: true });
  }
}