Attempt to address #84

This commit is contained in:
Arunavo Ray
2025-09-07 13:55:20 +05:30
parent b956b71c5f
commit 502796371f
4 changed files with 174 additions and 24 deletions

View File

@@ -235,11 +235,21 @@ AUTO_IMPORT_REPOS=true
# Auto-cleanup orphaned repositories # Auto-cleanup orphaned repositories
CLEANUP_DELETE_IF_NOT_IN_GITHUB=true CLEANUP_DELETE_IF_NOT_IN_GITHUB=true
CLEANUP_ORPHANED_REPO_ACTION=archive # or 'delete' CLEANUP_ORPHANED_REPO_ACTION=archive # 'archive' (recommended) or 'delete'
CLEANUP_DRY_RUN=false # Set to true to test without changes CLEANUP_DRY_RUN=false # Set to true to test without changes
``` ```
**Important**: The scheduler checks every minute for tasks to run. The `GITEA_MIRROR_INTERVAL` determines how often each repository is actually synced. For example, with `8h`, each repo syncs every 8 hours from its last successful sync. **Important Notes**:
- The scheduler checks every minute for tasks to run. The `GITEA_MIRROR_INTERVAL` determines how often each repository is actually synced. For example, with `8h`, each repo syncs every 8 hours from its last successful sync.
**🛡️ Backup Protection Features**:
- **No Accidental Deletions**: Repository cleanup is automatically skipped if GitHub is inaccessible (account deleted, banned, or API errors)
- **Archive Never Deletes Data**: The `archive` action preserves all repository data:
- Regular repositories: Made read-only using Gitea's archive feature
- Mirror repositories: Renamed with `[ARCHIVED]` prefix (Gitea API limitation prevents archiving mirrors)
- Failed operations: Repository remains fully accessible even if marking as archived fails
- **The Whole Point of Backups**: Your Gitea mirrors are preserved even when GitHub sources disappear - that's why you have backups!
- **Strongly Recommended**: Always use `CLEANUP_ORPHANED_REPO_ACTION=archive` (default) instead of `delete`
## Troubleshooting ## Troubleshooting

View File

@@ -206,10 +206,25 @@ Configure automatic cleanup of old events and data.
|----------|-------------|---------|---------| |----------|-------------|---------|---------|
| `CLEANUP_DELETE_FROM_GITEA` | Delete repositories from Gitea | `false` | `true`, `false` | | `CLEANUP_DELETE_FROM_GITEA` | Delete repositories from Gitea | `false` | `true`, `false` |
| `CLEANUP_DELETE_IF_NOT_IN_GITHUB` | Delete repos not found in GitHub (automatically enables cleanup) | `true` | `true`, `false` | | `CLEANUP_DELETE_IF_NOT_IN_GITHUB` | Delete repos not found in GitHub (automatically enables cleanup) | `true` | `true`, `false` |
| `CLEANUP_ORPHANED_REPO_ACTION` | Action for orphaned repositories | `archive` | `skip`, `archive`, `delete` | | `CLEANUP_ORPHANED_REPO_ACTION` | Action for orphaned repositories. **Note**: `archive` is recommended to preserve backups | `archive` | `skip`, `archive`, `delete` |
| `CLEANUP_DRY_RUN` | Test mode without actual deletion | `true` | `true`, `false` | | `CLEANUP_DRY_RUN` | Test mode without actual deletion | `true` | `true`, `false` |
| `CLEANUP_PROTECTED_REPOS` | Comma-separated list of protected repository names | - | Comma-separated strings | | `CLEANUP_PROTECTED_REPOS` | Comma-separated list of protected repository names | - | Comma-separated strings |
**🛡️ Safety Features (Backup Protection)**:
- **GitHub Failures Don't Delete Backups**: Cleanup is automatically skipped if GitHub API returns errors (404, 403, connection issues)
- **Archive Never Deletes**: The `archive` action ALWAYS preserves repository data, it never deletes
- **Graceful Degradation**: If marking as archived fails, the repository remains fully accessible in Gitea
- **The Purpose of Backups**: Your mirrors are preserved even when GitHub sources disappear - that's the whole point!
**Archive Behavior (Aligned with Gitea API)**:
- **Regular repositories**: Uses Gitea's native archive feature (PATCH `/repos/{owner}/{repo}` with `archived: true`)
- Makes repository read-only while preserving all data
- **Mirror repositories**: Uses rename strategy (Gitea API returns 422 for archiving mirrors)
- Renamed with `[ARCHIVED]` prefix for clear identification
- Description updated with preservation notice and timestamp
- Mirror interval set to 8760h (1 year) to minimize sync attempts
- Repository remains fully accessible and cloneable
### Execution Settings ### Execution Settings
| Variable | Description | Default | Options | | Variable | Description | Default | Options |

View File

@@ -7,7 +7,7 @@ import { membershipRoleEnum } from "@/types/organizations";
import { Octokit } from "@octokit/rest"; import { Octokit } from "@octokit/rest";
import type { Config } from "@/types/config"; import type { Config } from "@/types/config";
import type { Organization, Repository } from "./db/schema"; import type { Organization, Repository } from "./db/schema";
import { httpPost, httpGet, httpDelete, httpPut } from "./http-client"; import { httpPost, httpGet, httpDelete, httpPut, httpPatch } from "./http-client";
import { createMirrorJob } from "./helpers"; import { createMirrorJob } from "./helpers";
import { db, organizations, repositories } from "./db"; import { db, organizations, repositories } from "./db";
import { eq, and } from "drizzle-orm"; import { eq, and } from "drizzle-orm";
@@ -2016,6 +2016,12 @@ export async function deleteGiteaRepo(
/** /**
* Archive a repository in Gitea * Archive a repository in Gitea
*
* IMPORTANT: This function NEVER deletes data. It only marks repositories as archived.
* - For regular repos: Uses Gitea's archive feature (makes read-only)
* - For mirror repos: Renames with [ARCHIVED] prefix (Gitea doesn't allow archiving mirrors)
*
* This ensures backups are preserved even when the GitHub source disappears.
*/ */
export async function archiveGiteaRepo( export async function archiveGiteaRepo(
client: { url: string; token: string }, client: { url: string; token: string },
@@ -2023,24 +2029,115 @@ export async function archiveGiteaRepo(
repo: string repo: string
): Promise<void> { ): Promise<void> {
try { try {
const response = await httpPut( // First, check if this is a mirror repository
const repoResponse = await httpGet(
`${client.url}/api/v1/repos/${owner}/${repo}`, `${client.url}/api/v1/repos/${owner}/${repo}`,
{
archived: true,
},
{ {
Authorization: `token ${client.token}`, Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
} }
); );
if (response.status >= 400) { if (!repoResponse.data) {
throw new Error(`Failed to archive repository ${owner}/${repo}: ${response.status} ${response.statusText}`); console.warn(`[Archive] Repository ${owner}/${repo} not found in Gitea. Skipping.`);
return;
} }
console.log(`Successfully archived repository ${owner}/${repo} in Gitea`); if (repoResponse.data?.mirror) {
console.log(`[Archive] Repository ${owner}/${repo} is a mirror. Using safe rename strategy.`);
// IMPORTANT: Gitea API doesn't allow archiving mirror repositories
// According to Gitea source code, attempting to archive a mirror returns:
// "repo is a mirror, cannot archive/un-archive" (422 Unprocessable Entity)
//
// Our solution: Rename the repo to clearly mark it as orphaned
// This preserves all data while indicating the repo is no longer actively synced
const currentName = repoResponse.data.name;
// Skip if already marked as archived
if (currentName.startsWith('[ARCHIVED]')) {
console.log(`[Archive] Repository ${owner}/${repo} already marked as archived. Skipping.`);
return;
}
const archivedName = `[ARCHIVED] ${currentName}`;
const currentDesc = repoResponse.data.description || '';
const archiveNotice = `\n\n⚠ ARCHIVED: Original GitHub repository no longer exists. Preserved as backup on ${new Date().toISOString()}`;
// Only add notice if not already present
const newDescription = currentDesc.includes('⚠️ ARCHIVED:')
? currentDesc
: currentDesc + archiveNotice;
const renameResponse = await httpPatch(
`${client.url}/api/v1/repos/${owner}/${repo}`,
{
name: archivedName,
description: newDescription,
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
}
);
if (renameResponse.status >= 400) {
// If rename fails, log but don't throw - data is still preserved
console.error(`[Archive] Failed to rename mirror repository ${owner}/${repo}: ${renameResponse.status}`);
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
return;
}
console.log(`[Archive] Successfully marked mirror repository ${owner}/${repo} as archived (renamed to ${archivedName})`);
// Also try to reduce sync frequency to prevent unnecessary API calls
// This is optional - if it fails, the repo is still preserved
try {
await httpPatch(
`${client.url}/api/v1/repos/${owner}/${archivedName}`,
{
mirror_interval: "8760h", // 1 year - minimizes sync attempts
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
}
);
console.log(`[Archive] Reduced sync frequency for ${owner}/${archivedName} to yearly`);
} catch (intervalError) {
// Non-critical - repo is still preserved even if we can't change interval
console.debug(`[Archive] Could not update mirror interval (non-critical):`, intervalError);
}
} else {
// For non-mirror repositories, use Gitea's native archive feature
// This makes the repository read-only but preserves all data
console.log(`[Archive] Archiving regular repository ${owner}/${repo}`);
const response = await httpPatch(
`${client.url}/api/v1/repos/${owner}/${repo}`,
{
archived: true,
},
{
Authorization: `token ${client.token}`,
'Content-Type': 'application/json',
}
);
if (response.status >= 400) {
// If archive fails, log but data is still preserved in Gitea
console.error(`[Archive] Failed to archive repository ${owner}/${repo}: ${response.status}`);
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
return;
}
console.log(`[Archive] Successfully archived repository ${owner}/${repo} (now read-only)`);
}
} catch (error) { } catch (error) {
console.error(`Error archiving repository ${owner}/${repo}:`, error); // Even on error, the repository data is preserved in Gitea
throw error; // We just couldn't mark it as archived
console.error(`[Archive] Could not mark repository ${owner}/${repo} as archived:`, error);
console.log(`[Archive] Repository ${owner}/${repo} data is preserved but not marked as archived`);
// Don't throw - we want cleanup to continue for other repos
} }
} }

View File

@@ -27,15 +27,37 @@ async function identifyOrphanedRepositories(config: any): Promise<any[]> {
const decryptedToken = getDecryptedGitHubToken(config); const decryptedToken = getDecryptedGitHubToken(config);
const octokit = createGitHubClient(decryptedToken); const octokit = createGitHubClient(decryptedToken);
// Fetch GitHub data let allGithubRepos = [];
const [basicAndForkedRepos, starredRepos] = await Promise.all([ let githubApiAccessible = true;
getGithubRepositories({ octokit, config }),
config.githubConfig?.includeStarred try {
? getGithubStarredRepositories({ octokit, config }) // Fetch GitHub data
: Promise.resolve([]), const [basicAndForkedRepos, starredRepos] = await Promise.all([
]); getGithubRepositories({ octokit, config }),
config.githubConfig?.includeStarred
? getGithubStarredRepositories({ octokit, config })
: Promise.resolve([]),
]);
allGithubRepos = [...basicAndForkedRepos, ...starredRepos];
} catch (githubError: any) {
// Handle GitHub API errors gracefully
console.warn(`[Repository Cleanup] GitHub API error for user ${userId}: ${githubError.message}`);
// Check if it's a critical error (like account deleted/banned)
if (githubError.status === 404 || githubError.status === 403) {
console.error(`[Repository Cleanup] CRITICAL: GitHub account may be deleted/banned. Skipping cleanup to prevent data loss.`);
console.error(`[Repository Cleanup] Consider using CLEANUP_ORPHANED_REPO_ACTION=archive instead of delete for safety.`);
// Return empty array to skip cleanup entirely when GitHub account is inaccessible
return [];
}
// For other errors, also skip cleanup to be safe
console.error(`[Repository Cleanup] Skipping cleanup due to GitHub API error. This prevents accidental deletion of backups.`);
return [];
}
const allGithubRepos = [...basicAndForkedRepos, ...starredRepos];
const githubRepoFullNames = new Set(allGithubRepos.map(repo => repo.fullName)); const githubRepoFullNames = new Set(allGithubRepos.map(repo => repo.fullName));
// Get all repositories from our database // Get all repositories from our database
@@ -44,13 +66,19 @@ async function identifyOrphanedRepositories(config: any): Promise<any[]> {
.from(repositories) .from(repositories)
.where(eq(repositories.userId, userId)); .where(eq(repositories.userId, userId));
// Identify orphaned repositories // Only identify repositories as orphaned if we successfully accessed GitHub
// This prevents false positives when GitHub is down or account is inaccessible
const orphanedRepos = dbRepos.filter(repo => !githubRepoFullNames.has(repo.fullName)); const orphanedRepos = dbRepos.filter(repo => !githubRepoFullNames.has(repo.fullName));
if (orphanedRepos.length > 0) {
console.log(`[Repository Cleanup] Found ${orphanedRepos.length} orphaned repositories for user ${userId}`);
}
return orphanedRepos; return orphanedRepos;
} catch (error) { } catch (error) {
console.error(`[Repository Cleanup] Error identifying orphaned repositories for user ${userId}:`, error); console.error(`[Repository Cleanup] Error identifying orphaned repositories for user ${userId}:`, error);
throw error; // Return empty array on error to prevent accidental deletions
return [];
} }
} }