mirror of
https://github.com/RayLabsHQ/gitea-mirror.git
synced 2025-12-06 11:36:44 +03:00
Attempt to address #84
This commit is contained in:
14
README.md
14
README.md
@@ -235,11 +235,21 @@ AUTO_IMPORT_REPOS=true
|
||||
|
||||
# Auto-cleanup orphaned repositories
|
||||
CLEANUP_DELETE_IF_NOT_IN_GITHUB=true
|
||||
CLEANUP_ORPHANED_REPO_ACTION=archive # or 'delete'
|
||||
CLEANUP_ORPHANED_REPO_ACTION=archive # 'archive' (recommended), 'skip', or 'delete'
|
||||
CLEANUP_DRY_RUN=false # Set to true to test without changes
|
||||
```
|
||||
|
||||
**Important**: The scheduler checks every minute for tasks to run. The `GITEA_MIRROR_INTERVAL` determines how often each repository is actually synced. For example, with `8h`, each repo syncs every 8 hours from its last successful sync.
|
||||
**Important Notes**:
|
||||
- The scheduler checks every minute for tasks to run. The `GITEA_MIRROR_INTERVAL` determines how often each repository is actually synced. For example, with `8h`, each repo syncs every 8 hours from its last successful sync.
|
||||
|
||||
**🛡️ Backup Protection Features**:
|
||||
- **No Accidental Deletions**: Repository cleanup is automatically skipped if GitHub is inaccessible (account deleted, banned, or API errors)
|
||||
- **Archive Never Deletes Data**: The `archive` action preserves all repository data:
|
||||
- Regular repositories: Made read-only using Gitea's archive feature
|
||||
- Mirror repositories: Renamed with `[ARCHIVED]` prefix (Gitea API limitation prevents archiving mirrors)
|
||||
- Failed operations: Repository remains fully accessible even if marking as archived fails
|
||||
- **The Whole Point of Backups**: Your Gitea mirrors are preserved even when GitHub sources disappear - that's why you have backups!
|
||||
- **Strongly Recommended**: Always use `CLEANUP_ORPHANED_REPO_ACTION=archive` (default) instead of `delete`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
|
||||
@@ -206,10 +206,25 @@ Configure automatic cleanup of old events and data.
|
||||
|----------|-------------|---------|---------|
|
||||
| `CLEANUP_DELETE_FROM_GITEA` | Delete repositories from Gitea | `false` | `true`, `false` |
|
||||
| `CLEANUP_DELETE_IF_NOT_IN_GITHUB` | Delete repos not found in GitHub (automatically enables cleanup) | `true` | `true`, `false` |
|
||||
| `CLEANUP_ORPHANED_REPO_ACTION` | Action for orphaned repositories | `archive` | `skip`, `archive`, `delete` |
|
||||
| `CLEANUP_ORPHANED_REPO_ACTION` | Action for orphaned repositories. **Note**: `archive` is recommended to preserve backups | `archive` | `skip`, `archive`, `delete` |
|
||||
| `CLEANUP_DRY_RUN` | Test mode without actual deletion | `true` | `true`, `false` |
|
||||
| `CLEANUP_PROTECTED_REPOS` | Comma-separated list of protected repository names | - | Comma-separated strings |
|
||||
|
||||
**🛡️ Safety Features (Backup Protection)**:
|
||||
- **GitHub Failures Don't Delete Backups**: Cleanup is automatically skipped if GitHub API returns errors (404, 403, connection issues)
|
||||
- **Archive Never Deletes**: The `archive` action ALWAYS preserves repository data; it never deletes anything
|
||||
- **Graceful Degradation**: If marking as archived fails, the repository remains fully accessible in Gitea
|
||||
- **The Purpose of Backups**: Your mirrors are preserved even when GitHub sources disappear - that's the whole point!
|
||||
|
||||
**Archive Behavior (Aligned with Gitea API)**:
|
||||
- **Regular repositories**: Uses Gitea's native archive feature (PATCH `/repos/{owner}/{repo}` with `archived: true`)
|
||||
- Makes repository read-only while preserving all data
|
||||
- **Mirror repositories**: Uses rename strategy (Gitea API returns 422 for archiving mirrors)
|
||||
- Renamed with `[ARCHIVED]` prefix for clear identification
|
||||
- Description updated with preservation notice and timestamp
|
||||
- Mirror interval set to 8760h (1 year) to minimize sync attempts
|
||||
- Repository remains fully accessible and cloneable
|
||||
|
||||
### Execution Settings
|
||||
|
||||
| Variable | Description | Default | Options |
|
||||
|
||||
119
src/lib/gitea.ts
119
src/lib/gitea.ts
@@ -7,7 +7,7 @@ import { membershipRoleEnum } from "@/types/organizations";
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import type { Config } from "@/types/config";
|
||||
import type { Organization, Repository } from "./db/schema";
|
||||
import { httpPost, httpGet, httpDelete, httpPut } from "./http-client";
|
||||
import { httpPost, httpGet, httpDelete, httpPut, httpPatch } from "./http-client";
|
||||
import { createMirrorJob } from "./helpers";
|
||||
import { db, organizations, repositories } from "./db";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
@@ -2016,6 +2016,12 @@ export async function deleteGiteaRepo(
|
||||
|
||||
/**
|
||||
* Archive a repository in Gitea
|
||||
*
|
||||
* IMPORTANT: This function NEVER deletes data. It only marks repositories as archived.
|
||||
* - For regular repos: Uses Gitea's archive feature (makes read-only)
|
||||
* - For mirror repos: Renames with [ARCHIVED] prefix (Gitea doesn't allow archiving mirrors)
|
||||
*
|
||||
* This ensures backups are preserved even when the GitHub source disappears.
|
||||
*/
|
||||
export async function archiveGiteaRepo(
|
||||
client: { url: string; token: string },
|
||||
@@ -2023,24 +2029,115 @@ export async function archiveGiteaRepo(
|
||||
repo: string
|
||||
): Promise<void> {
|
||||
try {
|
||||
const response = await httpPut(
|
||||
// First, check if this is a mirror repository
|
||||
const repoResponse = await httpGet(
|
||||
`${client.url}/api/v1/repos/${owner}/${repo}`,
|
||||
{
|
||||
archived: true,
|
||||
},
|
||||
{
|
||||
Authorization: `token ${client.token}`,
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
);
|
||||
|
||||
if (response.status >= 400) {
|
||||
throw new Error(`Failed to archive repository ${owner}/${repo}: ${response.status} ${response.statusText}`);
|
||||
if (!repoResponse.data) {
|
||||
console.warn(`[Archive] Repository ${owner}/${repo} not found in Gitea. Skipping.`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Successfully archived repository ${owner}/${repo} in Gitea`);
|
||||
if (repoResponse.data?.mirror) {
|
||||
console.log(`[Archive] Repository ${owner}/${repo} is a mirror. Using safe rename strategy.`);
|
||||
|
||||
// IMPORTANT: Gitea API doesn't allow archiving mirror repositories
|
||||
// According to Gitea source code, attempting to archive a mirror returns:
|
||||
// "repo is a mirror, cannot archive/un-archive" (422 Unprocessable Entity)
|
||||
//
|
||||
// Our solution: Rename the repo to clearly mark it as orphaned
|
||||
// This preserves all data while indicating the repo is no longer actively synced
|
||||
|
||||
const currentName = repoResponse.data.name;
|
||||
|
||||
// Skip if already marked as archived
|
||||
if (currentName.startsWith('[ARCHIVED]')) {
|
||||
console.log(`[Archive] Repository ${owner}/${repo} already marked as archived. Skipping.`);
|
||||
return;
|
||||
}
|
||||
|
||||
const archivedName = `[ARCHIVED] ${currentName}`;
|
||||
const currentDesc = repoResponse.data.description || '';
|
||||
const archiveNotice = `\n\n⚠️ ARCHIVED: Original GitHub repository no longer exists. Preserved as backup on ${new Date().toISOString()}`;
|
||||
|
||||
// Only add notice if not already present
|
||||
const newDescription = currentDesc.includes('⚠️ ARCHIVED:')
|
||||
? currentDesc
|
||||
: currentDesc + archiveNotice;
|
||||
|
||||
const renameResponse = await httpPatch(
|
||||
`${client.url}/api/v1/repos/${owner}/${repo}`,
|
||||
{
|
||||
name: archivedName,
|
||||
description: newDescription,
|
||||
},
|
||||
{
|
||||
Authorization: `token ${client.token}`,
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
);
|
||||
|
||||
if (renameResponse.status >= 400) {
|
||||
// If rename fails, log but don't throw - data is still preserved
|
||||
console.error(`[Archive] Failed to rename mirror repository ${owner}/${repo}: ${renameResponse.status}`);
|
||||
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[Archive] Successfully marked mirror repository ${owner}/${repo} as archived (renamed to ${archivedName})`);
|
||||
|
||||
// Also try to reduce sync frequency to prevent unnecessary API calls
|
||||
// This is optional - if it fails, the repo is still preserved
|
||||
try {
|
||||
await httpPatch(
|
||||
`${client.url}/api/v1/repos/${owner}/${archivedName}`,
|
||||
{
|
||||
mirror_interval: "8760h", // 1 year - minimizes sync attempts
|
||||
},
|
||||
{
|
||||
Authorization: `token ${client.token}`,
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
);
|
||||
console.log(`[Archive] Reduced sync frequency for ${owner}/${archivedName} to yearly`);
|
||||
} catch (intervalError) {
|
||||
// Non-critical - repo is still preserved even if we can't change interval
|
||||
console.debug(`[Archive] Could not update mirror interval (non-critical):`, intervalError);
|
||||
}
|
||||
} else {
|
||||
// For non-mirror repositories, use Gitea's native archive feature
|
||||
// This makes the repository read-only but preserves all data
|
||||
console.log(`[Archive] Archiving regular repository ${owner}/${repo}`);
|
||||
|
||||
const response = await httpPatch(
|
||||
`${client.url}/api/v1/repos/${owner}/${repo}`,
|
||||
{
|
||||
archived: true,
|
||||
},
|
||||
{
|
||||
Authorization: `token ${client.token}`,
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
);
|
||||
|
||||
if (response.status >= 400) {
|
||||
// If archive fails, log but data is still preserved in Gitea
|
||||
console.error(`[Archive] Failed to archive repository ${owner}/${repo}: ${response.status}`);
|
||||
console.log(`[Archive] Repository ${owner}/${repo} remains accessible but not marked as archived`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[Archive] Successfully archived repository ${owner}/${repo} (now read-only)`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error archiving repository ${owner}/${repo}:`, error);
|
||||
throw error;
|
||||
// Even on error, the repository data is preserved in Gitea
|
||||
// We just couldn't mark it as archived
|
||||
console.error(`[Archive] Could not mark repository ${owner}/${repo} as archived:`, error);
|
||||
console.log(`[Archive] Repository ${owner}/${repo} data is preserved but not marked as archived`);
|
||||
// Don't throw - we want cleanup to continue for other repos
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,15 +27,37 @@ async function identifyOrphanedRepositories(config: any): Promise<any[]> {
|
||||
const decryptedToken = getDecryptedGitHubToken(config);
|
||||
const octokit = createGitHubClient(decryptedToken);
|
||||
|
||||
// Fetch GitHub data
|
||||
const [basicAndForkedRepos, starredRepos] = await Promise.all([
|
||||
getGithubRepositories({ octokit, config }),
|
||||
config.githubConfig?.includeStarred
|
||||
? getGithubStarredRepositories({ octokit, config })
|
||||
: Promise.resolve([]),
|
||||
]);
|
||||
let allGithubRepos = [];
|
||||
let githubApiAccessible = true;
|
||||
|
||||
try {
|
||||
// Fetch GitHub data
|
||||
const [basicAndForkedRepos, starredRepos] = await Promise.all([
|
||||
getGithubRepositories({ octokit, config }),
|
||||
config.githubConfig?.includeStarred
|
||||
? getGithubStarredRepositories({ octokit, config })
|
||||
: Promise.resolve([]),
|
||||
]);
|
||||
|
||||
allGithubRepos = [...basicAndForkedRepos, ...starredRepos];
|
||||
} catch (githubError: any) {
|
||||
// Handle GitHub API errors gracefully
|
||||
console.warn(`[Repository Cleanup] GitHub API error for user ${userId}: ${githubError.message}`);
|
||||
|
||||
// Check if it's a critical error (like account deleted/banned)
|
||||
if (githubError.status === 404 || githubError.status === 403) {
|
||||
console.error(`[Repository Cleanup] CRITICAL: GitHub account may be deleted/banned. Skipping cleanup to prevent data loss.`);
|
||||
console.error(`[Repository Cleanup] Consider using CLEANUP_ORPHANED_REPO_ACTION=archive instead of delete for safety.`);
|
||||
|
||||
// Return empty array to skip cleanup entirely when GitHub account is inaccessible
|
||||
return [];
|
||||
}
|
||||
|
||||
// For other errors, also skip cleanup to be safe
|
||||
console.error(`[Repository Cleanup] Skipping cleanup due to GitHub API error. This prevents accidental deletion of backups.`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const allGithubRepos = [...basicAndForkedRepos, ...starredRepos];
|
||||
const githubRepoFullNames = new Set(allGithubRepos.map(repo => repo.fullName));
|
||||
|
||||
// Get all repositories from our database
|
||||
@@ -44,13 +66,19 @@ async function identifyOrphanedRepositories(config: any): Promise<any[]> {
|
||||
.from(repositories)
|
||||
.where(eq(repositories.userId, userId));
|
||||
|
||||
// Identify orphaned repositories
|
||||
// Only identify repositories as orphaned if we successfully accessed GitHub
|
||||
// This prevents false positives when GitHub is down or account is inaccessible
|
||||
const orphanedRepos = dbRepos.filter(repo => !githubRepoFullNames.has(repo.fullName));
|
||||
|
||||
if (orphanedRepos.length > 0) {
|
||||
console.log(`[Repository Cleanup] Found ${orphanedRepos.length} orphaned repositories for user ${userId}`);
|
||||
}
|
||||
|
||||
return orphanedRepos;
|
||||
} catch (error) {
|
||||
console.error(`[Repository Cleanup] Error identifying orphaned repositories for user ${userId}:`, error);
|
||||
throw error;
|
||||
// Return empty array on error to prevent accidental deletions
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user