feat: enhance event management by adding duplicate removal, cleanup functionality, and improving activity logging

This commit is contained in:
Arunavo Ray
2025-05-24 13:25:58 +05:30
parent 546db472e5
commit 98610482ae
15 changed files with 603 additions and 305 deletions

View File

@@ -60,40 +60,36 @@ The database file should be located in the `./data/gitea-mirror.db` directory. I
The following scripts help manage events in the SQLite database:
### Event Inspection (check-events.ts)
> **Note**: For a more user-friendly approach, you can use the cleanup button in the Activity Log page of the web interface to delete all activities with a single click.
Displays all events currently stored in the database.
```bash
bun scripts/check-events.ts
```
### Event Cleanup (cleanup-events.ts)
Removes old events from the database to prevent it from growing too large.
Removes old events and duplicate events from the database to prevent it from growing too large.
```bash
# Remove events older than 7 days (default)
# Remove events older than 7 days (default) and duplicates
bun scripts/cleanup-events.ts
# Remove events older than X days
# Remove events older than X days and duplicates
bun scripts/cleanup-events.ts 14
```
This script can be scheduled to run periodically (e.g., daily) using cron or another scheduler. When using Docker, this is automatically scheduled to run daily.
### Mark Events as Read (mark-events-read.ts)
### Remove Duplicate Events (remove-duplicate-events.ts)
Marks all unread events as read.
Removes only duplicate events, identified by their deduplication keys; old events are left untouched.
```bash
bun scripts/mark-events-read.ts
# Remove duplicate events for all users
bun scripts/remove-duplicate-events.ts
# Remove duplicate events for a specific user
bun scripts/remove-duplicate-events.ts <userId>
```
### Make Events Appear Older (make-events-old.ts)
For testing purposes, this script modifies event timestamps to make them appear older.
### Mirror Jobs Cleanup (cleanup-mirror-jobs.ts)
Removes old mirror jobs from the database to prevent it from growing too large.
@@ -108,10 +104,20 @@ bun scripts/cleanup-mirror-jobs.ts 14
This script can be scheduled to run periodically (e.g., daily) using cron or another scheduler. When using Docker, this is automatically scheduled to run daily.
### Fix Interrupted Jobs (fix-interrupted-jobs.ts)
Fixes interrupted jobs that might be preventing cleanup by marking them as failed.
```bash
bun scripts/make-events-old.ts
# Fix all interrupted jobs
bun scripts/fix-interrupted-jobs.ts
# Fix interrupted jobs for a specific user
bun scripts/fix-interrupted-jobs.ts <userId>
```
Use this script if activity cleanup fails because of "interrupted" jobs that cannot be deleted.
## Deployment Scripts
### Docker Deployment

View File

@@ -1,38 +0,0 @@
#!/usr/bin/env bun
/**
* Script to check events in the database
*/
import { Database } from "bun:sqlite";
import path from "path";
import fs from "fs";
// Expected on-disk layout: <cwd>/data/gitea-mirror.db
const dataDir = path.join(process.cwd(), "data");
const dbPath = path.join(dataDir, "gitea-mirror.db");

// Verify the directory first, then the file; stop at the first one missing.
const requiredPaths: [string, string][] = [
  ["Data directory not found:", dataDir],
  ["Database file not found:", dbPath],
];
for (const [message, location] of requiredPaths) {
  if (!fs.existsSync(location)) {
    console.error(message, location);
    process.exit(1);
  }
}

// Open the database and make sure it contains an events table at all.
const db = new Database(dbPath);
const eventsTableRow = db
  .query("SELECT name FROM sqlite_master WHERE type='table' AND name='events'")
  .get();
if (!eventsTableRow) {
  console.error("Events table does not exist");
  process.exit(1);
}

// Dump every stored event as pretty-printed JSON.
const storedEvents = db.query("SELECT * FROM events").all();
console.log("Events in the database:");
console.log(JSON.stringify(storedEvents, null, 2));

View File

@@ -9,7 +9,7 @@
* Where [days] is the number of days to keep events (default: 7)
*/
import { cleanupOldEvents } from "../src/lib/events";
import { cleanupOldEvents, removeDuplicateEvents } from "../src/lib/events";
// Parse command line arguments
const args = process.argv.slice(2);
@@ -24,13 +24,20 @@ async function runCleanup() {
try {
console.log(`Starting event cleanup (retention: ${daysToKeep} days)...`);
// Call the cleanupOldEvents function from the events module
// First, remove duplicate events
console.log("Step 1: Removing duplicate events...");
const duplicateResult = await removeDuplicateEvents();
console.log(`- Duplicate events removed: ${duplicateResult.duplicatesRemoved}`);
// Then, clean up old events
console.log("Step 2: Cleaning up old events...");
const result = await cleanupOldEvents(daysToKeep);
console.log(`Cleanup summary:`);
console.log(`- Duplicate events removed: ${duplicateResult.duplicatesRemoved}`);
console.log(`- Read events deleted: ${result.readEventsDeleted}`);
console.log(`- Unread events deleted: ${result.unreadEventsDeleted}`);
console.log(`- Total events deleted: ${result.readEventsDeleted + result.unreadEventsDeleted}`);
console.log(`- Total events deleted: ${result.readEventsDeleted + result.unreadEventsDeleted + duplicateResult.duplicatesRemoved}`);
console.log("Event cleanup completed successfully");
} catch (error) {

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env bun
/**
* Script to fix interrupted jobs that might be preventing cleanup
* This script marks all in-progress jobs as failed to allow them to be deleted
*
* Usage:
* bun scripts/fix-interrupted-jobs.ts [userId]
*
* Where [userId] is optional - if provided, only fixes jobs for that user
*/
import { db, mirrorJobs } from "../src/lib/db";
import { and, eq } from "drizzle-orm";
// Optional first CLI argument: when given, the run is limited to that user.
const args = process.argv.slice(2);
const userId: string | undefined = args[0];
/**
 * Marks every in-progress mirror job as failed.
 *
 * Reads the module-level `userId` (optional CLI argument); when it is set,
 * only that user's in-progress jobs are listed and updated. Each affected job
 * is logged before the update. On any error the error is logged and the
 * process exits with code 1.
 */
async function fixInterruptedJobs() {
  try {
    console.log("Checking for interrupted jobs...");

    // Build ONE combined condition and reuse it for both the SELECT and the
    // UPDATE. Chaining a second `.where()` onto a Drizzle builder replaces the
    // earlier condition instead of AND-ing with it, which would drop the
    // `inProgress = true` filter and mark all of the user's jobs as failed.
    const isInProgress = eq(mirrorJobs.inProgress, true);
    if (userId) {
      console.log(`Filtering for user: ${userId}`);
    }
    const condition = userId
      ? and(isInProgress, eq(mirrorJobs.userId, userId))
      : isInProgress;

    // Find all matching in-progress jobs
    const inProgressJobs = await db
      .select()
      .from(mirrorJobs)
      .where(condition);

    if (inProgressJobs.length === 0) {
      console.log("No interrupted jobs found.");
      return;
    }

    console.log(`Found ${inProgressJobs.length} interrupted jobs:`);
    for (const job of inProgressJobs) {
      console.log(`- Job ${job.id}: ${job.message} (${job.repositoryName || job.organizationName || 'Unknown'})`);
    }

    // Mark the same set of jobs as failed
    await db
      .update(mirrorJobs)
      .set({
        inProgress: false,
        completedAt: new Date(),
        status: "failed",
        message: "Job interrupted and marked as failed by cleanup script"
      })
      .where(condition);

    console.log(`✅ Successfully marked ${inProgressJobs.length} interrupted jobs as failed.`);
    console.log("These jobs can now be deleted through the normal cleanup process.");
  } catch (error) {
    console.error("Error fixing interrupted jobs:", error);
    process.exit(1);
  }
}
// Script entry point; the returned promise is intentionally not awaited.
void fixInterruptedJobs();

View File

@@ -1,29 +0,0 @@
#!/usr/bin/env bun
/**
* Script to make events appear older for testing cleanup
*/
import { db, events } from "../src/lib/db";
/**
 * Test helper: rewrites the `createdAt` of every event to two days before now.
 * Logs how many rows were changed; on failure logs the error and exits with code 1.
 */
async function makeEventsOld() {
  try {
    console.log("Making events appear older...");

    // Move "now" back by two calendar days.
    const backdated = new Date();
    backdated.setDate(backdated.getDate() - 2);

    // No WHERE clause: the update applies to the whole events table.
    const outcome = await db.update(events).set({ createdAt: backdated });

    const updatedCount = outcome.changes || 0;
    console.log(`Updated ${updatedCount} events to appear older`);
  } catch (failure) {
    console.error("Error updating event timestamps:", failure);
    process.exit(1);
  }
}
// Script entry point; the returned promise is intentionally not awaited.
void makeEventsOld();

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bun
/**
* Script to mark all events as read
*/
import { db, events } from "../src/lib/db";
import { eq } from "drizzle-orm";
/**
 * Sets `read = true` on every event that is currently unread.
 * Logs how many rows were changed; on failure logs the error and exits with code 1.
 */
async function markEventsAsRead() {
  try {
    console.log("Marking all events as read...");

    // Restrict the update to rows that are still unread.
    const stillUnread = eq(events.read, false);
    const outcome = await db.update(events).set({ read: true }).where(stillUnread);

    const markedCount = outcome.changes || 0;
    console.log(`Marked ${markedCount} events as read`);
  } catch (failure) {
    console.error("Error marking events as read:", failure);
    process.exit(1);
  }
}
// Script entry point; the returned promise is intentionally not awaited.
void markEventsAsRead();

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bun
/**
* Script to remove duplicate events from the database
* This script identifies and removes events with duplicate deduplication keys
*
* Usage:
* bun scripts/remove-duplicate-events.ts [userId]
*
* Where [userId] is optional - if provided, only removes duplicates for that user
*/
import { removeDuplicateEvents } from "../src/lib/events";
// Optional first CLI argument: when given, the run is limited to that user.
const args = process.argv.slice(2);
const userId: string | undefined = args[0];
/**
 * Runs `removeDuplicateEvents` for the module-level `userId` (or for all users
 * when it is undefined) and prints a short summary.
 * On failure logs the error and exits with code 1.
 */
async function runDuplicateRemoval() {
  try {
    console.log(
      userId
        ? `Starting duplicate event removal for user: ${userId}...`
        : "Starting duplicate event removal for all users..."
    );

    // Delegate the actual work to the events module.
    const { duplicatesRemoved } = await removeDuplicateEvents(userId);

    console.log(`Duplicate removal summary:`);
    console.log(`- Duplicate events removed: ${duplicatesRemoved}`);
    console.log(
      duplicatesRemoved > 0
        ? "Duplicate event removal completed successfully"
        : "No duplicate events found"
    );
  } catch (failure) {
    console.error("Error running duplicate event removal:", failure);
    process.exit(1);
  }
}
// Script entry point; the returned promise is intentionally not awaited.
void runDuplicateRemoval();

View File

@@ -1,133 +0,0 @@
#!/usr/bin/env bun
/**
* Script to update the mirror_jobs table with new columns for resilience
*/
import { Database } from "bun:sqlite";
import fs from "fs";
import path from "path";
// Expected on-disk layout: <cwd>/data/gitea-mirror.db
const dataDir = path.join(process.cwd(), "data");
const dbPath = path.join(dataDir, "gitea-mirror.db");

// Create the data directory when it is missing.
const hasDataDir = fs.existsSync(dataDir);
if (!hasDataDir) {
  fs.mkdirSync(dataDir, { recursive: true });
  console.log(`Created data directory at ${dataDir}`);
}

// Without an existing database file there is nothing to update.
const hasDatabase = fs.existsSync(dbPath);
if (!hasDatabase) {
  const missingDbLines = [
    `Database file not found at ${dbPath}`,
    "Please run 'bun run init-db' first to create the database.",
  ];
  for (const line of missingDbLines) {
    console.error(line);
  }
  process.exit(1);
}

// Open the database and switch on foreign-key enforcement.
const db = new Database(dbPath);
db.exec("PRAGMA foreign_keys = ON;");
/**
 * Reports whether `tableName` has a column called `columnName`, based on the
 * rows returned by `PRAGMA table_info`.
 *
 * @param tableName  Table to inspect; interpolated directly into the PRAGMA statement.
 * @param columnName Column name to look for (exact match).
 * @returns `true` if a column with that name is listed, otherwise `false`.
 */
function columnExists(tableName: string, columnName: string): boolean {
  const columns = db.query(`PRAGMA table_info(${tableName})`).all() as { name: string }[];
  for (const { name } of columns) {
    if (name === columnName) {
      return true;
    }
  }
  return false;
}
/**
 * Brings the mirror_jobs table up to date: adds any missing columns inside a
 * transaction, then creates any missing indexes.
 *
 * Progress is logged to the console. The database connection is always closed
 * before the function finishes, and the process exits with code 1 if any step
 * fails.
 */
async function updateMirrorJobsTable() {
  console.log("Checking mirror_jobs table for missing columns...");

  // True between BEGIN and COMMIT. The error path must only issue ROLLBACK
  // while a transaction is open; otherwise the rollback itself throws and
  // hides the error that actually caused the failure (e.g. during index
  // creation, which runs after the COMMIT).
  let transactionOpen = false;
  let failed = false;

  try {
    // Start a transaction
    db.exec("BEGIN TRANSACTION;");
    transactionOpen = true;

    // Check and add each new column if it doesn't exist
    const columnsToAdd = [
      { name: "job_type", definition: "TEXT NOT NULL DEFAULT 'mirror'" },
      { name: "batch_id", definition: "TEXT" },
      { name: "total_items", definition: "INTEGER" },
      { name: "completed_items", definition: "INTEGER DEFAULT 0" },
      { name: "item_ids", definition: "TEXT" }, // JSON array as text
      { name: "completed_item_ids", definition: "TEXT DEFAULT '[]'" }, // JSON array as text
      { name: "in_progress", definition: "INTEGER NOT NULL DEFAULT 0" }, // Boolean as integer
      { name: "started_at", definition: "TIMESTAMP" },
      { name: "completed_at", definition: "TIMESTAMP" },
      { name: "last_checkpoint", definition: "TIMESTAMP" }
    ];

    let columnsAdded = 0;
    for (const column of columnsToAdd) {
      if (!columnExists("mirror_jobs", column.name)) {
        console.log(`Adding column '${column.name}' to mirror_jobs table...`);
        db.exec(`ALTER TABLE mirror_jobs ADD COLUMN ${column.name} ${column.definition};`);
        columnsAdded++;
      }
    }

    // Commit the transaction
    db.exec("COMMIT;");
    transactionOpen = false;

    if (columnsAdded > 0) {
      console.log(`✅ Added ${columnsAdded} new columns to mirror_jobs table.`);
    } else {
      console.log("✅ All required columns already exist in mirror_jobs table.");
    }

    // Create indexes for better performance
    console.log("Creating indexes for mirror_jobs table...");

    // Only create indexes if they don't exist
    const indexesResult = db.query(
      `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='mirror_jobs'`
    ).all() as { name: string }[];
    const existingIndexes = indexesResult.map(idx => idx.name);

    const indexesToCreate = [
      { name: "idx_mirror_jobs_user_id", columns: "user_id" },
      { name: "idx_mirror_jobs_batch_id", columns: "batch_id" },
      { name: "idx_mirror_jobs_in_progress", columns: "in_progress" },
      { name: "idx_mirror_jobs_job_type", columns: "job_type" },
      { name: "idx_mirror_jobs_timestamp", columns: "timestamp" }
    ];

    let indexesCreated = 0;
    for (const index of indexesToCreate) {
      if (!existingIndexes.includes(index.name)) {
        console.log(`Creating index '${index.name}'...`);
        db.exec(`CREATE INDEX ${index.name} ON mirror_jobs(${index.columns});`);
        indexesCreated++;
      }
    }

    if (indexesCreated > 0) {
      console.log(`✅ Created ${indexesCreated} new indexes for mirror_jobs table.`);
    } else {
      console.log("✅ All required indexes already exist for mirror_jobs table.");
    }

    console.log("Mirror jobs table update completed successfully.");
  } catch (error) {
    // Roll back only if the transaction is still open
    if (transactionOpen) {
      try {
        db.exec("ROLLBACK;");
      } catch (rollbackError) {
        console.error("❌ Failed to roll back transaction:", rollbackError);
      }
    }
    console.error("❌ Error updating mirror_jobs table:", error);
    failed = true;
  } finally {
    // Close the database connection
    db.close();
  }

  // Exit only after `finally` has closed the connection: calling
  // process.exit() inside `catch` would end the process before it ran.
  if (failed) {
    process.exit(1);
  }
}
// Script entry point: report any rejection from updateMirrorJobsTable and exit non-zero.
updateMirrorJobsTable().catch((unhandled) => {
  console.error("Unhandled error:", unhandled);
  process.exit(1);
});