feat: enhance event management by adding duplicate removal, cleanup functionality, and improving activity logging

This commit is contained in:
Arunavo Ray
2025-05-24 13:25:58 +05:30
parent 546db472e5
commit 98610482ae
15 changed files with 603 additions and 305 deletions

View File

@@ -1,12 +1,21 @@
import { useCallback, useEffect, useState } from 'react';
import { useCallback, useEffect, useState, useRef } from 'react';
import { Button } from '@/components/ui/button';
import { ChevronDown, Download, RefreshCw, Search } from 'lucide-react';
import { ChevronDown, Download, RefreshCw, Search, Trash2 } from 'lucide-react';
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from '../ui/dropdown-menu';
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
DialogTrigger,
} from '../ui/dialog';
import { apiRequest, formatDate } from '@/lib/utils';
import { useAuth } from '@/hooks/useAuth';
import type { MirrorJob } from '@/lib/db/schema';
@@ -30,12 +39,30 @@ import { useNavigation } from '@/components/layout/MainLayout';
type MirrorJobWithKey = MirrorJob & { _rowKey: string };
function genKey(job: MirrorJob): string {
return `${
job.id ?? (typeof crypto !== 'undefined'
? crypto.randomUUID()
: Math.random().toString(36).slice(2))
}-${job.timestamp}`;
// Maximum number of activities to keep in memory to prevent performance issues
const MAX_ACTIVITIES = 1000;
// More robust key generation to prevent collisions
function genKey(job: MirrorJob, index?: number): string {
const baseId = job.id || `temp-${Date.now()}-${Math.random().toString(36).slice(2)}`;
const timestamp = job.timestamp instanceof Date ? job.timestamp.getTime() : new Date(job.timestamp).getTime();
const indexSuffix = index !== undefined ? `-${index}` : '';
return `${baseId}-${timestamp}${indexSuffix}`;
}
// Create a deep clone without structuredClone for better browser compatibility
function deepClone<T>(obj: T): T {
if (obj === null || typeof obj !== 'object') return obj;
if (obj instanceof Date) return new Date(obj.getTime()) as T;
if (Array.isArray(obj)) return obj.map(item => deepClone(item)) as T;
const cloned = {} as T;
for (const key in obj) {
if (Object.prototype.hasOwnProperty.call(obj, key)) {
cloned[key] = deepClone(obj[key]);
}
}
return cloned;
}
export function ActivityLog() {
@@ -46,6 +73,16 @@ export function ActivityLog() {
const [activities, setActivities] = useState<MirrorJobWithKey[]>([]);
const [isLoading, setIsLoading] = useState(false);
const [showCleanupDialog, setShowCleanupDialog] = useState(false);
// Ref to track if component is mounted to prevent state updates after unmount
const isMountedRef = useRef(true);
useEffect(() => {
return () => {
isMountedRef.current = false;
};
}, []);
const { filter, setFilter } = useFilterParams({
searchTerm: '',
@@ -57,12 +94,41 @@ export function ActivityLog() {
/* ----------------------------- SSE hook ----------------------------- */
const handleNewMessage = useCallback((data: MirrorJob) => {
const withKey: MirrorJobWithKey = {
...structuredClone(data),
_rowKey: genKey(data),
};
if (!isMountedRef.current) return;
setActivities((prev) => [withKey, ...prev]);
setActivities((prev) => {
// Create a deep clone of the new activity
const clonedData = deepClone(data);
// Check if this activity already exists to prevent duplicates
const existingIndex = prev.findIndex(activity =>
activity.id === clonedData.id ||
(activity.repositoryId === clonedData.repositoryId &&
activity.organizationId === clonedData.organizationId &&
activity.message === clonedData.message &&
Math.abs(new Date(activity.timestamp).getTime() - new Date(clonedData.timestamp).getTime()) < 1000)
);
if (existingIndex !== -1) {
// Update existing activity instead of adding duplicate
const updated = [...prev];
updated[existingIndex] = {
...clonedData,
_rowKey: prev[existingIndex]._rowKey, // Keep the same key
};
return updated;
}
// Add new activity with unique key
const withKey: MirrorJobWithKey = {
...clonedData,
_rowKey: genKey(clonedData, prev.length),
};
// Limit the number of activities to prevent memory issues
const newActivities = [withKey, ...prev];
return newActivities.slice(0, MAX_ACTIVITIES);
});
}, []);
const { connected } = useSSE({
@@ -88,20 +154,37 @@ export function ActivityLog() {
return false;
}
const data: MirrorJobWithKey[] = res.activities.map((a) => ({
...structuredClone(a),
_rowKey: genKey(a),
}));
// Process activities with robust cloning and unique keys
const data: MirrorJobWithKey[] = res.activities.map((activity, index) => {
const clonedActivity = deepClone(activity);
return {
...clonedActivity,
_rowKey: genKey(clonedActivity, index),
};
});
setActivities(data);
// Sort by timestamp (newest first) to ensure consistent ordering
data.sort((a, b) => {
const timeA = new Date(a.timestamp).getTime();
const timeB = new Date(b.timestamp).getTime();
return timeB - timeA;
});
if (isMountedRef.current) {
setActivities(data);
}
return true;
} catch (err) {
toast.error(
err instanceof Error ? err.message : 'Failed to fetch activities.',
);
if (isMountedRef.current) {
toast.error(
err instanceof Error ? err.message : 'Failed to fetch activities.',
);
}
return false;
} finally {
setIsLoading(false);
if (isMountedRef.current) {
setIsLoading(false);
}
}
}, [user?.id]); // Only depend on user.id, not entire user object
@@ -210,6 +293,50 @@ export function ActivityLog() {
link.click();
};
const handleCleanupClick = () => {
setShowCleanupDialog(true);
};
const confirmCleanup = async () => {
if (!user?.id) return;
try {
setIsLoading(true);
setShowCleanupDialog(false);
// Use fetch directly to avoid potential axios issues
const response = await fetch('/api/activities/cleanup', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ userId: user.id }),
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({ error: 'Unknown error occurred' }));
throw new Error(errorData.error || `HTTP ${response.status}: ${response.statusText}`);
}
const res = await response.json();
if (res.success) {
// Clear the activities from the UI
setActivities([]);
toast.success(`All activities cleaned up successfully. Deleted ${res.result.mirrorJobsDeleted} mirror jobs and ${res.result.eventsDeleted} events.`);
} else {
toast.error(res.error || 'Failed to cleanup activities.');
}
} catch (error) {
console.error('Error cleaning up activities:', error);
toast.error(error instanceof Error ? error.message : 'Failed to cleanup activities.');
} finally {
setIsLoading(false);
}
};
const cancelCleanup = () => {
setShowCleanupDialog(false);
};
/* ------------------------------ UI ------------------------------ */
return (
@@ -308,6 +435,17 @@ export function ActivityLog() {
>
<RefreshCw className='h-4 w-4' />
</Button>
{/* cleanup all activities */}
<Button
variant="outline"
size="icon"
onClick={handleCleanupClick}
title="Delete all activities"
className="text-destructive hover:text-destructive"
>
<Trash2 className='h-4 w-4' />
</Button>
</div>
{/* activity list */}
@@ -317,6 +455,30 @@ export function ActivityLog() {
filter={filter}
setFilter={setFilter}
/>
{/* cleanup confirmation dialog */}
<Dialog open={showCleanupDialog} onOpenChange={setShowCleanupDialog}>
<DialogContent>
<DialogHeader>
<DialogTitle>Delete All Activities</DialogTitle>
<DialogDescription>
Are you sure you want to delete ALL activities? This action cannot be undone and will remove all mirror jobs and events from the database.
</DialogDescription>
</DialogHeader>
<DialogFooter>
<Button variant="outline" onClick={cancelCleanup}>
Cancel
</Button>
<Button
variant="destructive"
onClick={confirmCleanup}
disabled={isLoading}
>
{isLoading ? 'Deleting...' : 'Delete All Activities'}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
</div>
);
}

View File

@@ -1,6 +1,6 @@
import { v4 as uuidv4 } from "uuid";
import { db, events } from "./db";
import { eq, and, gt, lt } from "drizzle-orm";
import { eq, and, gt, lt, inArray } from "drizzle-orm";
/**
* Publishes an event to a specific channel for a user
@@ -10,21 +10,58 @@ export async function publishEvent({
userId,
channel,
payload,
deduplicationKey,
}: {
userId: string;
channel: string;
payload: any;
deduplicationKey?: string; // Optional key to prevent duplicate events
}): Promise<string> {
try {
const eventId = uuidv4();
console.log(`Publishing event to channel ${channel} for user ${userId}`);
// Check for duplicate events if deduplication key is provided
if (deduplicationKey) {
const existingEvent = await db
.select()
.from(events)
.where(
and(
eq(events.userId, userId),
eq(events.channel, channel),
eq(events.read, false)
)
)
.limit(10); // Check recent unread events
// Check if any existing event has the same deduplication key in payload
const isDuplicate = existingEvent.some(event => {
try {
const eventPayload = JSON.parse(event.payload as string);
return eventPayload.deduplicationKey === deduplicationKey;
} catch {
return false;
}
});
if (isDuplicate) {
console.log(`Skipping duplicate event with key: ${deduplicationKey}`);
return eventId; // Return a valid ID but don't create the event
}
}
// Add deduplication key to payload if provided
const eventPayload = deduplicationKey
? { ...payload, deduplicationKey }
: payload;
// Insert the event into the SQLite database
await db.insert(events).values({
id: eventId,
userId,
channel,
payload: JSON.stringify(payload),
payload: JSON.stringify(eventPayload),
createdAt: new Date(),
});
@@ -103,6 +140,78 @@ export async function getNewEvents({
}
}
/**
* Removes duplicate events based on deduplication keys
* This can be called periodically to clean up any duplicates that may have slipped through
*/
export async function removeDuplicateEvents(userId?: string): Promise<{ duplicatesRemoved: number }> {
try {
console.log("Removing duplicate events...");
// Build the base query
let query = db.select().from(events);
if (userId) {
query = query.where(eq(events.userId, userId));
}
const allEvents = await query;
const duplicateIds: string[] = [];
const seenKeys = new Set<string>();
// Group events by user and channel, then check for duplicates
const eventsByUserChannel = new Map<string, typeof allEvents>();
for (const event of allEvents) {
const key = `${event.userId}-${event.channel}`;
if (!eventsByUserChannel.has(key)) {
eventsByUserChannel.set(key, []);
}
eventsByUserChannel.get(key)!.push(event);
}
// Check each group for duplicates
for (const [, events] of eventsByUserChannel) {
const channelSeenKeys = new Set<string>();
// Sort by creation time (keep the earliest)
events.sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime());
for (const event of events) {
try {
const payload = JSON.parse(event.payload as string);
if (payload.deduplicationKey) {
if (channelSeenKeys.has(payload.deduplicationKey)) {
duplicateIds.push(event.id);
} else {
channelSeenKeys.add(payload.deduplicationKey);
}
}
} catch {
// Skip events with invalid JSON
}
}
}
// Remove duplicates
if (duplicateIds.length > 0) {
console.log(`Removing ${duplicateIds.length} duplicate events`);
// Delete in batches to avoid query size limits
const batchSize = 100;
for (let i = 0; i < duplicateIds.length; i += batchSize) {
const batch = duplicateIds.slice(i, i + batchSize);
await db.delete(events).where(inArray(events.id, batch));
}
}
console.log(`Removed ${duplicateIds.length} duplicate events`);
return { duplicatesRemoved: duplicateIds.length };
} catch (error) {
console.error("Error removing duplicate events:", error);
return { duplicatesRemoved: 0 };
}
}
/**
* Cleans up old events to prevent the database from growing too large
* Should be called periodically (e.g., daily via a cron job)

View File

@@ -295,15 +295,8 @@ export async function getOrCreateGiteaOrg({
if (orgRes.ok) {
const org = await orgRes.json();
await createMirrorJob({
userId: config.userId,
organizationId: org.id,
organizationName: orgName,
status: "imported",
message: `Organization ${orgName} fetched successfully`,
details: `Organization ${orgName} was fetched from GitHub`,
});
// Note: Organization events are handled by the main mirroring process
// to avoid duplicate events
return org.id;
}
@@ -325,13 +318,8 @@ export async function getOrCreateGiteaOrg({
throw new Error(`Failed to create Gitea org: ${await createRes.text()}`);
}
await createMirrorJob({
userId: config.userId,
organizationName: orgName,
status: "imported",
message: `Organization ${orgName} created successfully`,
details: `Organization ${orgName} was created in Gitea`,
});
// Note: Organization creation events are handled by the main mirroring process
// to avoid duplicate events
const newOrg = await createRes.json();
return newOrg.id;
@@ -417,15 +405,8 @@ export async function mirrorGitHubRepoToGiteaOrg({
})
.where(eq(repositories.id, repository.id!));
// Append log for "mirroring" status
await createMirrorJob({
userId: config.userId,
repositoryId: repository.id,
repositoryName: repository.name,
message: `Started mirroring repository: ${repository.name}`,
details: `Repository ${repository.name} is now in the mirroring state.`,
status: "mirroring",
});
// Note: "mirroring" status events are handled by the concurrency system
// to avoid duplicate events during batch operations
const apiUrl = `${config.giteaConfig.url}/api/v1/repos/migrate`;

View File

@@ -17,6 +17,7 @@ export async function createMirrorJob({
totalItems,
itemIds,
inProgress,
skipDuplicateEvent,
}: {
userId: string;
organizationId?: string;
@@ -31,6 +32,7 @@ export async function createMirrorJob({
totalItems?: number;
itemIds?: string[];
inProgress?: boolean;
skipDuplicateEvent?: boolean; // Option to skip event publishing for internal operations
}) {
const jobId = uuidv4();
const currentTimestamp = new Date();
@@ -64,13 +66,27 @@ export async function createMirrorJob({
// Insert the job into the database
await db.insert(mirrorJobs).values(job);
// Publish the event using SQLite instead of Redis
const channel = `mirror-status:${userId}`;
await publishEvent({
userId,
channel,
payload: job
});
// Publish the event using SQLite instead of Redis (unless skipped)
if (!skipDuplicateEvent) {
const channel = `mirror-status:${userId}`;
// Create deduplication key based on the operation
let deduplicationKey: string | undefined;
if (repositoryId && status) {
deduplicationKey = `repo-${repositoryId}-${status}`;
} else if (organizationId && status) {
deduplicationKey = `org-${organizationId}-${status}`;
} else if (batchId) {
deduplicationKey = `batch-${batchId}-${status}`;
}
await publishEvent({
userId,
channel,
payload: job,
deduplicationKey
});
}
return jobId;
} catch (error) {
@@ -156,16 +172,27 @@ export async function updateMirrorJobProgress({
.set(updates)
.where(mirrorJobs.id === jobId);
// Publish the event
// Publish the event with deduplication
const updatedJob = {
...job,
...updates,
};
// Create deduplication key for progress updates
let deduplicationKey: string | undefined;
if (completedItemId) {
deduplicationKey = `progress-${jobId}-${completedItemId}`;
} else if (isCompleted) {
deduplicationKey = `completed-${jobId}`;
} else {
deduplicationKey = `update-${jobId}-${Date.now()}`;
}
await publishEvent({
userId: job.userId,
channel: `mirror-status:${job.userId}`,
payload: updatedJob,
deduplicationKey
});
return updatedJob;

View File

@@ -181,7 +181,7 @@ export async function processWithResilience<T, R>(
getItemId,
getItemName,
resumeFromJobId,
checkpointInterval = 5,
checkpointInterval = 10, // Increased from 5 to reduce event frequency
...otherOptions
} = options;

View File

@@ -0,0 +1,115 @@
import type { APIRoute } from "astro";
import { db, mirrorJobs, events } from "@/lib/db";
import { eq, count } from "drizzle-orm";
export const POST: APIRoute = async ({ request }) => {
try {
let body;
try {
body = await request.json();
} catch (jsonError) {
console.error("Invalid JSON in request body:", jsonError);
return new Response(
JSON.stringify({ error: "Invalid JSON in request body." }),
{ status: 400, headers: { "Content-Type": "application/json" } }
);
}
const { userId } = body || {};
if (!userId) {
return new Response(
JSON.stringify({ error: "Missing 'userId' in request body." }),
{ status: 400, headers: { "Content-Type": "application/json" } }
);
}
// Start a transaction to ensure all operations succeed or fail together
const result = await db.transaction(async (tx) => {
// Count activities before deletion
const mirrorJobsCountResult = await tx
.select({ count: count() })
.from(mirrorJobs)
.where(eq(mirrorJobs.userId, userId));
const eventsCountResult = await tx
.select({ count: count() })
.from(events)
.where(eq(events.userId, userId));
const totalMirrorJobs = mirrorJobsCountResult[0]?.count || 0;
const totalEvents = eventsCountResult[0]?.count || 0;
console.log(`Found ${totalMirrorJobs} mirror jobs and ${totalEvents} events to delete for user ${userId}`);
// First, mark all in-progress jobs as completed/failed to allow deletion
await tx
.update(mirrorJobs)
.set({
inProgress: false,
completedAt: new Date(),
status: "failed",
message: "Job interrupted and cleaned up by user"
})
.where(eq(mirrorJobs.userId, userId));
console.log(`Updated in-progress jobs to allow deletion`);
// Delete all mirror jobs for the user (now that none are in progress)
await tx
.delete(mirrorJobs)
.where(eq(mirrorJobs.userId, userId));
// Delete all events for the user
await tx
.delete(events)
.where(eq(events.userId, userId));
return {
mirrorJobsDeleted: totalMirrorJobs,
eventsDeleted: totalEvents,
totalMirrorJobs,
totalEvents,
};
});
console.log(`Cleaned up activities for user ${userId}:`, result);
return new Response(
JSON.stringify({
success: true,
message: "All activities cleaned up successfully.",
result: {
mirrorJobsDeleted: result.mirrorJobsDeleted,
eventsDeleted: result.eventsDeleted,
},
}),
{ status: 200, headers: { "Content-Type": "application/json" } }
);
} catch (error) {
console.error("Error cleaning up activities:", error);
// Provide more specific error messages
let errorMessage = "An unknown error occurred.";
if (error instanceof Error) {
errorMessage = error.message;
// Check for common database errors
if (error.message.includes("FOREIGN KEY constraint failed")) {
errorMessage = "Cannot delete activities due to database constraints. Some jobs may still be referenced by other records.";
} else if (error.message.includes("database is locked")) {
errorMessage = "Database is currently locked. Please try again in a moment.";
} else if (error.message.includes("no such table")) {
errorMessage = "Database tables are missing. Please check your database setup.";
}
}
return new Response(
JSON.stringify({
success: false,
error: errorMessage,
}),
{ status: 500, headers: { "Content-Type": "application/json" } }
);
}
};

View File

@@ -126,7 +126,7 @@ export const POST: APIRoute = async ({ request }) => {
concurrencyLimit: CONCURRENCY_LIMIT,
maxRetries: 2,
retryDelay: 2000,
checkpointInterval: 1, // Checkpoint after each repository
checkpointInterval: 5, // Checkpoint every 5 repositories to reduce event frequency
onProgress: (completed, total, result) => {
const percentComplete = Math.round((completed / total) * 100);
console.log(`Mirroring progress: ${percentComplete}% (${completed}/${total})`);