feat: Implement comprehensive job recovery and resume process improvements

- Added a startup recovery script to handle interrupted jobs before application startup.
- Enhanced recovery system with database connection validation and stale job cleanup.
- Improved the middleware to check whether any jobs need recovery and, as a fallback when the startup script did not run, to attempt that recovery once during request handling.
- Updated health check endpoint to include recovery system status and metrics.
- Introduced test scripts for verifying recovery functionality and job state management.
- Enhanced logging and error handling throughout the recovery process.
This commit is contained in:
Arunavo Ray
2025-05-24 13:45:25 +05:30
parent 98610482ae
commit a988be1028
10 changed files with 1006 additions and 111 deletions

View File

@@ -1,22 +1,58 @@
import { defineMiddleware } from 'astro:middleware';
import { initializeRecovery } from './lib/recovery';
import { initializeRecovery, hasJobsNeedingRecovery, getRecoveryStatus } from './lib/recovery';
// Module-level flags shared by every request that passes through this middleware.
// recoveryInitialized is set to true once the middleware's recovery check has
// finished, on both the success path and the error path.
let recoveryInitialized = false;
// recoveryAttempted is set to true as soon as a request enters the recovery
// branch; the guard in onRequest tests it so the branch is not entered again.
let recoveryAttempted = false;
// Astro middleware: performs a one-time job-recovery check, then hands the
// request to the next handler via next().
//
// NOTE(review): this hunk looks like a diff rendered without its +/- markers,
// so lines from the previous and the new implementation are interleaved (two
// `if (!recoveryInitialized ...)` guards, a bare `await initializeRecovery()`
// next to the guarded call, two console.error calls in the catch block, and
// one more `{` than `}`). Confirm against the real file before relying on the
// exact statement order shown here.
//
// Errors thrown inside the try block are caught and logged rather than
// rethrown, so the request can still proceed to next().
export const onRequest = defineMiddleware(async (context, next) => {
// Initialize recovery system only once when the server starts
if (!recoveryInitialized) {
console.log('Initializing recovery system from middleware...');
// This is a fallback in case the startup script didn't run
if (!recoveryInitialized && !recoveryAttempted) {
// Set before the first await below; any request that evaluates the guard
// above after this point fails it and skips the recovery branch.
recoveryAttempted = true;
try {
await initializeRecovery();
console.log('Recovery system initialized successfully');
// Check if recovery is actually needed before attempting
const needsRecovery = await hasJobsNeedingRecovery();
if (needsRecovery) {
console.log('⚠️ Middleware detected jobs needing recovery (startup script may not have run)');
console.log('Attempting recovery from middleware...');
// Run recovery with a shorter timeout since this is during request handling
// Whichever promise settles first decides the outcome: the recovery call, or
// the timer that rejects after 15000 ms. Promise.race does not cancel the
// recovery call if the timer wins.
// NOTE(review): the timer is never cleared, so it stays pending for up to
// 15 s even when recovery settles first.
const recoveryResult = await Promise.race([
initializeRecovery({
// NOTE(review): option semantics live in ./lib/recovery; retryDelay is
// presumably in milliseconds — confirm.
skipIfRecentAttempt: true,
maxRetries: 2,
retryDelay: 3000,
}),
new Promise<boolean>((_, reject) => {
setTimeout(() => reject(new Error('Middleware recovery timeout')), 15000);
})
]);
// presumably initializeRecovery resolves to a boolean success flag — confirm
// against ./lib/recovery.
if (recoveryResult) {
console.log('✅ Middleware recovery completed successfully');
} else {
console.log('⚠️ Middleware recovery completed with some issues');
}
} else {
console.log('✅ No recovery needed (startup script likely handled it)');
}
recoveryInitialized = true;
} catch (error) {
// Reached when any awaited call in the try block rejects, including the
// timeout rejection from the race above.
console.error('Error initializing recovery system:', error);
console.error('⚠️ Middleware recovery failed or timed out:', error);
console.log('Application will continue, but some jobs may remain interrupted');
// Log recovery status for debugging
const status = getRecoveryStatus();
console.log('Recovery status:', status);
recoveryInitialized = true; // Mark as attempted to avoid retries
}
recoveryInitialized = true;
}
// Continue with the request
return next();
});