Compare commits

...

3 Commits

Author SHA1 Message Date
ARUNAVO RAY
b39d7a2179 Merge pull request #107 from RayLabsHQ/fix/issue-97-migration-duplicates
fix: resolve migration 0005 duplicate constraint failure (#97)
2025-10-02 07:45:42 +05:30
Arunavo Ray
bf99a95dc6 ci: add more paths to trigger Docker builds
- Added docker-entrypoint.sh to trigger paths
- Added drizzle/** for database migrations
- Added scripts/** for database management scripts
- Added src/** for source code changes

This ensures Docker images are rebuilt when critical runtime
files change, not just package dependencies.
2025-10-01 08:02:56 +05:30
Arunavo Ray
2ea917fdaa fix: resolve migration 0005 duplicate constraint failure (#97)
**Problem:**
- Users upgrading to v3.7.2 encountered database migration failures
- Migration 0005 tried to add unique index without handling existing duplicates
- Hybrid initialization (manual SQL + Drizzle) caused schema inconsistencies
- Error: "UNIQUE constraint failed: repositories.user_id, repositories.full_name"

**Solution:**

1. **Fixed Migration 0005:**
   - Added deduplication step before creating unique index
   - Removes duplicate (user_id, full_name) entries, keeping most recent
   - Safely creates unique constraint after cleanup

2. **Removed Hybrid Database Initialization:**
   - Eliminated 154 lines of manual SQL from docker-entrypoint.sh
   - Now uses Drizzle exclusively for schema management
   - Single source of truth prevents schema drift
   - Migrations run automatically via src/lib/db/index.ts

**Testing:**
-  Fresh database initialization works
-  Duplicate deduplication verified
-  Unique constraint properly enforced
-  All 6 migrations apply cleanly

**Changes:**
- docker-entrypoint.sh: Removed manual table creation SQL
- drizzle/0005_polite_preak.sql: Added deduplication before index creation

Fixes #97
2025-10-01 07:57:16 +05:30
3 changed files with 24 additions and 154 deletions

View File

@@ -10,6 +10,10 @@ on:
- 'package.json'
- 'bun.lock*'
- '.github/workflows/docker-build.yml'
- 'docker-entrypoint.sh'
- 'drizzle/**'
- 'scripts/**'
- 'src/**'
pull_request:
paths:
- 'Dockerfile'
@@ -17,6 +21,10 @@ on:
- 'package.json'
- 'bun.lock*'
- '.github/workflows/docker-build.yml'
- 'docker-entrypoint.sh'
- 'drizzle/**'
- 'scripts/**'
- 'src/**'
schedule:
- cron: '0 0 * * 0' # Weekly security scan on Sunday at midnight

View File

@@ -120,161 +120,13 @@ fi
# Dependencies are already installed during the Docker build process
# Initialize the database if it doesn't exist
# Note: Drizzle migrations will be run automatically when the app starts (see src/lib/db/index.ts)
if [ ! -f "/app/data/gitea-mirror.db" ]; then
echo "Initializing database..."
if [ -f "dist/scripts/init-db.js" ]; then
bun dist/scripts/init-db.js
elif [ -f "dist/scripts/manage-db.js" ]; then
bun dist/scripts/manage-db.js init
elif [ -f "scripts/manage-db.ts" ]; then
bun scripts/manage-db.ts init
else
echo "Warning: Could not find database initialization scripts in dist/scripts."
echo "Creating and initializing database manually..."
# Create the database file
echo "Database not found. It will be created and initialized via Drizzle migrations on first app startup..."
# Create empty database file so migrations can run
touch /app/data/gitea-mirror.db
# Initialize the database with required tables
sqlite3 /app/data/gitea-mirror.db <<EOF
CREATE TABLE IF NOT EXISTS users (
id TEXT PRIMARY KEY,
username TEXT NOT NULL,
password TEXT NOT NULL,
email TEXT NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS configs (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
name TEXT NOT NULL,
is_active INTEGER NOT NULL DEFAULT 1,
github_config TEXT NOT NULL,
gitea_config TEXT NOT NULL,
include TEXT NOT NULL DEFAULT '["*"]',
exclude TEXT NOT NULL DEFAULT '[]',
schedule_config TEXT NOT NULL,
created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
FOREIGN KEY (user_id) REFERENCES users(id)
);
CREATE TABLE IF NOT EXISTS repositories (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
config_id TEXT NOT NULL,
name TEXT NOT NULL,
full_name TEXT NOT NULL,
url TEXT NOT NULL,
clone_url TEXT NOT NULL,
owner TEXT NOT NULL,
organization TEXT,
mirrored_location TEXT DEFAULT '',
destination_org TEXT,
is_private INTEGER NOT NULL DEFAULT 0,
is_fork INTEGER NOT NULL DEFAULT 0,
forked_from TEXT,
has_issues INTEGER NOT NULL DEFAULT 0,
is_starred INTEGER NOT NULL DEFAULT 0,
is_archived INTEGER NOT NULL DEFAULT 0,
size INTEGER NOT NULL DEFAULT 0,
has_lfs INTEGER NOT NULL DEFAULT 0,
has_submodules INTEGER NOT NULL DEFAULT 0,
language TEXT,
description TEXT,
default_branch TEXT NOT NULL,
visibility TEXT NOT NULL DEFAULT 'public',
status TEXT NOT NULL DEFAULT 'imported',
last_mirrored INTEGER,
error_message TEXT,
created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (config_id) REFERENCES configs(id)
);
-- Uniqueness of (user_id, full_name) for repositories is enforced via drizzle migrations
CREATE TABLE IF NOT EXISTS organizations (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
config_id TEXT NOT NULL,
name TEXT NOT NULL,
avatar_url TEXT NOT NULL,
membership_role TEXT NOT NULL DEFAULT 'member',
is_included INTEGER NOT NULL DEFAULT 1,
status TEXT NOT NULL DEFAULT 'imported',
last_mirrored INTEGER,
error_message TEXT,
repository_count INTEGER NOT NULL DEFAULT 0,
created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (config_id) REFERENCES configs(id)
);
CREATE TABLE IF NOT EXISTS mirror_jobs (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
repository_id TEXT,
repository_name TEXT,
organization_id TEXT,
organization_name TEXT,
details TEXT,
status TEXT NOT NULL DEFAULT 'imported',
message TEXT NOT NULL,
timestamp TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- New fields for job resilience
job_type TEXT NOT NULL DEFAULT 'mirror',
batch_id TEXT,
total_items INTEGER,
completed_items INTEGER DEFAULT 0,
item_ids TEXT, -- JSON array as text
completed_item_ids TEXT DEFAULT '[]', -- JSON array as text
in_progress INTEGER NOT NULL DEFAULT 0, -- Boolean as integer
started_at TIMESTAMP,
completed_at TIMESTAMP,
last_checkpoint TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id)
);
CREATE INDEX IF NOT EXISTS idx_mirror_jobs_user_id ON mirror_jobs(user_id);
CREATE INDEX IF NOT EXISTS idx_mirror_jobs_batch_id ON mirror_jobs(batch_id);
CREATE INDEX IF NOT EXISTS idx_mirror_jobs_in_progress ON mirror_jobs(in_progress);
CREATE INDEX IF NOT EXISTS idx_mirror_jobs_job_type ON mirror_jobs(job_type);
CREATE INDEX IF NOT EXISTS idx_mirror_jobs_timestamp ON mirror_jobs(timestamp);
CREATE TABLE IF NOT EXISTS events (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
channel TEXT NOT NULL,
payload TEXT NOT NULL,
read INTEGER NOT NULL DEFAULT 0,
created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')),
FOREIGN KEY (user_id) REFERENCES users(id)
);
CREATE INDEX IF NOT EXISTS idx_events_user_channel ON events(user_id, channel);
CREATE INDEX IF NOT EXISTS idx_events_created_at ON events(created_at);
CREATE INDEX IF NOT EXISTS idx_events_read ON events(read);
EOF
echo "Database initialized with required tables."
fi
else
echo "Database already exists, checking for issues..."
if [ -f "dist/scripts/fix-db-issues.js" ]; then
bun dist/scripts/fix-db-issues.js
elif [ -f "dist/scripts/manage-db.js" ]; then
bun dist/scripts/manage-db.js fix
elif [ -f "scripts/manage-db.ts" ]; then
bun scripts/manage-db.ts fix
fi
echo "Database exists, checking integrity..."
echo "Database already exists, Drizzle will check for pending migrations on startup..."
fi
# Extract version from package.json and set as environment variable

View File

@@ -1 +1,11 @@
CREATE UNIQUE INDEX `uniq_repositories_user_full_name` ON `repositories` (`user_id`,`full_name`);
-- Step 1: Remove duplicate repositories, keeping the most recently updated one
-- This handles cases where users have duplicate entries from before the unique constraint
DELETE FROM repositories
WHERE rowid NOT IN (
SELECT MAX(rowid)
FROM repositories
GROUP BY user_id, full_name
);
--> statement-breakpoint
-- Step 2: Now create the unique index safely
CREATE UNIQUE INDEX uniq_repositories_user_full_name ON repositories (user_id, full_name);