fix(sync): batch inserts + normalize nulls to avoid SQLite param mismatch

- Batch repository inserts with dynamic sizing under SQLite 999-param limit
- Normalize undefined → null to keep multi-row insert shapes consistent
- De-duplicate owned + starred repos by fullName (prefer starred variant)
- Enforce uniqueness via (user_id, full_name) + onConflictDoNothing
- Handle starred name collisions (suffix/prefix) across mirror + metadata
- Add repo-utils helpers + tests; guard Octokit.plugin in tests
- Remove manual unique index from entrypoint; rely on drizzle-kit migrations
Arunavo Ray
2025-09-13 23:38:50 +05:30
parent 51a6c8ca58
commit 18ecdbc252
15 changed files with 2439 additions and 154 deletions
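The diffs below call two helpers from @/lib/repo-utils that this commit adds but that are not shown in this excerpt. A minimal sketch of their likely shape, inferred from the call sites and the bullets above — the actual implementations may differ:

// Sketch only; the real helpers live in @/lib/repo-utils.
// calcBatchSizeForInsert: rows per multi-row INSERT so that rows × columns stays
// under SQLite's conservative 999 bound-parameter limit.
export function calcBatchSizeForInsert(columnCount: number, maxParams = 999): number {
  return Math.max(1, Math.floor(maxParams / Math.max(1, columnCount)));
}

// mergeGitReposPreferStarred: de-duplicate owned + starred repos by fullName,
// keeping the starred variant when both lists contain the same repository.
export function mergeGitReposPreferStarred<T extends { fullName: string }>(
  owned: T[],
  starred: T[],
): T[] {
  const byFullName = new Map<string, T>();
  for (const repo of owned) byFullName.set(repo.fullName, repo);
  for (const repo of starred) byFullName.set(repo.fullName, repo); // starred wins on collisions
  return [...byFullName.values()];
}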

View File

@@ -10,6 +10,7 @@ import {
  getGithubStarredRepositories,
} from "@/lib/github";
import { jsonResponse, createSecureErrorResponse } from "@/lib/utils";
+ import { mergeGitReposPreferStarred, calcBatchSizeForInsert } from "@/lib/repo-utils";
import { getDecryptedGitHubToken } from "@/lib/utils/config-encryption";

export const POST: APIRoute = async ({ request }) => {
@@ -55,7 +56,8 @@ export const POST: APIRoute = async ({ request }) => {
  getGithubOrganizations({ octokit, config }),
]);

- const allGithubRepos = [...basicAndForkedRepos, ...starredRepos];
+ // Merge and de-duplicate by fullName, preferring starred variant when duplicated
+ const allGithubRepos = mergeGitReposPreferStarred(basicAndForkedRepos, starredRepos);

// Prepare full list of repos and orgs
const newRepos = allGithubRepos.map((repo) => ({
@@ -67,25 +69,25 @@ export const POST: APIRoute = async ({ request }) => {
  url: repo.url,
  cloneUrl: repo.cloneUrl,
  owner: repo.owner,
- organization: repo.organization,
+ organization: repo.organization ?? null,
  mirroredLocation: repo.mirroredLocation || "",
  destinationOrg: repo.destinationOrg || null,
  isPrivate: repo.isPrivate,
  isForked: repo.isForked,
- forkedFrom: repo.forkedFrom,
+ forkedFrom: repo.forkedFrom ?? null,
  hasIssues: repo.hasIssues,
  isStarred: repo.isStarred,
  isArchived: repo.isArchived,
  size: repo.size,
  hasLFS: repo.hasLFS,
  hasSubmodules: repo.hasSubmodules,
- language: repo.language || null,
- description: repo.description || null,
+ language: repo.language ?? null,
+ description: repo.description ?? null,
  defaultBranch: repo.defaultBranch,
  visibility: repo.visibility,
  status: repo.status,
- lastMirrored: repo.lastMirrored,
- errorMessage: repo.errorMessage,
+ lastMirrored: repo.lastMirrored ?? null,
+ errorMessage: repo.errorMessage ?? null,
  createdAt: repo.createdAt,
  updatedAt: repo.updatedAt,
}));
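The undefined → null and || → ?? changes above are easy to gloss over: ?? only falls back on null/undefined, so legitimate falsy values survive, and a concrete null gives every row the same bindable shape in a multi-row insert, which is the "param mismatch" the commit title refers to. A tiny illustration with hypothetical values:

// Hypothetical values, only to show the operator difference.
const language = "";                                 // falsy but meaningful
const description: string | undefined = undefined;

const withOr = language || null;       // null — "" is falsy, so the value is lost
const withNullish = language ?? null;  // ""   — only null/undefined fall back to null

// undefined becomes a concrete null, so every row handed to a multi-row
// .values([...]) call carries the same keys with bindable values.
const normalizedDescription = description ?? null;   // null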
@@ -128,12 +130,27 @@ export const POST: APIRoute = async ({ request }) => {
  );
  insertedOrgs = newOrgs.filter((o) => !existingOrgNames.has(o.name));

+ // Batch insert repositories to avoid SQLite parameter limit (dynamic by column count)
+ const sample = newRepos[0];
+ const columnCount = Object.keys(sample ?? {}).length || 1;
+ const REPO_BATCH_SIZE = calcBatchSizeForInsert(columnCount);
  if (insertedRepos.length > 0) {
-   await tx.insert(repositories).values(insertedRepos);
+   for (let i = 0; i < insertedRepos.length; i += REPO_BATCH_SIZE) {
+     const batch = insertedRepos.slice(i, i + REPO_BATCH_SIZE);
+     await tx
+       .insert(repositories)
+       .values(batch)
+       .onConflictDoNothing({ target: [repositories.userId, repositories.fullName] });
+   }
  }

+ // Batch insert organizations (they have fewer fields, so we can use larger batches)
+ const ORG_BATCH_SIZE = 100;
  if (insertedOrgs.length > 0) {
-   await tx.insert(organizations).values(insertedOrgs);
+   for (let i = 0; i < insertedOrgs.length; i += ORG_BATCH_SIZE) {
+     const batch = insertedOrgs.slice(i, i + ORG_BATCH_SIZE);
+     await tx.insert(organizations).values(batch);
+   }
  }
});
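Note that onConflictDoNothing with an explicit target only works if the schema declares a matching unique constraint; SQLite rejects an ON CONFLICT target that does not correspond to a unique index or primary key. Per the commit message, the (user_id, full_name) uniqueness comes from the drizzle schema and drizzle-kit migrations rather than a manual index in the entrypoint. A sketch of what that declaration could look like, assuming a drizzle sqlite-core schema with a simplified, assumed column set (not the actual schema file):

import { sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";

// Assumed columns — the real table carries all the fields seen in the insert payloads above.
export const repositories = sqliteTable(
  "repositories",
  {
    id: text("id").primaryKey(),
    userId: text("user_id").notNull(),
    fullName: text("full_name").notNull(),
  },
  (table) => ({
    // Composite uniqueness that onConflictDoNothing targets via (user_id, full_name)
    userFullNameUnique: uniqueIndex("repositories_user_id_full_name_idx").on(
      table.userId,
      table.fullName,
    ),
  }),
);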

View File

@@ -122,25 +122,36 @@ export const POST: APIRoute = async ({ request }) => {
  destinationOrg: null,
  isPrivate: repo.private,
  isForked: repo.fork,
- forkedFrom: undefined,
+ forkedFrom: null,
  hasIssues: repo.has_issues,
  isStarred: false,
  isArchived: repo.archived,
  size: repo.size,
  hasLFS: false,
  hasSubmodules: false,
- language: repo.language || null,
- description: repo.description || null,
+ language: repo.language ?? null,
+ description: repo.description ?? null,
  defaultBranch: repo.default_branch ?? "main",
  visibility: (repo.visibility ?? "public") as RepositoryVisibility,
  status: "imported" as RepoStatus,
- lastMirrored: undefined,
- errorMessage: undefined,
+ lastMirrored: null,
+ errorMessage: null,
  createdAt: repo.created_at ? new Date(repo.created_at) : new Date(),
  updatedAt: repo.updated_at ? new Date(repo.updated_at) : new Date(),
}));

- await db.insert(repositories).values(repoRecords);
+ // Batch insert repositories to avoid SQLite parameter limit
+ // Compute batch size based on column count
+ const sample = repoRecords[0];
+ const columnCount = Object.keys(sample ?? {}).length || 1;
+ const BATCH_SIZE = Math.max(1, Math.floor(999 / columnCount));
+ for (let i = 0; i < repoRecords.length; i += BATCH_SIZE) {
+   const batch = repoRecords.slice(i, i + BATCH_SIZE);
+   await db
+     .insert(repositories)
+     .values(batch)
+     .onConflictDoNothing({ target: [repositories.userId, repositories.fullName] });
+ }

// Insert organization metadata
const organizationRecord = {
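To make the batch-size arithmetic above concrete (hypothetical column count — the real record has however many fields the mapping produces):

// If a repo record has 25 keys:
const columnCount = 25;
const BATCH_SIZE = Math.max(1, Math.floor(999 / columnCount)); // 39 rows per INSERT
// 39 × 25 = 975 bound parameters per statement, safely under the 999 default
// that older SQLite builds enforce via SQLITE_MAX_VARIABLE_NUMBER.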

View File

@@ -80,25 +80,23 @@ export const POST: APIRoute = async ({ request }) => {
  cloneUrl: repoData.clone_url,
  owner: repoData.owner.login,
  organization:
-   repoData.owner.type === "Organization"
-     ? repoData.owner.login
-     : undefined,
+   repoData.owner.type === "Organization" ? repoData.owner.login : null,
  isPrivate: repoData.private,
  isForked: repoData.fork,
- forkedFrom: undefined,
+ forkedFrom: null,
  hasIssues: repoData.has_issues,
  isStarred: false,
  isArchived: repoData.archived,
  size: repoData.size,
  hasLFS: false,
  hasSubmodules: false,
- language: repoData.language || null,
- description: repoData.description || null,
+ language: repoData.language ?? null,
+ description: repoData.description ?? null,
  defaultBranch: repoData.default_branch,
  visibility: (repoData.visibility ?? "public") as RepositoryVisibility,
  status: "imported" as Repository["status"],
- lastMirrored: undefined,
- errorMessage: undefined,
+ lastMirrored: null,
+ errorMessage: null,
  mirroredLocation: "",
  destinationOrg: null,
  createdAt: repoData.created_at
@@ -109,7 +107,10 @@ export const POST: APIRoute = async ({ request }) => {
    : new Date(),
};

- await db.insert(repositories).values(metadata);
+ await db
+   .insert(repositories)
+   .values(metadata)
+   .onConflictDoNothing({ target: [repositories.userId, repositories.fullName] });

createMirrorJob({
  userId,
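The commit message also mentions guarding Octokit.plugin in tests; those test files are not part of this excerpt. One plausible shape for such a guard, assuming Octokit comes from @octokit/rest and that a module mock in tests may omit the static plugin method (hypothetical code, not taken from the commit):

import { Octokit } from "@octokit/rest";
import { retry } from "@octokit/plugin-retry"; // example plugin; the project may use others

// Octokit.plugin is a static method on the real class; a test mock may not provide it,
// so fall back to the bare constructor instead of crashing during module init.
const OctokitWithPlugins =
  typeof Octokit.plugin === "function" ? Octokit.plugin(retry) : Octokit;

export function createOctokit(token: string) {
  return new OctokitWithPlugins({ auth: token });
}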