Files
gitea-mirror/src/lib/github.ts
ARUNAVO RAY 6f2e0cbca0 Add GitHub starred-list filtering with searchable selector (#247)
* feat: add starred list filtering and selector UI

* docs: add starred lists UI screenshot

* lib: improve starred list name matching
2026-03-24 07:33:46 +05:30

791 lines
22 KiB
TypeScript

import type { GitOrg, MembershipRole } from "@/types/organizations";
import type { GitRepo, RepoStatus } from "@/types/Repository";
import { Octokit } from "@octokit/rest";
import { throttling } from "@octokit/plugin-throttling";
import type { Config } from "@/types/config";
// Optional integrations: the rate-limit manager and the event publisher are
// loaded dynamically and only outside the test environment. Both bindings stay
// null when loading is skipped or fails, so every use site must null-check them.
let RateLimitManager: any = null;
let publishEvent: any = null;
if (process.env.NODE_ENV !== "test") {
  try {
    // The imports run one after the other; if the second one throws, the
    // binding assigned by the first one is kept.
    ({ RateLimitManager } = await import("@/lib/rate-limit-manager"));
    ({ publishEvent } = await import("@/lib/events"));
  } catch (error) {
    console.warn("Rate limit manager not available:", error);
  }
}
// Octokit constructor used by this module. When the imported Octokit exposes a
// callable `.plugin` (tests may stub Octokit without it) the throttling plugin
// is applied; otherwise the imported Octokit is used unchanged.
const MyOctokit: any = (() => {
  const base = Octokit as any;
  if (base?.plugin?.call) {
    return base.plugin(throttling);
  }
  return base;
})();
/**
 * Creates an authenticated Octokit instance with rate limit tracking and throttling.
 *
 * Rate-limit bookkeeping (`RateLimitManager`) and UI notifications
 * (`publishEvent`) are optional: they are only used when `userId` is given and
 * the corresponding module was loaded. Both are best-effort — a failure there
 * is logged and never changes the outcome of the GitHub request itself.
 *
 * @param token - GitHub token passed to Octokit as `auth`.
 * @param userId - When set, rate-limit status is recorded for this user and
 *   events are published on the "rate-limit" channel.
 * @param username - When set, it is included in the User-Agent string.
 * @returns The client built from `MyOctokit` with the throttle handlers and
 *   (when tracking is available) request hooks installed.
 */
export function createGitHubClient(
  token: string,
  userId?: string,
  username?: string,
): Octokit {
  // Create a proper User-Agent to identify our application
  // This helps GitHub understand our traffic patterns and can provide better rate limits
  const userAgent = username
    ? `gitea-mirror/3.5.4 (user:${username})`
    : "gitea-mirror/3.5.4";

  // Support GH_API_URL (preferred) or GITHUB_API_URL (may conflict with GitHub Actions)
  // GitHub Actions sets GITHUB_API_URL to https://api.github.com by default
  const baseUrl =
    process.env.GH_API_URL ||
    process.env.GITHUB_API_URL ||
    "https://api.github.com";

  // Runs a tracking/notification side effect without letting its failure
  // escape: a broken event bus or status store must not fail a request that
  // succeeded, nor replace the GitHub error the caller needs to see.
  const bestEffort = async (
    label: string,
    task: () => Promise<unknown>,
  ): Promise<void> => {
    try {
      await task();
    } catch (trackingError) {
      console.warn(`[GitHub] ${label} failed:`, trackingError);
    }
  };

  const octokit = new MyOctokit({
    auth: token, // Always use token for authentication (5000 req/hr vs 60 for unauthenticated)
    userAgent, // Identify our application and user
    baseUrl, // Configurable for E2E testing
    log: {
      debug: () => {},
      info: console.log,
      warn: console.warn,
      error: console.error,
    },
    request: {
      // Add default headers for better identification
      headers: {
        accept: "application/vnd.github.v3+json",
        "x-github-api-version": "2022-11-28", // Use a stable API version
      },
    },
    throttle: {
      onRateLimit: async (
        retryAfter: number,
        options: any,
        _octokit: any,
        retryCount: number,
      ) => {
        const isSearch = options.url.includes("/search/");
        const maxRetries = isSearch ? 5 : 3; // Search endpoints get more retries
        const willRetry = retryCount < maxRetries;

        console.warn(
          willRetry
            ? `[GitHub] Rate limit hit for ${options.method} ${options.url}. Retry ${retryCount + 1}/${maxRetries}`
            : `[GitHub] Rate limit hit for ${options.method} ${options.url}. No retries left (max ${maxRetries})`,
        );

        // Update rate limit status and notify UI (if available)
        if (userId && RateLimitManager) {
          await bestEffort("Rate limit status update", () =>
            RateLimitManager.updateFromResponse(userId, {
              "retry-after": retryAfter.toString(),
              "x-ratelimit-remaining": "0",
              // Whole epoch seconds, rounded up so the synthesized reset time
              // is never earlier than the actual wait.
              "x-ratelimit-reset": Math.ceil(
                Date.now() / 1000 + retryAfter,
              ).toString(),
            }),
          );
        }
        if (userId && publishEvent) {
          await bestEffort("Rate limit notification", () =>
            publishEvent({
              userId,
              channel: "rate-limit",
              payload: {
                type: "rate-limited",
                provider: "github",
                retryAfter,
                retryCount,
                endpoint: options.url,
                message: willRetry
                  ? `Rate limit hit. Waiting ${retryAfter}s before retry ${retryCount + 1}/${maxRetries}...`
                  : `Rate limit hit. Max retries (${maxRetries}) reached.`,
              },
            }),
          );
        }

        // Returning true asks the throttling plugin to retry the request
        // after `retryAfter` seconds; returning false gives up.
        if (willRetry) {
          console.log(`[GitHub] Waiting ${retryAfter}s before retry...`);
          return true;
        }

        // Max retries reached
        console.error(
          `[GitHub] Max retries (${maxRetries}) reached for ${options.url}`,
        );
        return false;
      },
      onSecondaryRateLimit: async (
        retryAfter: number,
        options: any,
        _octokit: any,
        retryCount: number,
      ) => {
        // Retry up to 2 times for secondary rate limits
        const willRetry = retryCount < 2;

        console.warn(
          `[GitHub] Secondary rate limit hit for ${options.method} ${options.url}`,
        );

        // Update status and notify UI (if available)
        if (userId && publishEvent) {
          await bestEffort("Secondary rate limit notification", () =>
            publishEvent({
              userId,
              channel: "rate-limit",
              payload: {
                type: "secondary-limited",
                provider: "github",
                retryAfter,
                retryCount,
                endpoint: options.url,
                message: willRetry
                  ? `Secondary rate limit hit. Waiting ${retryAfter}s...`
                  : "Secondary rate limit hit. Max retries (2) reached.",
              },
            }),
          );
        }

        if (willRetry) {
          console.log(
            `[GitHub] Waiting ${retryAfter}s for secondary rate limit...`,
          );
          return true;
        }
        return false;
      },
      // Throttle options to prevent hitting limits
      fallbackSecondaryRateRetryAfter: 60, // Wait 60s on secondary rate limit
      minimumSecondaryRateRetryAfter: 5, // Min 5s wait
      retryAfterBaseValue: 1000, // Base retry in ms
    },
  });

  // Add rate limit tracking hooks if userId is provided and RateLimitManager is available
  if (userId && RateLimitManager) {
    octokit.hook.after("request", async (response: any, _options: any) => {
      if (response.headers) {
        await bestEffort("Rate limit status update", () =>
          RateLimitManager.updateFromResponse(userId, response.headers),
        );
      }
    });

    octokit.hook.error("request", async (error: any, options: any) => {
      // Handle rate limit errors
      if (error.status === 403 || error.status === 429) {
        const message = error.message || "";
        if (message.includes("rate limit")) {
          console.error(
            `[GitHub] Rate limit error for user ${userId}: ${message}`,
          );

          // Update rate limit status from error response (if available)
          if (error.response?.headers && RateLimitManager) {
            await bestEffort("Rate limit status update", () =>
              RateLimitManager.updateFromResponse(
                userId,
                error.response.headers,
              ),
            );
          }

          // Create error event for UI (if available)
          if (publishEvent) {
            await bestEffort("Rate limit error notification", () =>
              publishEvent({
                userId,
                channel: "rate-limit",
                payload: {
                  type: "error",
                  provider: "github",
                  error: message,
                  endpoint: options.url,
                  message: `Rate limit exceeded: ${message}`,
                },
              }),
            );
          }
        }
      }
      // Always surface the original GitHub error to the caller.
      throw error;
    });
  }

  return octokit;
}
/**
 * Looks up a GitHub repository and returns its web URL and clone URL.
 *
 * Nothing is cloned here: the function performs a single `repos.get` call and
 * returns `html_url` as `url` and `clone_url` as `cloneUrl`.
 *
 * @param octokit - Client used for the lookup.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @returns The repository's `url` and `cloneUrl`.
 */
export async function getGithubRepoCloneUrl({
  octokit,
  owner,
  repo,
}: {
  octokit: Octokit;
  owner: string;
  repo: string;
}): Promise<{ url: string; cloneUrl: string }> {
  const response = await octokit.repos.get({ owner, repo });
  const { html_url: url, clone_url: cloneUrl } = response.data;
  return { url, cloneUrl };
}
/**
 * Lists the authenticated user's repositories and converts them to `GitRepo`.
 *
 * All pages are fetched through `octokit.paginate` (100 per page). Forks are
 * dropped when `config.githubConfig.skipForks` is truthy. `hasLFS`,
 * `hasSubmodules` and `isStarred` are always set to `false` here.
 *
 * TODO: apply more filters based on user config.
 *
 * @param octokit - Client used for the listing.
 * @param config - Only `githubConfig.skipForks` is read.
 * @returns One `GitRepo` per retained repository, with status "imported".
 * @throws Error prefixed with "Error fetching repositories: " on any failure.
 */
export async function getGithubRepositories({
  octokit,
  config,
}: {
  octokit: Octokit;
  config: Partial<Config>;
}): Promise<GitRepo[]> {
  try {
    const fetched = await octokit.paginate(
      octokit.repos.listForAuthenticatedUser,
      { per_page: 100 },
    );
    const skipForks = config.githubConfig?.skipForks ?? false;

    const result: GitRepo[] = [];
    for (const repo of fetched) {
      if (skipForks && repo.fork) {
        continue;
      }

      const ownerLogin = repo.owner.login;
      // `parent` is not part of the listing type; read it when present.
      const withParent = repo as typeof repo & {
        parent?: { full_name: string };
      };

      result.push({
        name: repo.name,
        fullName: repo.full_name,
        url: repo.html_url,
        cloneUrl: repo.clone_url,
        owner: ownerLogin,
        organization:
          repo.owner.type === "Organization" ? ownerLogin : undefined,
        mirroredLocation: "",
        destinationOrg: null,
        isPrivate: repo.private,
        isForked: repo.fork,
        forkedFrom: withParent.parent?.full_name,
        hasIssues: repo.has_issues,
        isStarred: false,
        isArchived: repo.archived,
        size: repo.size,
        hasLFS: false,
        hasSubmodules: false,
        language: repo.language,
        description: repo.description,
        defaultBranch: repo.default_branch,
        visibility: (repo.visibility ?? "public") as GitRepo["visibility"],
        status: "imported",
        isDisabled: repo.disabled ?? false,
        lastMirrored: undefined,
        errorMessage: undefined,
        importedAt: new Date(),
        createdAt: repo.created_at ? new Date(repo.created_at) : new Date(),
        updatedAt: repo.updated_at ? new Date(repo.updated_at) : new Date(),
      });
    }
    return result;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Error fetching repositories: ${reason}`);
  }
}
/**
 * Builds the key used to compare star-list names loosely.
 *
 * The name is NFKC-normalized, trimmed and lowercased; then only runs of
 * letters and numbers are kept and concatenated, so "My-List!" and "my list"
 * both map to "mylist".
 *
 * Names that contain no letter or number at all (for example a list called
 * "🔥") would otherwise collapse to an empty key and be treated as invalid by
 * callers. For those, the key falls back to the normalized text with
 * whitespace and emoji variation selectors removed, so such lists remain
 * selectable and comparable.
 *
 * @param rawValue - List name as configured or as returned by GitHub.
 * @returns The match key; empty only when the name is blank.
 */
function getStarredListMatchKey(rawValue: string): string {
  const normalized = rawValue.normalize("NFKC").trim().toLowerCase();
  const tokens = normalized.match(/[\p{L}\p{N}]+/gu);
  if (tokens) {
    return tokens.join("");
  }
  // Symbol/emoji-only name: keep the symbols themselves as the key.
  return normalized.replace(/[\s\uFE0E\uFE0F]+/gu, "");
}
/**
 * Cleans a raw list-name collection into trimmed, de-duplicated names.
 *
 * Anything that is not an array yields `[]`. Non-string entries, blank entries
 * and entries whose match key is empty are skipped. Two names are duplicates
 * when `getStarredListMatchKey` gives them the same key; the first occurrence
 * wins and the original order is kept.
 *
 * @param rawLists - Untrusted value, e.g. from stored configuration.
 * @returns The surviving names, trimmed.
 */
function normalizeStarredListNames(rawLists: unknown): string[] {
  if (!Array.isArray(rawLists)) {
    return [];
  }

  const seenKeys = new Set<string>();
  const names: string[] = [];
  for (const entry of rawLists) {
    const name = typeof entry === "string" ? entry.trim() : "";
    if (name === "") {
      continue;
    }
    const key = getStarredListMatchKey(name);
    if (key === "" || seenKeys.has(key)) {
      continue;
    }
    seenKeys.add(key);
    names.push(name);
  }
  return names;
}
/**
 * Returns `repoUrl` with a ".git" suffix, adding it only when missing.
 *
 * @param repoUrl - Repository URL.
 * @returns The URL ending in ".git".
 */
function toHttpsCloneUrl(repoUrl: string): string {
  if (repoUrl.endsWith(".git")) {
    return repoUrl;
  }
  return repoUrl + ".git";
}
/**
 * A star list as selected by the `viewer.lists` GraphQL query in
 * getGithubStarLists; only `id` and `name` are requested.
 */
interface GitHubStarListNode {
// Passed as the `$listId` variable to `node(id: ...)` when the list's items are loaded.
id: string;
// Display name; compared with configured names through getStarredListMatchKey.
name: string;
}
/**
 * Repository fields selected by the `... on Repository` fragment of the
 * star-list items GraphQL query (getGithubRepositoriesForStarList) and consumed
 * by mapGraphqlRepoToGitRepo.
 */
interface GitHubRepositoryListItem {
// Discriminator: list items are kept only when this equals "Repository".
__typename: "Repository";
name: string;
// Mapped to GitRepo.fullName; also lowercased to de-duplicate across lists.
nameWithOwner: string;
// Mapped to GitRepo.url and used as the base for the ".git" clone URL.
url: string;
// Requested by the query; not read by mapGraphqlRepoToGitRepo.
sshUrl: string;
isPrivate: boolean;
isFork: boolean;
isArchived: boolean;
isDisabled: boolean;
hasIssuesEnabled: boolean;
// Mapped to GitRepo.size (the mapper still falls back to 0 when nullish).
diskUsage: number;
description: string | null;
// When null (or the name is empty) the mapper falls back to "main".
defaultBranchRef: { name: string } | null;
// Lowercased by the mapper before being stored as GitRepo.visibility.
visibility: "PUBLIC" | "PRIVATE" | "INTERNAL";
updatedAt: string;
createdAt: string;
owner: {
// "Organization" makes the mapper set GitRepo.organization to `login`.
__typename: "Organization" | "User" | string;
login: string;
};
// Null maps to a null GitRepo.language.
primaryLanguage: {
name: string;
} | null;
}
/**
 * Loads every star list of the authenticated user (`viewer.lists`) via GraphQL.
 *
 * Pages of 50 are requested until `hasNextPage` is false or no usable
 * `endCursor` is returned. Null nodes and nodes whose `id` or `name` is not a
 * string are dropped.
 *
 * @param octokit - Client whose `graphql` method runs the query.
 * @returns All valid lists in the order GitHub returned them.
 */
async function getGithubStarLists(octokit: Octokit): Promise<GitHubStarListNode[]> {
  type StarListsPage = {
    viewer: {
      lists: {
        nodes: Array<GitHubStarListNode | null> | null;
        pageInfo: {
          hasNextPage: boolean;
          endCursor: string | null;
        };
      };
    };
  };

  // Query text is kept exactly as sent before (whitespace included).
  const query = `
query($after: String) {
viewer {
lists(first: 50, after: $after) {
nodes {
id
name
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
`;

  const collected: GitHubStarListNode[] = [];
  let after: string | null = null;

  while (true) {
    const page: StarListsPage = await octokit.graphql<StarListsPage>(query, {
      after,
    });
    const { nodes, pageInfo } = page.viewer.lists;

    for (const node of nodes ?? []) {
      if (node && typeof node.id === "string" && typeof node.name === "string") {
        collected.push(node);
      }
    }

    if (!pageInfo.hasNextPage) {
      return collected;
    }
    after = pageInfo.endCursor;
    // A missing cursor means there is no way to ask for the next page.
    if (!after) {
      return collected;
    }
  }
}
/**
 * Loads the repositories contained in one star list via GraphQL.
 *
 * The list is addressed with `node(id: $listId)` and its items are fetched 100
 * at a time. Only items whose `__typename` is "Repository" are kept. Paging
 * stops when the node is null, when `hasNextPage` is false, or when no usable
 * `endCursor` is returned.
 *
 * @param octokit - Client whose `graphql` method runs the query.
 * @param listId - Node id of the star list.
 * @returns The repositories collected so far when paging stops.
 */
async function getGithubRepositoriesForStarList(
  octokit: Octokit,
  listId: string,
): Promise<GitHubRepositoryListItem[]> {
  type ListItemsPage = {
    node: {
      items: {
        nodes: Array<GitHubRepositoryListItem | null> | null;
        pageInfo: {
          hasNextPage: boolean;
          endCursor: string | null;
        };
      };
    } | null;
  };

  // Query text is kept exactly as sent before (whitespace included).
  const query = `
query($listId: ID!, $after: String) {
node(id: $listId) {
... on UserList {
items(first: 100, after: $after) {
nodes {
__typename
... on Repository {
name
nameWithOwner
url
sshUrl
isPrivate
isFork
isArchived
isDisabled
hasIssuesEnabled
diskUsage
description
defaultBranchRef {
name
}
visibility
updatedAt
createdAt
owner {
__typename
login
}
primaryLanguage {
name
}
}
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
}
`;

  const found: GitHubRepositoryListItem[] = [];
  let after: string | null = null;

  while (true) {
    const page: ListItemsPage = await octokit.graphql<ListItemsPage>(query, {
      listId,
      after,
    });

    const list = page.node;
    if (!list) {
      return found;
    }

    for (const item of list.items.nodes ?? []) {
      if (item?.__typename === "Repository") {
        found.push(item);
      }
    }

    const { hasNextPage, endCursor } = list.items.pageInfo;
    if (!hasNextPage) {
      return found;
    }
    after = endCursor;
    // A missing cursor means there is no way to ask for the next page.
    if (!after) {
      return found;
    }
  }
}
/**
 * Converts a repository node from the star-list GraphQL query into a `GitRepo`.
 *
 * Notable conversions: the clone URL is derived from `url` with
 * `toHttpsCloneUrl`, `visibility` is lowercased (defaulting to "PUBLIC" when
 * nullish), a missing or empty default branch becomes "main", and missing
 * timestamps fall back to the current time. `isStarred` is always `true`;
 * `forkedFrom`, `hasLFS` and `hasSubmodules` are not populated.
 *
 * @param repo - Repository node as returned by the query.
 * @returns The mapped repository with status "imported".
 */
function mapGraphqlRepoToGitRepo(repo: GitHubRepositoryListItem): GitRepo {
  const toDate = (value: string): Date =>
    value ? new Date(value) : new Date();

  const { owner } = repo;
  const ownedByOrg = owner.__typename === "Organization";
  const created = toDate(repo.createdAt);
  const updated = toDate(repo.updatedAt);

  const mapped: GitRepo = {
    name: repo.name,
    fullName: repo.nameWithOwner,
    url: repo.url,
    cloneUrl: toHttpsCloneUrl(repo.url),
    owner: owner.login,
    organization: ownedByOrg ? owner.login : undefined,
    mirroredLocation: "",
    destinationOrg: null,
    isPrivate: repo.isPrivate,
    isForked: repo.isFork,
    forkedFrom: undefined,
    hasIssues: repo.hasIssuesEnabled,
    isStarred: true,
    isArchived: repo.isArchived,
    size: repo.diskUsage ?? 0,
    hasLFS: false,
    hasSubmodules: false,
    language: repo.primaryLanguage?.name ?? null,
    description: repo.description,
    defaultBranch: repo.defaultBranchRef?.name || "main",
    visibility: (repo.visibility ?? "PUBLIC").toLowerCase() as GitRepo["visibility"],
    status: "imported",
    isDisabled: repo.isDisabled,
    lastMirrored: undefined,
    errorMessage: undefined,
    importedAt: new Date(),
    createdAt: created,
    updatedAt: updated,
  };
  return mapped;
}
/**
 * Returns the repositories the authenticated user has starred.
 *
 * Two modes, chosen by `config.githubConfig.starredLists`:
 *
 * - Lists configured: the user's star lists are loaded via GraphQL, matched
 *   against the configured names with `getStarredListMatchKey`, and only the
 *   repositories of the matched lists are returned (de-duplicated by
 *   case-insensitive `nameWithOwner`). If no configured list matches, an error
 *   naming up to 20 available lists is thrown. If only some match, the
 *   unmatched names are reported with a warning instead of being dropped
 *   silently, and the matched lists are still used.
 * - No lists configured: every starred repository is fetched through the REST
 *   API with pagination.
 *
 * @param octokit - Client used for GraphQL and REST calls.
 * @param config - Only `githubConfig.starredLists` is read.
 * @returns The starred repositories as `GitRepo` with `isStarred: true`.
 * @throws Error prefixed with "Error fetching starred repositories: " on any
 *   failure, including the "lists not found" case.
 */
export async function getGithubStarredRepositories({
  octokit,
  config,
}: {
  octokit: Octokit;
  config: Partial<Config>;
}): Promise<GitRepo[]> {
  try {
    const configuredLists = normalizeStarredListNames(
      config.githubConfig?.starredLists,
    );

    if (configuredLists.length > 0) {
      const allLists = await getGithubStarLists(octokit);
      const configuredMatchKeySet = new Set(
        configuredLists.map((list) => getStarredListMatchKey(list)),
      );
      const matchedLists = allLists.filter((list) =>
        configuredMatchKeySet.has(getStarredListMatchKey(list.name)),
      );

      if (matchedLists.length === 0) {
        const availableListNames = normalizeStarredListNames(
          allLists.map((list) => list.name),
        );
        const preview = availableListNames.slice(0, 20).join(", ");
        const availableSuffix = preview
          ? `. Available lists: ${preview}${availableListNames.length > 20 ? ", ..." : ""}`
          : "";
        throw new Error(
          `Configured GitHub star lists not found: ${configuredLists.join(", ")}${availableSuffix}`,
        );
      }

      // A partially matching configuration (e.g. one renamed or misspelled
      // list) would otherwise shrink the result without any trace.
      const matchedKeySet = new Set(
        matchedLists.map((list) => getStarredListMatchKey(list.name)),
      );
      const missingLists = configuredLists.filter(
        (name) => !matchedKeySet.has(getStarredListMatchKey(name)),
      );
      if (missingLists.length > 0) {
        console.warn(
          `[GitHub] Configured star lists not found and skipped: ${missingLists.join(", ")}`,
        );
      }

      // Lists are fetched one after another; a repository that appears in
      // several lists is kept once (first occurrence wins).
      const deduped = new Map<string, GitRepo>();
      for (const list of matchedLists) {
        const repos = await getGithubRepositoriesForStarList(octokit, list.id);
        for (const repo of repos) {
          const key = repo.nameWithOwner.toLowerCase();
          if (deduped.has(key)) continue;
          deduped.set(key, mapGraphqlRepoToGitRepo(repo));
        }
      }
      return [...deduped.values()];
    }

    const starredRepos = await octokit.paginate(
      octokit.activity.listReposStarredByAuthenticatedUser,
      {
        per_page: 100,
      },
    );

    return starredRepos.map((repo) => ({
      name: repo.name,
      fullName: repo.full_name,
      url: repo.html_url,
      cloneUrl: repo.clone_url,
      owner: repo.owner.login,
      organization:
        repo.owner.type === "Organization" ? repo.owner.login : undefined,
      mirroredLocation: "",
      destinationOrg: null,
      isPrivate: repo.private,
      isForked: repo.fork,
      forkedFrom: undefined,
      hasIssues: repo.has_issues,
      isStarred: true,
      isArchived: repo.archived,
      size: repo.size,
      hasLFS: false, // Placeholder
      hasSubmodules: false, // Placeholder
      language: repo.language,
      description: repo.description,
      defaultBranch: repo.default_branch,
      visibility: (repo.visibility ?? "public") as GitRepo["visibility"],
      status: "imported",
      isDisabled: repo.disabled ?? false,
      lastMirrored: undefined,
      errorMessage: undefined,
      importedAt: new Date(),
      createdAt: repo.created_at ? new Date(repo.created_at) : new Date(),
      updatedAt: repo.updated_at ? new Date(repo.updated_at) : new Date(),
    }));
  } catch (error) {
    throw new Error(
      `Error fetching starred repositories: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
}
/**
 * Returns the names of the authenticated user's star lists.
 *
 * The lists are loaded with `getGithubStarLists` and their names are passed
 * through `normalizeStarredListNames` (trimmed, de-duplicated by match key).
 * Errors from the lookup propagate unchanged.
 *
 * @param octokit - Client used to load the lists.
 * @returns The normalized list names.
 */
export async function getGithubStarredListNames({
  octokit,
}: {
  octokit: Octokit;
}): Promise<string[]> {
  const names: string[] = [];
  for (const list of await getGithubStarLists(octokit)) {
    names.push(list.name);
  }
  return normalizeStarredListNames(names);
}
/**
 * Get the authenticated user's GitHub organizations.
 *
 * All pages of the organization listing are fetched through
 * `octokit.paginate`, so users who belong to more than 100 organizations are
 * not truncated. Organizations named in the comma-separated
 * `GITHUB_EXCLUDED_ORGS` environment variable (case-insensitive) are skipped.
 * For each remaining organization the details and the user's membership are
 * requested in parallel.
 *
 * An organization whose lookup fails with HTTP 403 is reported in `failedOrgs`
 * instead of failing the whole call; any other error aborts.
 *
 * @param octokit - Client used for all requests.
 * @param config - Currently not read by this function.
 * @returns `organizations` (status "imported", `isIncluded: false`, repository
 *   count = public + private) and `failedOrgs` with the 403 reason.
 * @throws Error prefixed with "Error fetching organizations: " on any other failure.
 */
export async function getGithubOrganizations({
  octokit,
  config,
}: {
  octokit: Octokit;
  config: Partial<Config>;
}): Promise<{ organizations: GitOrg[]; failedOrgs: { name: string; avatarUrl: string; reason: string }[] }> {
  try {
    // Paginate like the repository listings do; a single call returns at most
    // `per_page` organizations.
    const orgs = await octokit.paginate(
      octokit.orgs.listForAuthenticatedUser,
      { per_page: 100 },
    );

    // Get excluded organizations from environment variable
    const excludedOrgsEnv = process.env.GITHUB_EXCLUDED_ORGS;
    const excludedOrgs = excludedOrgsEnv
      ? excludedOrgsEnv.split(",").map((org) => org.trim().toLowerCase())
      : [];

    // Filter out excluded organizations
    const filteredOrgs = orgs.filter((org) => {
      if (excludedOrgs.includes(org.login.toLowerCase())) {
        console.log(
          `Skipping organization ${org.login} - excluded via GITHUB_EXCLUDED_ORGS environment variable`,
        );
        return false;
      }
      return true;
    });

    const failedOrgs: { name: string; avatarUrl: string; reason: string }[] = [];
    const results = await Promise.all(
      filteredOrgs.map(async (org) => {
        try {
          const [{ data: orgDetails }, { data: membership }] = await Promise.all([
            octokit.orgs.get({ org: org.login }),
            octokit.orgs.getMembershipForAuthenticatedUser({ org: org.login }),
          ]);
          const totalRepos =
            orgDetails.public_repos + (orgDetails.total_private_repos ?? 0);
          return {
            name: org.login,
            avatarUrl: org.avatar_url,
            membershipRole: membership.role as MembershipRole,
            isIncluded: false,
            status: "imported" as RepoStatus,
            repositoryCount: totalRepos,
            createdAt: new Date(),
            updatedAt: new Date(),
          };
        } catch (error: any) {
          // Capture organizations that return 403 (SAML enforcement, insufficient token scope, etc.)
          if (error?.status === 403) {
            const reason = error?.message || "access denied";
            console.warn(
              `Failed to import organization ${org.login} - ${reason}`,
            );
            failedOrgs.push({ name: org.login, avatarUrl: org.avatar_url, reason });
            return null;
          }
          throw error;
        }
      }),
    );

    return {
      organizations: results.filter((org): org is NonNullable<typeof org> => org !== null),
      failedOrgs,
    };
  } catch (error) {
    throw new Error(
      `Error fetching organizations: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
}
/**
 * Lists every repository of one organization and converts it to `GitRepo`.
 *
 * All pages are fetched through `octokit.paginate` (100 per page). Fields the
 * listing may omit get defaults: empty clone URL, `false` for issues/archived/
 * disabled, size 0, default branch "main", visibility "public". `organization`
 * is always the repository owner's login and `isStarred` is always `false`.
 *
 * @param octokit - Client used for the listing.
 * @param organizationName - Organization login passed as `org`.
 * @returns One `GitRepo` per repository, with status "imported".
 * @throws Error prefixed with "Error fetching organization repositories: " on any failure.
 */
export async function getGithubOrganizationRepositories({
  octokit,
  organizationName,
}: {
  octokit: Octokit;
  organizationName: string;
}): Promise<GitRepo[]> {
  try {
    const fetched = await octokit.paginate(octokit.repos.listForOrg, {
      org: organizationName,
      per_page: 100,
    });

    const result: GitRepo[] = [];
    for (const repo of fetched) {
      const ownerLogin = repo.owner.login;
      // `parent` is not part of the listing type; read it when present.
      const withParent = repo as typeof repo & {
        parent?: { full_name: string };
      };

      result.push({
        name: repo.name,
        fullName: repo.full_name,
        url: repo.html_url,
        cloneUrl: repo.clone_url ?? "",
        owner: ownerLogin,
        organization: ownerLogin,
        mirroredLocation: "",
        destinationOrg: null,
        isPrivate: repo.private,
        isForked: repo.fork,
        forkedFrom: withParent.parent?.full_name,
        hasIssues: repo.has_issues ?? false,
        isStarred: false, // Organization starred repos are separate API
        isArchived: repo.archived ?? false,
        size: repo.size ?? 0,
        hasLFS: false,
        hasSubmodules: false,
        language: repo.language,
        description: repo.description,
        defaultBranch: repo.default_branch ?? "main",
        visibility: (repo.visibility ?? "public") as GitRepo["visibility"],
        status: "imported",
        isDisabled: repo.disabled ?? false,
        lastMirrored: undefined,
        errorMessage: undefined,
        importedAt: new Date(),
        createdAt: repo.created_at ? new Date(repo.created_at) : new Date(),
        updatedAt: repo.updated_at ? new Date(repo.updated_at) : new Date(),
      });
    }
    return result;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Error fetching organization repositories: ${reason}`);
  }
}