diff --git a/lib/providers/rules/hosting.ts b/lib/providers/rules/hosting.ts index b26773a..39acd45 100644 --- a/lib/providers/rules/hosting.ts +++ b/lib/providers/rules/hosting.ts @@ -135,7 +135,12 @@ export const HOSTING_PROVIDERS: Array< name: "Render", domain: "render.com", category: "hosting", - rule: { kind: "headerEquals", name: "server", value: "render" }, + rule: { + any: [ + { kind: "headerEquals", name: "server", value: "render" }, + { kind: "headerPresent", name: "x-render-origin-server" }, + ], + }, }, { name: "Squarespace", @@ -240,12 +245,6 @@ export const HOSTING_PROVIDERS: Array< ], }, }, - { - name: "Render", - domain: "render.com", - category: "hosting", - rule: { kind: "headerPresent", name: "x-render-origin-server" }, - }, { name: "Ghost", domain: "ghost.org", diff --git a/scripts/db/seed.ts b/scripts/db/seed.ts index b2884c4..59cd340 100644 --- a/scripts/db/seed.ts +++ b/scripts/db/seed.ts @@ -4,8 +4,13 @@ * This script syncs the provider catalog to the database by: * - Inserting new catalog providers (matched by name/slug) * - Updating existing providers to match catalog definitions (matched by name/slug) + * - Replacing "discovered" providers with catalog providers when rules match + * + * Providers are matched by: + * 1. Primary: slug (derived from name) within the same category + * 2. Secondary: rule evaluation for discovered providers (e.g., catalog provider + * with mxSuffix "tutanota.de" replaces discovered provider "mail.tutanota.de") * - * Providers are matched solely by their slug (derived from name). * Multiple providers can share the same domain (e.g., Amazon S3, CloudFront both use aws.amazon.com). * * Usage: @@ -21,7 +26,13 @@ dotenv.config(); import { eq, sql } from "drizzle-orm"; import { db } from "@/lib/db/client"; -import { type providerCategory, providers } from "@/lib/db/schema"; +import { + certificates, + hosting, + type providerCategory, + providers, + registrations, +} from "@/lib/db/schema"; import { CA_PROVIDERS, DNS_PROVIDERS, @@ -29,23 +40,28 @@ import { HOSTING_PROVIDERS, REGISTRAR_PROVIDERS, } from "@/lib/providers/catalog"; +import { evalRule } from "@/lib/providers/detection"; +import type { DetectionContext, Provider, Rule } from "@/lib/schemas"; import { slugify } from "@/lib/slugify"; type SeedDef = { name: string; domain: string | null; category: (typeof providerCategory.enumValues)[number]; + rule?: Rule; aliases?: string[]; }; function collect(): SeedDef[] { const arr: SeedDef[] = []; - const push = ( - cat: SeedDef["category"], - src: { name: string; domain: string }[], - ) => { + const push = (cat: SeedDef["category"], src: Provider[]) => { for (const p of src) - arr.push({ name: p.name, domain: p.domain ?? null, category: cat }); + arr.push({ + name: p.name, + domain: p.domain ?? null, + category: cat, + rule: p.rule, + }); }; push("dns", DNS_PROVIDERS); push("email", EMAIL_PROVIDERS); @@ -55,6 +71,69 @@ function collect(): SeedDef[] { return arr; } +/** + * Check if a catalog provider's rules would match a discovered provider. + * + * For example: + * - Catalog provider "Tuta" with mxSuffix "tutanota.de" + * - Discovered provider named "mail.tutanota.de" (from MX record) + * - Returns true because the catalog rule matches the discovered name + */ +function catalogRuleMatchesDiscovered( + catalogDef: SeedDef, + discoveredProvider: { name: string; domain: string | null }, +): boolean { + if (!catalogDef.rule) return false; + + // Build detection context based on discovered provider's name/domain + // The discovered name is typically extracted from DNS records (MX/NS/etc.) + const ctx: DetectionContext = { + headers: {}, + mx: [], + ns: [], + }; + + // Populate context based on category to test if catalog rules would match + switch (catalogDef.category) { + case "email": + // Discovered email providers are typically auto-created from MX record hostnames + ctx.mx = [discoveredProvider.name]; + if (discoveredProvider.domain) ctx.mx.push(discoveredProvider.domain); + break; + case "dns": + // Discovered DNS providers are typically auto-created from NS record hostnames + ctx.ns = [discoveredProvider.name]; + if (discoveredProvider.domain) ctx.ns.push(discoveredProvider.domain); + break; + case "hosting": + // Hosting providers use header-based detection, harder to match retrospectively + // Skip rule-based matching for hosting + return false; + case "ca": + // CA providers use issuer string detection + if (discoveredProvider.name) { + ctx.issuer = discoveredProvider.name.toLowerCase(); + } + break; + case "registrar": + // Registrar providers use registrar name detection + if (discoveredProvider.name) { + ctx.registrar = discoveredProvider.name.toLowerCase(); + } + break; + } + + try { + return evalRule(catalogDef.rule, ctx); + } catch (err) { + console.warn( + `Failed to evaluate rule for ${catalogDef.name} against ${discoveredProvider.name}:`, + err, + ); + return false; + } +} + async function main() { // Parse command-line arguments const isDryRun = process.argv.includes("--dry-run"); @@ -95,22 +174,71 @@ async function main() { const lowerDomain = def.domain ? def.domain.toLowerCase() : null; // Match by slug (name-based matching) - // The slug (derived from provider name) is the ONLY identifier + // The slug (derived from provider name) is the ONLY identifier for catalog-to-catalog // Domain is NOT unique since multiple services share parent company domains // (e.g., Amazon S3, CloudFront, Route 53 all use aws.amazon.com) const slugKey = `${def.category}:${slug}`; const existing = bySlug.get(slugKey); if (!existing) { - // New record - queue for insert - toInsert.push({ - name: def.name, - domain: lowerDomain, - category: def.category, - slug, - source: "catalog", - }); - inserted++; + // No exact slug match - check if catalog rules match any discovered providers + let ruleMatched = false; + + if (def.rule) { + for (const [_existingSlugKey, existingProvider] of bySlug.entries()) { + // Only consider discovered providers in the same category + if ( + existingProvider.source === "discovered" && + existingProvider.category === def.category + ) { + if ( + catalogRuleMatchesDiscovered(def, { + name: existingProvider.name, + domain: existingProvider.domain, + }) + ) { + // Catalog rule matches this discovered provider - replace it + toUpdate.push({ + id: existingProvider.id, + name: def.name, + slug, + source: "catalog", + domain: lowerDomain, + }); + updated++; + ruleMatched = true; + + // Update the bySlug map to prevent duplicate matches + const oldSlugKey = `${existingProvider.category}:${existingProvider.slug}`; + bySlug.delete(oldSlugKey); + bySlug.set(slugKey, { + ...existingProvider, + name: def.name, + slug, + source: "catalog", + domain: lowerDomain, + }); + + console.log( + ` ๐Ÿ“ Replacing discovered "${existingProvider.name}" with catalog "${def.name}" (rule match)`, + ); + break; // Only replace one discovered provider per catalog entry + } + } + } + } + + if (!ruleMatched) { + // New record - queue for insert + toInsert.push({ + name: def.name, + domain: lowerDomain, + category: def.category, + slug, + source: "catalog", + }); + inserted++; + } } else { // Existing record - check if update is needed const needsUpdate = @@ -146,17 +274,33 @@ async function main() { // Batch insert new providers if (toInsert.length > 0) { + // Deduplicate by (category, slug) to prevent constraint violations + // This handles cases where multiple catalog entries map to the same slug + // (e.g., multiple rules for the same provider) + const uniqueInserts = new Map(); + for (const ins of toInsert) { + const key = `${ins.category}:${ins.slug}`; + if (!uniqueInserts.has(key)) { + uniqueInserts.set(key, ins); + } else { + console.warn( + `โš ๏ธ Skipping duplicate insert for "${ins.name}" (slug: ${ins.slug}) - already queued`, + ); + } + } + const deduplicatedInserts = Array.from(uniqueInserts.values()); + console.log( - `${isDryRun ? "[DRY RUN] Would insert" : "Inserting"} ${toInsert.length} new provider(s)...`, + `${isDryRun ? "[DRY RUN] Would insert" : "Inserting"} ${deduplicatedInserts.length} new provider(s)...`, ); if (isDryRun) { - for (const ins of toInsert) { + for (const ins of deduplicatedInserts) { console.log( ` - ${ins.category}: ${ins.name} (${ins.domain || "no domain"})`, ); } } else { - await db.insert(providers).values(toInsert); + await db.insert(providers).values(deduplicatedInserts); } } @@ -185,12 +329,175 @@ async function main() { } } + // Cleanup phase: Remove orphaned discovered providers that match catalog rules + // This handles leftovers from the old logic where both catalog and discovered versions exist + console.log("\nCleaning up orphaned discovered providers..."); + + // Refresh the provider list after inserts/updates + const refreshedProviders = await db.select().from(providers); + + // Build a map of catalog providers by category + const catalogByCategory = new Map< + string, + Array<{ + id: string; + name: string; + domain: string | null; + rule?: Rule; + }> + >(); + + for (const def of defs) { + const slug = slugify(def.name); + + // Find the corresponding provider in the refreshed list + const provider = refreshedProviders.find( + (p) => p.category === def.category && p.slug === slug, + ); + + if (provider && def.rule) { + if (!catalogByCategory.has(def.category)) { + catalogByCategory.set(def.category, []); + } + catalogByCategory.get(def.category)?.push({ + id: provider.id, + name: provider.name, + domain: provider.domain, + rule: def.rule, + }); + } + } + + // Find discovered providers that should be merged into catalog providers + const toCleanup: Array<{ + discoveredId: string; + discoveredName: string; + catalogId: string; + catalogName: string; + }> = []; + + for (const discovered of refreshedProviders) { + // Only process discovered providers + if (discovered.source !== "discovered") continue; + + const catalogProviders = catalogByCategory.get(discovered.category) ?? []; + + // Check if any catalog provider's rules match this discovered provider + for (const catalog of catalogProviders) { + // Skip if this is the same provider (shouldn't happen, but safeguard) + if (catalog.id === discovered.id) continue; + + if ( + catalogRuleMatchesDiscovered( + { + name: catalog.name, + domain: catalog.domain, + category: discovered.category, + rule: catalog.rule, + }, + { + name: discovered.name, + domain: discovered.domain, + }, + ) + ) { + toCleanup.push({ + discoveredId: discovered.id, + discoveredName: discovered.name, + catalogId: catalog.id, + catalogName: catalog.name, + }); + break; // Only match one catalog provider per discovered provider + } + } + } + + let cleaned = 0; + + if (toCleanup.length > 0) { + console.log( + `${isDryRun ? "[DRY RUN] Would clean up" : "Cleaning up"} ${toCleanup.length} orphaned provider(s)...`, + ); + + for (const cleanup of toCleanup) { + console.log( + ` ๐Ÿงน Merging "${cleanup.discoveredName}" โ†’ "${cleanup.catalogName}"`, + ); + + if (!isDryRun) { + try { + // Wrap all FK migrations and deletion in a single transaction + // to ensure atomicity - either all succeed or all rollback + await db.transaction(async (tx) => { + // Migrate foreign key references from discovered โ†’ catalog + // Update registrations table + await tx + .update(registrations) + .set({ registrarProviderId: cleanup.catalogId }) + .where( + eq(registrations.registrarProviderId, cleanup.discoveredId), + ); + + await tx + .update(registrations) + .set({ resellerProviderId: cleanup.catalogId }) + .where( + eq(registrations.resellerProviderId, cleanup.discoveredId), + ); + + // Update certificates table + await tx + .update(certificates) + .set({ caProviderId: cleanup.catalogId }) + .where(eq(certificates.caProviderId, cleanup.discoveredId)); + + // Update hosting table + await tx + .update(hosting) + .set({ hostingProviderId: cleanup.catalogId }) + .where(eq(hosting.hostingProviderId, cleanup.discoveredId)); + + await tx + .update(hosting) + .set({ emailProviderId: cleanup.catalogId }) + .where(eq(hosting.emailProviderId, cleanup.discoveredId)); + + await tx + .update(hosting) + .set({ dnsProviderId: cleanup.catalogId }) + .where(eq(hosting.dnsProviderId, cleanup.discoveredId)); + + // Delete the orphaned discovered provider + await tx + .delete(providers) + .where(eq(providers.id, cleanup.discoveredId)); + }); + + cleaned++; + } catch (err) { + console.error( + `โŒ Failed to merge "${cleanup.discoveredName}" โ†’ "${cleanup.catalogName}":`, + err, + ); + throw err; // Re-throw to fail the script and prevent incomplete migrations + } + } else { + // Dry run: just count what would be cleaned + cleaned++; + } + } + } else { + console.log(" โœจ No orphaned providers found"); + } + if (isDryRun) { console.log( - `\nโœ… DRY RUN COMPLETE: Would have inserted ${inserted}, updated ${updated}`, + `\nโœ… DRY RUN COMPLETE: Would have inserted ${inserted}, updated ${updated}, cleaned ${cleaned}`, ); } else { - console.log(`\nโœ… Seeded ${inserted} inserted, ${updated} updated`); + console.log( + `\nโœ… Seeded ${inserted} inserted, ${updated} updated, ${cleaned} cleaned`, + ); } }