You've already forked domainstack.io
mirror of
https://github.com/jakejarvis/domainstack.io.git
synced 2025-12-02 19:33:48 -05:00
feat: enhance provider seeding logic to support rule-based matching and cleanup of orphaned discovered providers (#204)
This commit is contained in:
@@ -135,7 +135,12 @@ export const HOSTING_PROVIDERS: Array<
|
|||||||
name: "Render",
|
name: "Render",
|
||||||
domain: "render.com",
|
domain: "render.com",
|
||||||
category: "hosting",
|
category: "hosting",
|
||||||
rule: { kind: "headerEquals", name: "server", value: "render" },
|
rule: {
|
||||||
|
any: [
|
||||||
|
{ kind: "headerEquals", name: "server", value: "render" },
|
||||||
|
{ kind: "headerPresent", name: "x-render-origin-server" },
|
||||||
|
],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Squarespace",
|
name: "Squarespace",
|
||||||
@@ -240,12 +245,6 @@ export const HOSTING_PROVIDERS: Array<
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Render",
|
|
||||||
domain: "render.com",
|
|
||||||
category: "hosting",
|
|
||||||
rule: { kind: "headerPresent", name: "x-render-origin-server" },
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Ghost",
|
name: "Ghost",
|
||||||
domain: "ghost.org",
|
domain: "ghost.org",
|
||||||
|
|||||||
@@ -4,8 +4,13 @@
|
|||||||
* This script syncs the provider catalog to the database by:
|
* This script syncs the provider catalog to the database by:
|
||||||
* - Inserting new catalog providers (matched by name/slug)
|
* - Inserting new catalog providers (matched by name/slug)
|
||||||
* - Updating existing providers to match catalog definitions (matched by name/slug)
|
* - Updating existing providers to match catalog definitions (matched by name/slug)
|
||||||
|
* - Replacing "discovered" providers with catalog providers when rules match
|
||||||
|
*
|
||||||
|
* Providers are matched by:
|
||||||
|
* 1. Primary: slug (derived from name) within the same category
|
||||||
|
* 2. Secondary: rule evaluation for discovered providers (e.g., catalog provider
|
||||||
|
* with mxSuffix "tutanota.de" replaces discovered provider "mail.tutanota.de")
|
||||||
*
|
*
|
||||||
* Providers are matched solely by their slug (derived from name).
|
|
||||||
* Multiple providers can share the same domain (e.g., Amazon S3, CloudFront both use aws.amazon.com).
|
* Multiple providers can share the same domain (e.g., Amazon S3, CloudFront both use aws.amazon.com).
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
@@ -21,7 +26,13 @@ dotenv.config();
|
|||||||
|
|
||||||
import { eq, sql } from "drizzle-orm";
|
import { eq, sql } from "drizzle-orm";
|
||||||
import { db } from "@/lib/db/client";
|
import { db } from "@/lib/db/client";
|
||||||
import { type providerCategory, providers } from "@/lib/db/schema";
|
import {
|
||||||
|
certificates,
|
||||||
|
hosting,
|
||||||
|
type providerCategory,
|
||||||
|
providers,
|
||||||
|
registrations,
|
||||||
|
} from "@/lib/db/schema";
|
||||||
import {
|
import {
|
||||||
CA_PROVIDERS,
|
CA_PROVIDERS,
|
||||||
DNS_PROVIDERS,
|
DNS_PROVIDERS,
|
||||||
@@ -29,23 +40,28 @@ import {
|
|||||||
HOSTING_PROVIDERS,
|
HOSTING_PROVIDERS,
|
||||||
REGISTRAR_PROVIDERS,
|
REGISTRAR_PROVIDERS,
|
||||||
} from "@/lib/providers/catalog";
|
} from "@/lib/providers/catalog";
|
||||||
|
import { evalRule } from "@/lib/providers/detection";
|
||||||
|
import type { DetectionContext, Provider, Rule } from "@/lib/schemas";
|
||||||
import { slugify } from "@/lib/slugify";
|
import { slugify } from "@/lib/slugify";
|
||||||
|
|
||||||
type SeedDef = {
|
type SeedDef = {
|
||||||
name: string;
|
name: string;
|
||||||
domain: string | null;
|
domain: string | null;
|
||||||
category: (typeof providerCategory.enumValues)[number];
|
category: (typeof providerCategory.enumValues)[number];
|
||||||
|
rule?: Rule;
|
||||||
aliases?: string[];
|
aliases?: string[];
|
||||||
};
|
};
|
||||||
|
|
||||||
function collect(): SeedDef[] {
|
function collect(): SeedDef[] {
|
||||||
const arr: SeedDef[] = [];
|
const arr: SeedDef[] = [];
|
||||||
const push = (
|
const push = (cat: SeedDef["category"], src: Provider[]) => {
|
||||||
cat: SeedDef["category"],
|
|
||||||
src: { name: string; domain: string }[],
|
|
||||||
) => {
|
|
||||||
for (const p of src)
|
for (const p of src)
|
||||||
arr.push({ name: p.name, domain: p.domain ?? null, category: cat });
|
arr.push({
|
||||||
|
name: p.name,
|
||||||
|
domain: p.domain ?? null,
|
||||||
|
category: cat,
|
||||||
|
rule: p.rule,
|
||||||
|
});
|
||||||
};
|
};
|
||||||
push("dns", DNS_PROVIDERS);
|
push("dns", DNS_PROVIDERS);
|
||||||
push("email", EMAIL_PROVIDERS);
|
push("email", EMAIL_PROVIDERS);
|
||||||
@@ -55,6 +71,69 @@ function collect(): SeedDef[] {
|
|||||||
return arr;
|
return arr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Check whether a catalog provider's detection rule would match a
 * "discovered" provider row.
 *
 * The discovered provider's name (and, for DNS-based categories, its domain)
 * is placed into a synthetic detection context, in the slot that corresponds
 * to the catalog entry's category, and the catalog rule is evaluated against
 * that context.
 *
 * For example:
 * - Catalog provider "Tuta" with mxSuffix "tutanota.de"
 * - Discovered provider named "mail.tutanota.de" (from MX record)
 * - Returns true because the catalog rule matches the discovered name
 *
 * @param catalogDef - Catalog definition whose `rule` is evaluated.
 * @param discoveredProvider - Name and domain of the discovered provider.
 * @returns `true` when the rule matches the synthetic context; `false` when
 *   the catalog entry has no rule, when its category is "hosting", or when
 *   rule evaluation throws (the error is logged as a warning).
 */
function catalogRuleMatchesDiscovered(
  catalogDef: SeedDef,
  discoveredProvider: { name: string; domain: string | null },
): boolean {
  if (!catalogDef.rule) return false;

  // Hostname candidates for the MX/NS slots. DNS names are case-insensitive,
  // so they are lowercased here, the same way the issuer/registrar values are
  // lowercased below.
  // NOTE(review): assumes catalog rule values are lowercase — confirm against
  // evalRule and the catalog definitions.
  const hostnames = (): string[] => {
    const hosts = [discoveredProvider.name.toLowerCase()];
    if (discoveredProvider.domain) {
      hosts.push(discoveredProvider.domain.toLowerCase());
    }
    return hosts;
  };

  // Start from an empty context; only the slot relevant to the category is
  // populated so the rule is tested against the discovered provider alone.
  const ctx: DetectionContext = {
    headers: {},
    mx: [],
    ns: [],
  };

  switch (catalogDef.category) {
    case "email":
      // Discovered email providers are typically auto-created from MX record hostnames
      ctx.mx = hostnames();
      break;
    case "dns":
      // Discovered DNS providers are typically auto-created from NS record hostnames
      ctx.ns = hostnames();
      break;
    case "hosting":
      // Hosting rules are header-based; a stored name/domain gives nothing to
      // evaluate them against, so rule-based matching is skipped entirely.
      return false;
    case "ca":
      // CA rules are evaluated against the issuer string
      if (discoveredProvider.name) {
        ctx.issuer = discoveredProvider.name.toLowerCase();
      }
      break;
    case "registrar":
      // Registrar rules are evaluated against the registrar name
      if (discoveredProvider.name) {
        ctx.registrar = discoveredProvider.name.toLowerCase();
      }
      break;
  }

  try {
    return evalRule(catalogDef.rule, ctx);
  } catch (err) {
    // A malformed rule must not abort the whole seed run; treat it as "no match".
    console.warn(
      `Failed to evaluate rule for ${catalogDef.name} against ${discoveredProvider.name}:`,
      err,
    );
    return false;
  }
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
// Parse command-line arguments
|
// Parse command-line arguments
|
||||||
const isDryRun = process.argv.includes("--dry-run");
|
const isDryRun = process.argv.includes("--dry-run");
|
||||||
@@ -95,22 +174,71 @@ async function main() {
|
|||||||
const lowerDomain = def.domain ? def.domain.toLowerCase() : null;
|
const lowerDomain = def.domain ? def.domain.toLowerCase() : null;
|
||||||
|
|
||||||
// Match by slug (name-based matching)
|
// Match by slug (name-based matching)
|
||||||
// The slug (derived from provider name) is the ONLY identifier
|
// The slug (derived from provider name) is the ONLY identifier for catalog-to-catalog matching
|
||||||
// Domain is NOT unique since multiple services share parent company domains
|
// Domain is NOT unique since multiple services share parent company domains
|
||||||
// (e.g., Amazon S3, CloudFront, Route 53 all use aws.amazon.com)
|
// (e.g., Amazon S3, CloudFront, Route 53 all use aws.amazon.com)
|
||||||
const slugKey = `${def.category}:${slug}`;
|
const slugKey = `${def.category}:${slug}`;
|
||||||
const existing = bySlug.get(slugKey);
|
const existing = bySlug.get(slugKey);
|
||||||
|
|
||||||
if (!existing) {
|
if (!existing) {
|
||||||
// New record - queue for insert
|
// No exact slug match - check if catalog rules match any discovered providers
|
||||||
toInsert.push({
|
let ruleMatched = false;
|
||||||
name: def.name,
|
|
||||||
domain: lowerDomain,
|
if (def.rule) {
|
||||||
category: def.category,
|
for (const [_existingSlugKey, existingProvider] of bySlug.entries()) {
|
||||||
slug,
|
// Only consider discovered providers in the same category
|
||||||
source: "catalog",
|
if (
|
||||||
});
|
existingProvider.source === "discovered" &&
|
||||||
inserted++;
|
existingProvider.category === def.category
|
||||||
|
) {
|
||||||
|
if (
|
||||||
|
catalogRuleMatchesDiscovered(def, {
|
||||||
|
name: existingProvider.name,
|
||||||
|
domain: existingProvider.domain,
|
||||||
|
})
|
||||||
|
) {
|
||||||
|
// Catalog rule matches this discovered provider - replace it
|
||||||
|
toUpdate.push({
|
||||||
|
id: existingProvider.id,
|
||||||
|
name: def.name,
|
||||||
|
slug,
|
||||||
|
source: "catalog",
|
||||||
|
domain: lowerDomain,
|
||||||
|
});
|
||||||
|
updated++;
|
||||||
|
ruleMatched = true;
|
||||||
|
|
||||||
|
// Update the bySlug map to prevent duplicate matches
|
||||||
|
const oldSlugKey = `${existingProvider.category}:${existingProvider.slug}`;
|
||||||
|
bySlug.delete(oldSlugKey);
|
||||||
|
bySlug.set(slugKey, {
|
||||||
|
...existingProvider,
|
||||||
|
name: def.name,
|
||||||
|
slug,
|
||||||
|
source: "catalog",
|
||||||
|
domain: lowerDomain,
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
` 📝 Replacing discovered "${existingProvider.name}" with catalog "${def.name}" (rule match)`,
|
||||||
|
);
|
||||||
|
break; // Only replace one discovered provider per catalog entry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ruleMatched) {
|
||||||
|
// New record - queue for insert
|
||||||
|
toInsert.push({
|
||||||
|
name: def.name,
|
||||||
|
domain: lowerDomain,
|
||||||
|
category: def.category,
|
||||||
|
slug,
|
||||||
|
source: "catalog",
|
||||||
|
});
|
||||||
|
inserted++;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Existing record - check if update is needed
|
// Existing record - check if update is needed
|
||||||
const needsUpdate =
|
const needsUpdate =
|
||||||
@@ -146,17 +274,33 @@ async function main() {
|
|||||||
|
|
||||||
// Batch insert new providers
|
// Batch insert new providers
|
||||||
if (toInsert.length > 0) {
|
if (toInsert.length > 0) {
|
||||||
|
// Deduplicate by (category, slug) to prevent constraint violations
|
||||||
|
// This handles cases where multiple catalog entries map to the same slug
|
||||||
|
// (e.g., multiple rules for the same provider)
|
||||||
|
const uniqueInserts = new Map<string, (typeof toInsert)[number]>();
|
||||||
|
for (const ins of toInsert) {
|
||||||
|
const key = `${ins.category}:${ins.slug}`;
|
||||||
|
if (!uniqueInserts.has(key)) {
|
||||||
|
uniqueInserts.set(key, ins);
|
||||||
|
} else {
|
||||||
|
console.warn(
|
||||||
|
`⚠️ Skipping duplicate insert for "${ins.name}" (slug: ${ins.slug}) - already queued`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const deduplicatedInserts = Array.from(uniqueInserts.values());
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`${isDryRun ? "[DRY RUN] Would insert" : "Inserting"} ${toInsert.length} new provider(s)...`,
|
`${isDryRun ? "[DRY RUN] Would insert" : "Inserting"} ${deduplicatedInserts.length} new provider(s)...`,
|
||||||
);
|
);
|
||||||
if (isDryRun) {
|
if (isDryRun) {
|
||||||
for (const ins of toInsert) {
|
for (const ins of deduplicatedInserts) {
|
||||||
console.log(
|
console.log(
|
||||||
` - ${ins.category}: ${ins.name} (${ins.domain || "no domain"})`,
|
` - ${ins.category}: ${ins.name} (${ins.domain || "no domain"})`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
await db.insert(providers).values(toInsert);
|
await db.insert(providers).values(deduplicatedInserts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,12 +329,175 @@ async function main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cleanup phase: Remove orphaned discovered providers that match catalog rules
|
||||||
|
// This handles leftovers from the old logic where both catalog and discovered versions exist
|
||||||
|
console.log("\nCleaning up orphaned discovered providers...");
|
||||||
|
|
||||||
|
// Refresh the provider list after inserts/updates
|
||||||
|
const refreshedProviders = await db.select().from(providers);
|
||||||
|
|
||||||
|
// Build a map of catalog providers by category
|
||||||
|
const catalogByCategory = new Map<
|
||||||
|
string,
|
||||||
|
Array<{
|
||||||
|
id: string;
|
||||||
|
name: string;
|
||||||
|
domain: string | null;
|
||||||
|
rule?: Rule;
|
||||||
|
}>
|
||||||
|
>();
|
||||||
|
|
||||||
|
for (const def of defs) {
|
||||||
|
const slug = slugify(def.name);
|
||||||
|
|
||||||
|
// Find the corresponding provider in the refreshed list
|
||||||
|
const provider = refreshedProviders.find(
|
||||||
|
(p) => p.category === def.category && p.slug === slug,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (provider && def.rule) {
|
||||||
|
if (!catalogByCategory.has(def.category)) {
|
||||||
|
catalogByCategory.set(def.category, []);
|
||||||
|
}
|
||||||
|
catalogByCategory.get(def.category)?.push({
|
||||||
|
id: provider.id,
|
||||||
|
name: provider.name,
|
||||||
|
domain: provider.domain,
|
||||||
|
rule: def.rule,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find discovered providers that should be merged into catalog providers
|
||||||
|
const toCleanup: Array<{
|
||||||
|
discoveredId: string;
|
||||||
|
discoveredName: string;
|
||||||
|
catalogId: string;
|
||||||
|
catalogName: string;
|
||||||
|
}> = [];
|
||||||
|
|
||||||
|
for (const discovered of refreshedProviders) {
|
||||||
|
// Only process discovered providers
|
||||||
|
if (discovered.source !== "discovered") continue;
|
||||||
|
|
||||||
|
const catalogProviders = catalogByCategory.get(discovered.category) ?? [];
|
||||||
|
|
||||||
|
// Check if any catalog provider's rules match this discovered provider
|
||||||
|
for (const catalog of catalogProviders) {
|
||||||
|
// Skip if this is the same provider (shouldn't happen, but safeguard)
|
||||||
|
if (catalog.id === discovered.id) continue;
|
||||||
|
|
||||||
|
if (
|
||||||
|
catalogRuleMatchesDiscovered(
|
||||||
|
{
|
||||||
|
name: catalog.name,
|
||||||
|
domain: catalog.domain,
|
||||||
|
category: discovered.category,
|
||||||
|
rule: catalog.rule,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: discovered.name,
|
||||||
|
domain: discovered.domain,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
toCleanup.push({
|
||||||
|
discoveredId: discovered.id,
|
||||||
|
discoveredName: discovered.name,
|
||||||
|
catalogId: catalog.id,
|
||||||
|
catalogName: catalog.name,
|
||||||
|
});
|
||||||
|
break; // Only match one catalog provider per discovered provider
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let cleaned = 0;
|
||||||
|
|
||||||
|
if (toCleanup.length > 0) {
|
||||||
|
console.log(
|
||||||
|
`${isDryRun ? "[DRY RUN] Would clean up" : "Cleaning up"} ${toCleanup.length} orphaned provider(s)...`,
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const cleanup of toCleanup) {
|
||||||
|
console.log(
|
||||||
|
` 🧹 Merging "${cleanup.discoveredName}" → "${cleanup.catalogName}"`,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!isDryRun) {
|
||||||
|
try {
|
||||||
|
// Wrap all FK migrations and deletion in a single transaction
|
||||||
|
// to ensure atomicity - either all succeed or all rollback
|
||||||
|
await db.transaction(async (tx) => {
|
||||||
|
// Migrate foreign key references from discovered → catalog
|
||||||
|
// Update registrations table
|
||||||
|
await tx
|
||||||
|
.update(registrations)
|
||||||
|
.set({ registrarProviderId: cleanup.catalogId })
|
||||||
|
.where(
|
||||||
|
eq(registrations.registrarProviderId, cleanup.discoveredId),
|
||||||
|
);
|
||||||
|
|
||||||
|
await tx
|
||||||
|
.update(registrations)
|
||||||
|
.set({ resellerProviderId: cleanup.catalogId })
|
||||||
|
.where(
|
||||||
|
eq(registrations.resellerProviderId, cleanup.discoveredId),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Update certificates table
|
||||||
|
await tx
|
||||||
|
.update(certificates)
|
||||||
|
.set({ caProviderId: cleanup.catalogId })
|
||||||
|
.where(eq(certificates.caProviderId, cleanup.discoveredId));
|
||||||
|
|
||||||
|
// Update hosting table
|
||||||
|
await tx
|
||||||
|
.update(hosting)
|
||||||
|
.set({ hostingProviderId: cleanup.catalogId })
|
||||||
|
.where(eq(hosting.hostingProviderId, cleanup.discoveredId));
|
||||||
|
|
||||||
|
await tx
|
||||||
|
.update(hosting)
|
||||||
|
.set({ emailProviderId: cleanup.catalogId })
|
||||||
|
.where(eq(hosting.emailProviderId, cleanup.discoveredId));
|
||||||
|
|
||||||
|
await tx
|
||||||
|
.update(hosting)
|
||||||
|
.set({ dnsProviderId: cleanup.catalogId })
|
||||||
|
.where(eq(hosting.dnsProviderId, cleanup.discoveredId));
|
||||||
|
|
||||||
|
// Delete the orphaned discovered provider
|
||||||
|
await tx
|
||||||
|
.delete(providers)
|
||||||
|
.where(eq(providers.id, cleanup.discoveredId));
|
||||||
|
});
|
||||||
|
|
||||||
|
cleaned++;
|
||||||
|
} catch (err) {
|
||||||
|
console.error(
|
||||||
|
`❌ Failed to merge "${cleanup.discoveredName}" → "${cleanup.catalogName}":`,
|
||||||
|
err,
|
||||||
|
);
|
||||||
|
throw err; // Re-throw to fail the script and prevent incomplete migrations
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Dry run: just count what would be cleaned
|
||||||
|
cleaned++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(" ✨ No orphaned providers found");
|
||||||
|
}
|
||||||
|
|
||||||
if (isDryRun) {
|
if (isDryRun) {
|
||||||
console.log(
|
console.log(
|
||||||
`\n✅ DRY RUN COMPLETE: Would have inserted ${inserted}, updated ${updated}`,
|
`\n✅ DRY RUN COMPLETE: Would have inserted ${inserted}, updated ${updated}, cleaned ${cleaned}`,
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
console.log(`\n✅ Seeded ${inserted} inserted, ${updated} updated`);
|
console.log(
|
||||||
|
`\n✅ Seeded ${inserted} inserted, ${updated} updated, ${cleaned} cleaned`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user