1
mirror of https://github.com/jakejarvis/rdapper.git synced 2026-01-27 21:45:19 -05:00

Implement WHOIS referral chain collection and merging logic (fixes #11)

- Introduced `collectWhoisReferralChain` to gather WHOIS responses while avoiding contradictory data from registrars.
- Updated `lookupDomain` to utilize the new chain collection method, ensuring TLD responses are prioritized.
- Added `mergeWhoisRecords` function to consolidate WHOIS data from multiple sources.
- Enhanced tests for referral handling and merging behavior, ensuring accurate data retention across scenarios.
This commit is contained in:
2025-10-20 14:08:26 -04:00
parent b00d0bbb0a
commit 2a1b7529cc
8 changed files with 250 additions and 20 deletions

View File

@@ -36,6 +36,13 @@ vi.mock("./whois/referral.js", async () => {
opts?: import("./types").LookupOptions,
) => client.whoisQuery(server, domain, opts),
),
collectWhoisReferralChain: vi.fn(
async (
server: string,
domain: string,
opts?: import("./types").LookupOptions,
) => [await client.whoisQuery(server, domain, opts)],
),
};
});
@@ -113,10 +120,9 @@ describe("WHOIS referral & includeRaw", () => {
});
it("does not follow referral when followWhoisReferral is false", async () => {
vi.mocked(whoisReferral.followWhoisReferrals).mockResolvedValue({
text: "Registrar WHOIS Server: whois.registrar.test\nDomain Name: EXAMPLE.COM",
serverQueried: "whois.verisign-grs.com",
});
// Ensure chain collector is used and only initial TLD response is returned
const original = vi.mocked(whoisReferral.collectWhoisReferralChain);
original.mockClear();
const res = await lookupDomain("example.com", {
timeoutMs: 200,
@@ -124,9 +130,7 @@ describe("WHOIS referral & includeRaw", () => {
followWhoisReferral: false,
});
expect(res.ok, res.error).toBe(true);
expect(vi.mocked(whoisReferral.followWhoisReferrals)).toHaveBeenCalledTimes(
1,
);
expect(original).toHaveBeenCalledTimes(1);
});
it("includes rawWhois when includeRaw is true", async () => {

View File

@@ -9,8 +9,12 @@ import {
ianaWhoisServerForTld,
parseIanaRegistrationInfoUrl,
} from "./whois/discovery";
import { mergeWhoisRecords } from "./whois/merge";
import { normalizeWhois } from "./whois/normalize";
import { followWhoisReferrals } from "./whois/referral";
import {
collectWhoisReferralChain,
followWhoisReferrals,
} from "./whois/referral";
/**
* High-level lookup that prefers RDAP and falls back to WHOIS.
@@ -32,7 +36,13 @@ export async function lookupDomain(
// If WHOIS-only, skip RDAP path
if (!opts?.whoisOnly) {
const bases = await getRdapBaseUrlsForTld(tld, opts);
let bases = await getRdapBaseUrlsForTld(tld, opts);
// Some ccTLD registries publish RDAP only at the registry TLD (e.g., br),
// while the public suffix can be multi-label (e.g., com.br). Fallback to last label.
if (bases.length === 0 && tld.includes(".")) {
const registryTld = tld.split(".").pop() ?? tld;
bases = await getRdapBaseUrlsForTld(registryTld, opts);
}
const tried: string[] = [];
for (const base of bases) {
tried.push(base);
@@ -80,16 +90,28 @@ export async function lookupDomain(
}
// Query the TLD server first; optionally follow registrar referrals (multi-hop)
const res = await followWhoisReferrals(whoisServer, domain, opts);
// Collect the chain and coalesce so we don't lose details when a registrar returns minimal/empty data.
const chain = await collectWhoisReferralChain(whoisServer, domain, opts);
if (chain.length === 0) {
// Fallback to previous behavior as a safety net
const res = await followWhoisReferrals(whoisServer, domain, opts);
const record: DomainRecord = normalizeWhois(
domain,
tld,
res.text,
res.serverQueried,
!!opts?.includeRaw,
);
return { ok: true, record };
}
const record: DomainRecord = normalizeWhois(
domain,
tld,
res.text,
res.serverQueried,
!!opts?.includeRaw,
// Normalize all WHOIS texts in the chain and merge conservatively
const normalizedRecords = chain.map((r) =>
normalizeWhois(domain, tld, r.text, r.serverQueried, !!opts?.includeRaw),
);
return { ok: true, record };
const [first, ...rest] = normalizedRecords;
const mergedRecord = rest.length ? mergeWhoisRecords(first, rest) : first;
return { ok: true, record: mergedRecord };
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
return { ok: false, error: message };

View File

@@ -81,6 +81,12 @@ const WHOIS_AVAILABLE_PATTERNS: RegExp[] = [
/\bdomain status[:\s]+available\b/i,
/\bobject does not exist\b/i,
/\bthe queried object does not exist\b/i,
// Common variants across ccTLDs/registrars
/\bstatus:\s*free\b/i,
/\bstatus:\s*available\b/i,
/\bno object found\b/i,
/\bnicht gefunden\b/i,
/\bpending release\b/i, // often signals not registered/being deleted
];
export function isWhoisAvailable(text: string | undefined): boolean {

View File

@@ -37,9 +37,10 @@ export async function getRdapBaseUrlsForTld(
const target = tld.toLowerCase();
const bases: string[] = [];
for (const svc of data.services) {
const tlds = svc[0];
const tlds = svc[0].map((x) => x.toLowerCase());
const urls = svc[1];
if (tlds.map((x) => x.toLowerCase()).includes(target)) {
// Match exact TLD, and also support multi-label public suffixes present in IANA (rare)
if (tlds.includes(target)) {
for (const u of urls) {
const base = u.endsWith("/") ? u : `${u}/`;
bases.push(base);

45
src/whois/merge.test.ts Normal file
View File

@@ -0,0 +1,45 @@
import { describe, expect, it, vi } from "vitest";
// Replace the WHOIS client with a stub so the referral chain is exercised without network access.
// The stub's answer depends only on which server is queried.
vi.mock("./client.js", () => ({
whoisQuery: vi.fn(async (server: string) => {
// Registry server: full record whose "Registrar WHOIS Server" line refers onward to whois.udag.net
if (server === "whois.nic.io") {
return {
serverQueried: server,
text: `Domain Name: GITPOD.IO\nCreation Date: 2019-05-14T00:00:00Z\nRegistry Expiry Date: 2030-05-14T00:00:00Z\nRegistrar WHOIS Server: whois.udag.net\nName Server: A.NS\n`,
};
}
if (server === "whois.udag.net") {
// Registrar returns minimal/empty or contradictory content
return { serverQueried: server, text: "NOT FOUND" };
}
// Any other server answers with empty text
return { serverQueried: server, text: "" };
}),
}));
import { mergeWhoisRecords } from "./merge";
import { normalizeWhois } from "./normalize";
import { collectWhoisReferralChain } from "./referral";
// Checks that the registry (TLD) record survives when the referred registrar has nothing useful to add.
describe("WHOIS coalescing", () => {
it("retains TLD data when registrar provides no details", async () => {
// Start at the registry server and allow up to two referral hops
const chain = await collectWhoisReferralChain("whois.nic.io", "gitpod.io", {
followWhoisReferral: true,
maxWhoisReferralHops: 2,
});
// Contradictory registrar should not be appended to chain
expect(chain.length).toBe(1);
const [first] = chain;
// Normalize only the registry response (includeRaw = false)
const base = normalizeWhois(
"gitpod.io",
"io",
first.text,
first.serverQueried,
false,
);
// Merging with no additional records must leave the registry-derived fields intact
const merged = mergeWhoisRecords(base, []);
expect(merged.isRegistered).toBe(true);
expect(merged.creationDate).toBeDefined();
expect(merged.whoisServer?.toLowerCase()).toContain("whois.nic.io");
});
});

96
src/whois/merge.ts Normal file
View File

@@ -0,0 +1,96 @@
import { uniq } from "../lib/text";
import type { Contact, DomainRecord, Nameserver } from "../types";
/**
 * Combine two status lists, keeping the first entry seen for each status text.
 *
 * Status text is compared case-insensitively; entries whose `status` is empty
 * or missing are dropped. Entries from `a` come before entries from `b`.
 *
 * @returns the combined list, or `undefined` when nothing remains.
 */
function dedupeStatuses(
  a?: DomainRecord["statuses"],
  b?: DomainRecord["statuses"],
) {
  const seenKeys = new Set<string>();
  const unique = [...(a || []), ...(b || [])].filter((entry) => {
    const key = (entry?.status || "").toLowerCase();
    // Skip blank statuses and anything already recorded under the same lowercased text
    if (key === "" || seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
  return unique.length > 0 ? unique : undefined;
}
/**
 * Combine two nameserver lists, keyed by lowercased host name.
 *
 * The first occurrence of a host is kept as-is (with the host lowercased).
 * When the same host appears again, the entry is replaced by one holding only
 * `host`, `ipv4` and `ipv6`, where each address list is the `uniq(...)` of the
 * addresses collected so far plus the new ones.
 *
 * @returns the combined list in first-seen order, or `undefined` when empty.
 */
function dedupeNameservers(a?: Nameserver[], b?: Nameserver[]) {
  const byHost = new Map<string, Nameserver>();
  const combined = [...(a || []), ...(b || [])];
  for (const entry of combined) {
    const host = entry.host.toLowerCase();
    const existing = byHost.get(host);
    if (existing) {
      // Same host seen before: pool the address lists from both entries
      byHost.set(host, {
        host,
        ipv4: uniq([...(existing.ipv4 || []), ...(entry.ipv4 || [])]),
        ipv6: uniq([...(existing.ipv6 || []), ...(entry.ipv6 || [])]),
      });
    } else {
      byHost.set(host, { ...entry, host });
    }
  }
  return byHost.size > 0 ? [...byHost.values()] : undefined;
}
/**
 * Combine two contact lists, keeping the first contact seen for each identity.
 *
 * Identity is the contact `type` plus the lowercased first non-empty value of
 * `organization`, `name`, `email` (in that order). Contacts of the same type
 * that have none of those three values therefore collapse into one.
 *
 * @returns the combined list in first-seen order, or `undefined` when empty.
 */
function dedupeContacts(a?: Contact[], b?: Contact[]) {
  const firstByKey = new Map<string, Contact>();
  for (const contact of [...(a || []), ...(b || [])]) {
    const identity = (
      contact.organization ||
      contact.name ||
      contact.email ||
      ""
    )
      .toString()
      .toLowerCase();
    const key = `${contact.type}|${identity}`;
    // Map keeps insertion order, so only the first contact per key is stored
    if (!firstByKey.has(key)) firstByKey.set(key, contact);
  }
  return firstByKey.size > 0 ? [...firstByKey.values()] : undefined;
}
/**
 * Conservatively fold additional WHOIS records into `base`.
 *
 * `base` is not modified; a shallow copy is returned (also when `others` is empty).
 * For each record in `others`, in order:
 * - `isRegistered` becomes true if either side says so;
 * - `registry`, `registrar`, `reseller`, `deletionDate`, `dnssec` and
 *   `privacyEnabled` keep the value already present and are only filled in
 *   when it is null/undefined;
 * - `statuses`, `nameservers` and `contacts` are unioned and de-duplicated;
 * - `creationDate` takes the earlier value, `updatedDate` and `expirationDate`
 *   the later one;
 * - `transferLock` is coerced to a boolean that is true if either side is truthy;
 * - `whoisServer` and `rawWhois` take the newer record's value when it has one.
 *
 * @param base   record to start from (the caller passes the first response of the chain)
 * @param others further records to merge, applied in array order
 * @returns the merged record
 */
export function mergeWhoisRecords(
  base: DomainRecord,
  others: DomainRecord[],
): DomainRecord {
  return others.reduce<DomainRecord>(
    (acc, next) => ({
      ...acc,
      isRegistered: acc.isRegistered || next.isRegistered,
      registry: acc.registry ?? next.registry,
      registrar: acc.registrar ?? next.registrar,
      reseller: acc.reseller ?? next.reseller,
      statuses: dedupeStatuses(acc.statuses, next.statuses),
      // Dates: earliest creation, latest updated/expiration when both are known
      creationDate: preferEarliestIso(acc.creationDate, next.creationDate),
      updatedDate: preferLatestIso(acc.updatedDate, next.updatedDate),
      expirationDate: preferLatestIso(acc.expirationDate, next.expirationDate),
      deletionDate: acc.deletionDate ?? next.deletionDate,
      transferLock: Boolean(acc.transferLock || next.transferLock),
      dnssec: acc.dnssec ?? next.dnssec,
      nameservers: dedupeNameservers(acc.nameservers, next.nameservers),
      contacts: dedupeContacts(acc.contacts, next.contacts),
      privacyEnabled: acc.privacyEnabled ?? next.privacyEnabled,
      // Point at the latest server that contributed, and keep its raw text
      whoisServer: next.whoisServer ?? acc.whoisServer,
      rawWhois: next.rawWhois ?? acc.rawWhois,
    }),
    { ...base },
  );
}
/**
 * Pick the earlier of two date strings.
 *
 * A missing/empty value yields the other one. A value that `Date.parse`
 * cannot read never displaces a readable one (comparing against NaN is always
 * false, which previously let an unparseable `b` win over a valid `a`).
 * When both are unreadable, `b` is returned. Ties return `a`.
 *
 * @param a first candidate (typically the value merged so far)
 * @param b second candidate
 * @returns the chosen string, unchanged, or `undefined` when both are missing
 */
function preferEarliestIso(a?: string, b?: string): string | undefined {
  if (!a) return b;
  if (!b) return a;
  const timeA = Date.parse(a);
  const timeB = Date.parse(b);
  // Unparseable dates cannot be ordered; fall back to whichever side is readable
  if (Number.isNaN(timeA)) return b;
  if (Number.isNaN(timeB)) return a;
  return timeA <= timeB ? a : b;
}
/**
 * Pick the later of two date strings.
 *
 * A missing/empty value yields the other one. A value that `Date.parse`
 * cannot read never displaces a readable one (comparing against NaN is always
 * false, which previously let an unparseable `b` win over a valid `a`).
 * When both are unreadable, `b` is returned. Ties return `a`.
 *
 * @param a first candidate (typically the value merged so far)
 * @param b second candidate
 * @returns the chosen string, unchanged, or `undefined` when both are missing
 */
function preferLatestIso(a?: string, b?: string): string | undefined {
  if (!a) return b;
  if (!b) return a;
  const timeA = Date.parse(a);
  const timeB = Date.parse(b);
  // Unparseable dates cannot be ordered; fall back to whichever side is readable
  if (Number.isNaN(timeA)) return b;
  if (Number.isNaN(timeB)) return a;
  return timeA >= timeB ? a : b;
}

View File

@@ -17,7 +17,7 @@ vi.mock("./client.js", () => ({
}),
}));
import { followWhoisReferrals } from "./referral";
import { collectWhoisReferralChain, followWhoisReferrals } from "./referral";
describe("WHOIS referral contradiction handling", () => {
it("keeps TLD WHOIS when registrar claims availability", async () => {
@@ -30,4 +30,16 @@ describe("WHOIS referral contradiction handling", () => {
expect(res.text.toLowerCase().includes("creation date")).toBe(true);
expect(res.text.toLowerCase().includes("no match")).toBe(false);
});
it("collects chain and does not append contradictory registrar", async () => {
// Start at the registry server with referral following enabled and at most two hops
const chain = await collectWhoisReferralChain(
"whois.nic.io",
"raindrop.io",
{ followWhoisReferral: true, maxWhoisReferralHops: 2 },
);
expect(Array.isArray(chain)).toBe(true);
// Mocked registrar is contradictory, so chain should contain only the TLD response
expect(chain.length).toBe(1);
expect(chain[0].serverQueried).toBe("whois.nic.io");
});
});

View File

@@ -46,6 +46,50 @@ export async function followWhoisReferrals(
return current;
}
/**
 * Gather every WHOIS response along the referral chain, starting at `initialServer`.
 *
 * The first element is always the response from `initialServer`; a failure of
 * that first query propagates to the caller. Further responses are appended
 * while the latest response refers to a server not yet visited, up to
 * `opts.maxWhoisReferralHops` hops (default 2; negative values count as 0).
 * No referral is followed when `opts.followWhoisReferral` is `false`.
 *
 * Collection stops, keeping what was gathered so far, when:
 * - the latest response contains no referral or refers to an already-visited server;
 * - the referred server reports the domain as available although the response
 *   that referred to it did not (that contradictory answer is not appended);
 * - querying the referred server throws.
 *
 * @param initialServer WHOIS server to query first
 * @param domain        domain name to look up
 * @param opts          lookup options, passed through to every query
 * @returns the responses in the order they were obtained
 */
export async function collectWhoisReferralChain(
  initialServer: string,
  domain: string,
  opts?: LookupOptions,
): Promise<WhoisQueryResult[]> {
  const hopLimit = Math.max(0, opts?.maxWhoisReferralHops ?? 2);
  const initialResponse = await whoisQuery(initialServer, domain, opts);
  const chain: WhoisQueryResult[] = [initialResponse];

  const followingDisabled = opts?.followWhoisReferral === false;
  if (followingDisabled || hopLimit === 0) return chain;

  const seenServers = new Set<string>([
    normalize(initialResponse.serverQueried),
  ]);
  let latest = initialResponse;

  // Each completed iteration is one successful hop; every other path leaves the loop.
  for (let hop = 0; hop < hopLimit; hop += 1) {
    const referral = extractWhoisReferral(latest.text);
    if (!referral) break;

    const referralKey = normalize(referral);
    if (seenServers.has(referralKey)) break;
    seenServers.add(referralKey);

    try {
      const response = await whoisQuery(referral, domain, opts);
      const wasRegistered = !isWhoisAvailable(latest.text);
      const nowAvailable = isWhoisAvailable(response.text);
      // A referred server claiming availability contradicts the earlier answer: discard it and stop.
      if (wasRegistered && nowAvailable) break;
      chain.push(response);
      latest = response;
    } catch {
      // Best effort: a failing referral ends the chain without failing the lookup
      break;
    }
  }
  return chain;
}
/**
 * Canonical form of a WHOIS server string for comparison: removes a leading
 * `whois://` scheme (matched case-insensitively) and lowercases the rest.
 */
function normalize(server: string): string {
  const schemePrefix = /^whois:\/\//i;
  const withoutScheme = server.replace(schemePrefix, "");
  return withoutScheme.toLowerCase();
}