mirror of
https://github.com/jakejarvis/rdapper.git
synced 2026-01-27 21:45:19 -05:00
Implement WHOIS referral chain collection and merging logic (fixes #11)
- Introduced `collectWhoisReferralChain` to gather WHOIS responses while avoiding contradictory data from registrars.
- Updated `lookupDomain` to use the new chain collection method, ensuring TLD responses are prioritized.
- Added `mergeWhoisRecords` to consolidate WHOIS data from multiple sources.
- Enhanced tests for referral handling and merging behavior, ensuring accurate data retention across scenarios.
This commit is contained in:
@@ -36,6 +36,13 @@ vi.mock("./whois/referral.js", async () => {
|
||||
opts?: import("./types").LookupOptions,
|
||||
) => client.whoisQuery(server, domain, opts),
|
||||
),
|
||||
collectWhoisReferralChain: vi.fn(
|
||||
async (
|
||||
server: string,
|
||||
domain: string,
|
||||
opts?: import("./types").LookupOptions,
|
||||
) => [await client.whoisQuery(server, domain, opts)],
|
||||
),
|
||||
};
|
||||
});
|
||||
|
||||
@@ -113,10 +120,9 @@ describe("WHOIS referral & includeRaw", () => {
|
||||
});
|
||||
|
||||
it("does not follow referral when followWhoisReferral is false", async () => {
|
||||
vi.mocked(whoisReferral.followWhoisReferrals).mockResolvedValue({
|
||||
text: "Registrar WHOIS Server: whois.registrar.test\nDomain Name: EXAMPLE.COM",
|
||||
serverQueried: "whois.verisign-grs.com",
|
||||
});
|
||||
// Ensure chain collector is used and only initial TLD response is returned
|
||||
const original = vi.mocked(whoisReferral.collectWhoisReferralChain);
|
||||
original.mockClear();
|
||||
|
||||
const res = await lookupDomain("example.com", {
|
||||
timeoutMs: 200,
|
||||
@@ -124,9 +130,7 @@ describe("WHOIS referral & includeRaw", () => {
|
||||
followWhoisReferral: false,
|
||||
});
|
||||
expect(res.ok, res.error).toBe(true);
|
||||
expect(vi.mocked(whoisReferral.followWhoisReferrals)).toHaveBeenCalledTimes(
|
||||
1,
|
||||
);
|
||||
expect(original).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("includes rawWhois when includeRaw is true", async () => {
|
||||
|
||||
42
src/index.ts
42
src/index.ts
@@ -9,8 +9,12 @@ import {
|
||||
ianaWhoisServerForTld,
|
||||
parseIanaRegistrationInfoUrl,
|
||||
} from "./whois/discovery";
|
||||
import { mergeWhoisRecords } from "./whois/merge";
|
||||
import { normalizeWhois } from "./whois/normalize";
|
||||
import { followWhoisReferrals } from "./whois/referral";
|
||||
import {
|
||||
collectWhoisReferralChain,
|
||||
followWhoisReferrals,
|
||||
} from "./whois/referral";
|
||||
|
||||
/**
|
||||
* High-level lookup that prefers RDAP and falls back to WHOIS.
|
||||
@@ -32,7 +36,13 @@ export async function lookupDomain(
|
||||
|
||||
// If WHOIS-only, skip RDAP path
|
||||
if (!opts?.whoisOnly) {
|
||||
const bases = await getRdapBaseUrlsForTld(tld, opts);
|
||||
let bases = await getRdapBaseUrlsForTld(tld, opts);
|
||||
// Some ccTLD registries publish RDAP only at the registry TLD (e.g., br),
|
||||
// while the public suffix can be multi-label (e.g., com.br). Fallback to last label.
|
||||
if (bases.length === 0 && tld.includes(".")) {
|
||||
const registryTld = tld.split(".").pop() ?? tld;
|
||||
bases = await getRdapBaseUrlsForTld(registryTld, opts);
|
||||
}
|
||||
const tried: string[] = [];
|
||||
for (const base of bases) {
|
||||
tried.push(base);
|
||||
@@ -80,16 +90,28 @@ export async function lookupDomain(
|
||||
}
|
||||
|
||||
// Query the TLD server first; optionally follow registrar referrals (multi-hop)
|
||||
const res = await followWhoisReferrals(whoisServer, domain, opts);
|
||||
// Collect the chain and coalesce so we don't lose details when a registrar returns minimal/empty data.
|
||||
const chain = await collectWhoisReferralChain(whoisServer, domain, opts);
|
||||
if (chain.length === 0) {
|
||||
// Fallback to previous behavior as a safety net
|
||||
const res = await followWhoisReferrals(whoisServer, domain, opts);
|
||||
const record: DomainRecord = normalizeWhois(
|
||||
domain,
|
||||
tld,
|
||||
res.text,
|
||||
res.serverQueried,
|
||||
!!opts?.includeRaw,
|
||||
);
|
||||
return { ok: true, record };
|
||||
}
|
||||
|
||||
const record: DomainRecord = normalizeWhois(
|
||||
domain,
|
||||
tld,
|
||||
res.text,
|
||||
res.serverQueried,
|
||||
!!opts?.includeRaw,
|
||||
// Normalize all WHOIS texts in the chain and merge conservatively
|
||||
const normalizedRecords = chain.map((r) =>
|
||||
normalizeWhois(domain, tld, r.text, r.serverQueried, !!opts?.includeRaw),
|
||||
);
|
||||
return { ok: true, record };
|
||||
const [first, ...rest] = normalizedRecords;
|
||||
const mergedRecord = rest.length ? mergeWhoisRecords(first, rest) : first;
|
||||
return { ok: true, record: mergedRecord };
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
return { ok: false, error: message };
|
||||
|
||||
@@ -81,6 +81,12 @@ const WHOIS_AVAILABLE_PATTERNS: RegExp[] = [
|
||||
/\bdomain status[:\s]+available\b/i,
|
||||
/\bobject does not exist\b/i,
|
||||
/\bthe queried object does not exist\b/i,
|
||||
// Common variants across ccTLDs/registrars
|
||||
/\bstatus:\s*free\b/i,
|
||||
/\bstatus:\s*available\b/i,
|
||||
/\bno object found\b/i,
|
||||
/\bnicht gefunden\b/i,
|
||||
/\bpending release\b/i, // often signals not registered/being deleted
|
||||
];
|
||||
|
||||
export function isWhoisAvailable(text: string | undefined): boolean {
|
||||
|
||||
@@ -37,9 +37,10 @@ export async function getRdapBaseUrlsForTld(
|
||||
const target = tld.toLowerCase();
|
||||
const bases: string[] = [];
|
||||
for (const svc of data.services) {
|
||||
const tlds = svc[0];
|
||||
const tlds = svc[0].map((x) => x.toLowerCase());
|
||||
const urls = svc[1];
|
||||
if (tlds.map((x) => x.toLowerCase()).includes(target)) {
|
||||
// Match exact TLD, and also support multi-label public suffixes present in IANA (rare)
|
||||
if (tlds.includes(target)) {
|
||||
for (const u of urls) {
|
||||
const base = u.endsWith("/") ? u : `${u}/`;
|
||||
bases.push(base);
|
||||
|
||||
45
src/whois/merge.test.ts
Normal file
45
src/whois/merge.test.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Replace the WHOIS client with a deterministic stub so no network access is
// needed. The stub answers per server name:
//   - "whois.nic.io"   -> a registry record that refers to whois.udag.net
//   - "whois.udag.net" -> the bare text "NOT FOUND"
//   - anything else    -> an empty response
vi.mock("./client.js", () => ({
  whoisQuery: vi.fn(async (server: string) => {
    if (server === "whois.nic.io") {
      return {
        serverQueried: server,
        text: `Domain Name: GITPOD.IO\nCreation Date: 2019-05-14T00:00:00Z\nRegistry Expiry Date: 2030-05-14T00:00:00Z\nRegistrar WHOIS Server: whois.udag.net\nName Server: A.NS\n`,
      };
    }
    if (server === "whois.udag.net") {
      // Registrar returns minimal/empty or contradictory content
      return { serverQueried: server, text: "NOT FOUND" };
    }
    return { serverQueried: server, text: "" };
  }),
}));
|
||||
|
||||
import { mergeWhoisRecords } from "./merge";
|
||||
import { normalizeWhois } from "./normalize";
|
||||
import { collectWhoisReferralChain } from "./referral";
|
||||
|
||||
describe("WHOIS coalescing", () => {
  it("retains TLD data when registrar provides no details", async () => {
    // Walk the referral chain against the stubbed client (at most two hops).
    const chain = await collectWhoisReferralChain("whois.nic.io", "gitpod.io", {
      followWhoisReferral: true,
      maxWhoisReferralHops: 2,
    });
    // Contradictory registrar should not be appended to chain
    expect(chain.length).toBe(1);

    // Normalize the only surviving response (raw text not included) ...
    const [first] = chain;
    const base = normalizeWhois(
      "gitpod.io",
      "io",
      first.text,
      first.serverQueried,
      false,
    );
    // ... and merge it with nothing: the registry data must come through intact.
    const merged = mergeWhoisRecords(base, []);
    expect(merged.isRegistered).toBe(true);
    expect(merged.creationDate).toBeDefined();
    expect(merged.whoisServer?.toLowerCase()).toContain("whois.nic.io");
  });
});
|
||||
96
src/whois/merge.ts
Normal file
96
src/whois/merge.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import { uniq } from "../lib/text";
|
||||
import type { Contact, DomainRecord, Nameserver } from "../types";
|
||||
|
||||
/**
 * Union two status lists, dropping duplicates.
 *
 * Entries are compared by their `status` string, case-insensitively; the first
 * occurrence wins and keeps its original casing and any extra fields. Entries
 * with a missing or empty `status` are discarded.
 *
 * @param a - Statuses from the record merged so far (may be absent).
 * @param b - Statuses from the record being merged in (may be absent).
 * @returns The combined list in first-seen order, or `undefined` when empty.
 */
function dedupeStatuses(
  a?: DomainRecord["statuses"],
  b?: DomainRecord["statuses"],
) {
  const seen = new Set<string>();
  const out: NonNullable<DomainRecord["statuses"]> = [];
  for (const entry of [...(a || []), ...(b || [])]) {
    const key = (entry?.status || "").toLowerCase();
    // Skip blank statuses and anything already collected.
    if (!key || seen.has(key)) continue;
    seen.add(key);
    out.push(entry);
  }
  return out.length ? out : undefined;
}
|
||||
|
||||
/**
 * Union two nameserver lists, keyed by lower-cased host name.
 *
 * The first occurrence of a host is kept as-is (with `host` lower-cased). When
 * the same host appears again, its IPv4/IPv6 addresses are unioned into the
 * existing entry. Address lists are only written when they are non-empty, so a
 * host that never had addresses looks the same whether it appeared once or
 * several times.
 *
 * @param a - Nameservers from the record merged so far (may be absent).
 * @param b - Nameservers from the record being merged in (may be absent).
 * @returns One entry per distinct host in first-seen order, or `undefined`
 *   when there are none.
 */
function dedupeNameservers(a?: Nameserver[], b?: Nameserver[]) {
  const byHost = new Map<string, Nameserver>();
  for (const ns of [...(a || []), ...(b || [])]) {
    const host = ns.host.toLowerCase();
    const prev = byHost.get(host);
    if (!prev) {
      byHost.set(host, { ...ns, host });
      continue;
    }
    // Start from the existing entry so none of its fields are lost.
    const combined: Nameserver = { ...prev, host };
    const ipv4 = uniq([...(prev.ipv4 || []), ...(ns.ipv4 || [])]);
    const ipv6 = uniq([...(prev.ipv6 || []), ...(ns.ipv6 || [])]);
    // Do not fabricate empty address arrays for hosts without addresses.
    if (ipv4 && ipv4.length > 0) combined.ipv4 = ipv4;
    if (ipv6 && ipv6.length > 0) combined.ipv6 = ipv6;
    byHost.set(host, combined);
  }
  const out = Array.from(byHost.values());
  return out.length ? out : undefined;
}
|
||||
|
||||
/**
 * Union two contact lists, dropping duplicates.
 *
 * Two contacts are treated as the same when they share a `type` and the same
 * identity string, compared case-insensitively. The identity is the first
 * non-empty value among `organization`, `name` and `email`. The first
 * occurrence wins. Contacts of one type that have none of those fields all
 * share an empty identity, so only the first of them is kept.
 *
 * @param a - Contacts from the record merged so far (may be absent).
 * @param b - Contacts from the record being merged in (may be absent).
 * @returns The combined list in first-seen order, or `undefined` when empty.
 */
function dedupeContacts(a?: Contact[], b?: Contact[]) {
  const seen = new Set<string>();
  const out: Contact[] = [];
  for (const contact of [...(a || []), ...(b || [])]) {
    const identity = (contact.organization || contact.name || contact.email || "")
      .toString()
      .toLowerCase();
    const key = `${contact.type}|${identity}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(contact);
  }
  return out.length ? out : undefined;
}
|
||||
|
||||
/**
 * Conservative merge of WHOIS records from one referral chain.
 *
 * Starts from a shallow copy of `base` and folds in each record of `others`
 * in order:
 * - scalars (`registry`, `registrar`, `reseller`, `deletionDate`, `dnssec`,
 *   `privacyEnabled`) are only filled when still missing;
 * - `isRegistered` and `transferLock` are OR-ed;
 * - `statuses`, `nameservers` and `contacts` are unioned without duplicates;
 * - dates prefer the earliest creation and the latest update/expiration;
 * - `whoisServer` and `rawWhois` take the value of the last record that
 *   provides one.
 *
 * @param base - Record to start from; it is not mutated.
 * @param others - Further records to fold in, in order. May be empty, in
 *   which case a copy of `base` is returned.
 * @returns The merged record.
 */
export function mergeWhoisRecords(
  base: DomainRecord,
  others: DomainRecord[],
): DomainRecord {
  const merged: DomainRecord = { ...base };
  for (const cur of others) {
    merged.isRegistered = merged.isRegistered || cur.isRegistered;
    merged.registry = merged.registry ?? cur.registry;
    merged.registrar = merged.registrar ?? cur.registrar;
    merged.reseller = merged.reseller ?? cur.reseller;
    merged.statuses = dedupeStatuses(merged.statuses, cur.statuses);

    // Dates: prefer earliest creation, latest updated/expiration when available
    merged.creationDate = preferEarliestIso(
      merged.creationDate,
      cur.creationDate,
    );
    merged.updatedDate = preferLatestIso(merged.updatedDate, cur.updatedDate);
    merged.expirationDate = preferLatestIso(
      merged.expirationDate,
      cur.expirationDate,
    );
    merged.deletionDate = merged.deletionDate ?? cur.deletionDate;

    // Only touch transferLock when this record says something about it, so an
    // unknown lock state is not turned into an asserted `false`.
    if (cur.transferLock != null) {
      merged.transferLock = Boolean(merged.transferLock || cur.transferLock);
    }

    merged.dnssec = merged.dnssec ?? cur.dnssec;
    merged.nameservers = dedupeNameservers(merged.nameservers, cur.nameservers);
    merged.contacts = dedupeContacts(merged.contacts, cur.contacts);
    merged.privacyEnabled = merged.privacyEnabled ?? cur.privacyEnabled;

    // Keep whoisServer pointing to the latest contributing authoritative server
    merged.whoisServer = cur.whoisServer ?? merged.whoisServer;
    // rawWhois: keep last contributing text
    merged.rawWhois = cur.rawWhois ?? merged.rawWhois;
  }
  return merged;
}
|
||||
|
||||
/**
 * Pick the earlier of two date strings.
 *
 * A missing value yields the other one. A value that `Date.parse` cannot read
 * never displaces one it can; if neither parses, `a` is kept. Ties keep `a`.
 *
 * @param a - Date string already held (may be absent).
 * @param b - Candidate date string (may be absent).
 * @returns The chosen string, returned unchanged, or `undefined` when both
 *   are absent.
 */
function preferEarliestIso(a?: string, b?: string): string | undefined {
  if (!a) return b;
  if (!b) return a;
  const aTime = Date.parse(a);
  const bTime = Date.parse(b);
  // Every comparison with NaN is false, so unparseable input needs its own branch.
  if (Number.isNaN(bTime)) return a;
  if (Number.isNaN(aTime)) return b;
  return aTime <= bTime ? a : b;
}
|
||||
|
||||
/**
 * Pick the later of two date strings.
 *
 * A missing value yields the other one. A value that `Date.parse` cannot read
 * never displaces one it can; if neither parses, `a` is kept. Ties keep `a`.
 *
 * @param a - Date string already held (may be absent).
 * @param b - Candidate date string (may be absent).
 * @returns The chosen string, returned unchanged, or `undefined` when both
 *   are absent.
 */
function preferLatestIso(a?: string, b?: string): string | undefined {
  if (!a) return b;
  if (!b) return a;
  const aTime = Date.parse(a);
  const bTime = Date.parse(b);
  // Every comparison with NaN is false, so unparseable input needs its own branch.
  if (Number.isNaN(bTime)) return a;
  if (Number.isNaN(aTime)) return b;
  return aTime >= bTime ? a : b;
}
|
||||
@@ -17,7 +17,7 @@ vi.mock("./client.js", () => ({
|
||||
}),
|
||||
}));
|
||||
|
||||
import { followWhoisReferrals } from "./referral";
|
||||
import { collectWhoisReferralChain, followWhoisReferrals } from "./referral";
|
||||
|
||||
describe("WHOIS referral contradiction handling", () => {
|
||||
it("keeps TLD WHOIS when registrar claims availability", async () => {
|
||||
@@ -30,4 +30,16 @@ describe("WHOIS referral contradiction handling", () => {
|
||||
expect(res.text.toLowerCase().includes("creation date")).toBe(true);
|
||||
expect(res.text.toLowerCase().includes("no match")).toBe(false);
|
||||
});
|
||||
|
||||
it("collects chain and does not append contradictory registrar", async () => {
|
||||
const chain = await collectWhoisReferralChain(
|
||||
"whois.nic.io",
|
||||
"raindrop.io",
|
||||
{ followWhoisReferral: true, maxWhoisReferralHops: 2 },
|
||||
);
|
||||
expect(Array.isArray(chain)).toBe(true);
|
||||
// Mocked registrar is contradictory, so chain should contain only the TLD response
|
||||
expect(chain.length).toBe(1);
|
||||
expect(chain[0].serverQueried).toBe("whois.nic.io");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -46,6 +46,50 @@ export async function followWhoisReferrals(
|
||||
return current;
|
||||
}
|
||||
|
||||
/**
 * Collect the WHOIS referral chain starting from the TLD server.
 *
 * The first element is always the response from `initialServer`; a failure of
 * that first query is not caught here and rejects the returned promise.
 * Further elements are responses from servers named by each previous
 * response's referral, in the order they were queried.
 *
 * Collection stops, returning what has been gathered so far, when:
 * - `opts.followWhoisReferral` is `false`, or the hop limit is 0;
 * - the current response contains no referral;
 * - the referred server was already visited;
 * - the referred server's response looks "available" while the response that
 *   referred to it did not (the contradictory response is not appended);
 * - querying the referred server throws;
 * - the hop limit (`opts.maxWhoisReferralHops`, default 2) is reached.
 *
 * @param initialServer - WHOIS server to query first.
 * @param domain - Domain name to look up.
 * @param opts - Lookup options, passed through to every query.
 * @returns The collected responses; never empty when the promise resolves.
 */
export async function collectWhoisReferralChain(
  initialServer: string,
  domain: string,
  opts?: LookupOptions,
): Promise<WhoisQueryResult[]> {
  const maxHops = Math.max(0, opts?.maxWhoisReferralHops ?? 2);
  const first = await whoisQuery(initialServer, domain, opts);
  const results: WhoisQueryResult[] = [first];
  if (opts?.followWhoisReferral === false || maxHops === 0) return results;

  // Remember every server already queried so a referral loop cannot spin.
  const visited = new Set<string>([normalize(first.serverQueried)]);
  let current = first;
  for (let hops = 0; hops < maxHops; hops += 1) {
    const next = extractWhoisReferral(current.text);
    if (!next) break;
    const normalized = normalize(next);
    if (visited.has(normalized)) break;
    visited.add(normalized);
    try {
      const res = await whoisQuery(next, domain, opts);
      // If registrar claims availability while TLD indicated registered, stop.
      const registeredBefore = !isWhoisAvailable(current.text);
      const registeredAfter = !isWhoisAvailable(res.text);
      if (registeredBefore && !registeredAfter) {
        // Do not adopt or append contradictory registrar; keep authoritative TLD only.
        break;
      }
      results.push(res);
      current = res;
    } catch {
      // Best effort: a failing referral server ends the walk; earlier results stand.
      break;
    }
  }
  return results;
}
|
||||
|
||||
/**
 * Canonicalize a WHOIS server name for comparison: strip a leading
 * `whois://` scheme (matched case-insensitively) and lower-case the rest.
 *
 * @param server - Server name as it appeared in a response or referral.
 * @returns The comparison key.
 */
function normalize(server: string): string {
  const withoutScheme = server.replace(/^whois:\/\//i, "");
  return withoutScheme.toLowerCase();
}
|
||||
|
||||
Reference in New Issue
Block a user