1
mirror of https://github.com/jakejarvis/rdapper.git synced 2025-10-18 20:14:27 -04:00

Add toRegistrableDomain function for domain normalization

This commit is contained in:
2025-10-10 10:44:04 -04:00
parent bfcecc5eff
commit 6070d91424
5 changed files with 75 additions and 100 deletions

View File

@@ -35,6 +35,16 @@ await isRegistered("example.com"); // => true
await isAvailable("likely-unregistered-thing-320485230458.com"); // => true
```
Normalize arbitrary input (domain or URL) to its registrable domain (eTLD+1):
```ts
import { toRegistrableDomain } from "rdapper";
toRegistrableDomain("https://sub.example.co.uk/page"); // => "example.co.uk"
toRegistrableDomain("spark-public.s3.amazonaws.com"); // => "amazonaws.com" (ICANN-only default)
toRegistrableDomain("192.168.0.1"); // => null
```
## API
- `lookupDomain(domain, options?) => Promise<LookupResult>`

View File

@@ -58,7 +58,6 @@ vi.mock("./lib/domain.js", async () => {
});
import { lookupDomain } from ".";
import * as domain from "./lib/domain";
import * as rdapClient from "./rdap/client";
import type { WhoisQueryResult } from "./whois/client";
import * as whoisClient from "./whois/client";
@@ -149,52 +148,3 @@ describe("WHOIS referral & includeRaw", () => {
expect(Boolean(res.record?.rawWhois)).toBe(true);
});
});
// 3) Multi-label public suffix fallback via exceptions (e.g., uk.com)
describe("WHOIS multi-label public suffix fallback", () => {
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(discovery.ianaWhoisServerForTld).mockResolvedValue(
"whois.centralnic.tld",
);
vi.mocked(domain.getDomainParts).mockReturnValue({
publicSuffix: "uk.com",
tld: "com",
});
// Ensure referral helper defers to whoisQuery for initial TLD query in this suite
vi.mocked(whoisReferral.followWhoisReferrals).mockImplementation(
async (
server: string,
d: string,
o?: import("./types").LookupOptions,
): Promise<WhoisQueryResult> => whoisClient.whoisQuery(server, d, o),
);
});
it("tries exception server for multi-label public suffix when TLD says no match", async () => {
const whois = vi.mocked(whoisClient.whoisQuery);
whois.mockReset();
whois
.mockImplementationOnce(
async (): Promise<WhoisQueryResult> => ({
text: "No match for domain",
serverQueried: "whois.centralnic.tld",
}),
)
.mockImplementationOnce(
async (): Promise<WhoisQueryResult> => ({
text: "Domain Name: EXAMPLE.UK.COM\nRegistrar: Registrar LLC",
serverQueried: "whois.centralnic.com",
}),
);
const res = await lookupDomain("example.uk.com", {
timeoutMs: 200,
whoisOnly: true,
});
expect(res.ok, res.error).toBe(true);
expect(res.record?.source).toBe("whois");
expect(res.record?.tld).toBe("com");
expect(res.record?.whoisServer).toBe("whois.centralnic.com");
});
});

View File

@@ -4,7 +4,6 @@ import { fetchRdapDomain } from "./rdap/client";
import { fetchAndMergeRdapRelated } from "./rdap/merge";
import { normalizeRdap } from "./rdap/normalize";
import type { DomainRecord, LookupOptions, LookupResult } from "./types";
import { whoisQuery } from "./whois/client";
import {
getIanaWhoisTextForTld,
ianaWhoisServerForTld,
@@ -12,7 +11,6 @@ import {
} from "./whois/discovery";
import { normalizeWhois } from "./whois/normalize";
import { followWhoisReferrals } from "./whois/referral";
import { WHOIS_TLD_EXCEPTIONS } from "./whois/servers";
/**
* High-level lookup that prefers RDAP and falls back to WHOIS.
@@ -26,7 +24,11 @@ export async function lookupDomain(
if (!isLikelyDomain(domain)) {
return { ok: false, error: "Input does not look like a domain" };
}
const { publicSuffix, tld } = getDomainParts(domain);
const { publicSuffix: tld } = getDomainParts(domain);
if (!tld) {
return { ok: false, error: "Invalid TLD" };
}
// If WHOIS-only, skip RDAP path
if (!opts?.whoisOnly) {
@@ -76,39 +78,10 @@ export async function lookupDomain(
error: `No WHOIS server discovered for TLD '${tld}'. This registry may not publish public WHOIS over port 43.${hint}`,
};
}
// Query the TLD server first; optionally follow registrar referrals (multi-hop)
const res = await followWhoisReferrals(whoisServer, domain, opts);
// If TLD registry returns no match and there was no referral, try multi-label public suffix candidates
if (
publicSuffix.includes(".") &&
/no match|not found/i.test(res.text) &&
opts?.followWhoisReferral !== false
) {
const candidates: string[] = [];
const ps = publicSuffix.toLowerCase();
// Prefer explicit exceptions when known
const exception = WHOIS_TLD_EXCEPTIONS[ps];
if (exception) candidates.push(exception);
for (const server of candidates) {
try {
const alt = await whoisQuery(server, domain, opts);
if (alt.text && !/error/i.test(alt.text))
return {
ok: true,
record: normalizeWhois(
domain,
tld,
alt.text,
alt.serverQueried,
!!opts?.includeRaw,
),
};
} catch {
// try next
}
}
}
const record: DomainRecord = normalizeWhois(
domain,
tld,
@@ -145,4 +118,5 @@ export async function isRegistered(
return res.record.isRegistered === true;
}
export { toRegistrableDomain } from "./lib/domain";
export type * from "./types";

View File

@@ -1,12 +1,31 @@
import { expect, test } from "vitest";
import { getDomainParts, isLikelyDomain } from "./domain";
import { getDomainParts, isLikelyDomain, toRegistrableDomain } from "./domain";
test("getDomainParts.tld basic", () => {
expect(getDomainParts("example.com").tld).toBe("com");
expect(getDomainParts("sub.example.co.uk").tld).toBe("uk");
expect(getDomainParts("example.com").publicSuffix).toBe("com");
expect(getDomainParts("sub.example.co.uk").publicSuffix).toBe("co.uk");
});
// Smoke test for isLikelyDomain: a dotted hostname is accepted, while free
// text containing spaces is rejected.
test("isLikelyDomain", () => {
expect(isLikelyDomain("example.com")).toBe(true);
expect(isLikelyDomain("not a domain")).toBe(false);
});
// Exercises the three outcomes of toRegistrableDomain: a bare domain is
// returned as-is, a URL or deeper hostname is reduced to its registrable
// domain, and IP addresses or empty input produce null.
test("toRegistrableDomain normalizes eTLD+1 and rejects non-ICANN", () => {
// Bare domains pass through; URLs are reduced to scheme-less eTLD+1
expect(toRegistrableDomain("example.com")).toBe("example.com");
expect(toRegistrableDomain("http://www.writethedocs.org/conf")).toBe(
"writethedocs.org",
);
// Private suffixes are ignored by default (ICANN-only behavior), so a host
// below s3.amazonaws.com is expected to collapse to "amazonaws.com" rather
// than keep its bucket-level label
expect(toRegistrableDomain("spark-public.s3.amazonaws.com")).toBe(
"amazonaws.com",
);
// IPv4 literals, bracketed IPv6 URLs and the empty string are all rejected
expect(toRegistrableDomain("192.168.0.1")).toBeNull();
expect(toRegistrableDomain("http://[::1]/")).toBeNull();
expect(toRegistrableDomain("")).toBeNull();
});

View File

@@ -1,20 +1,23 @@
import { getPublicSuffix } from "tldts";
import { parse } from "tldts";
export function getDomainParts(domain: string): {
publicSuffix: string;
tld: string;
} {
const lower = domain.toLowerCase().trim();
const suffix = getPublicSuffix(lower) || "";
const publicSuffix =
suffix || lower.split(".").filter(Boolean).pop() || lower;
const labels = publicSuffix.split(".").filter(Boolean);
const tld = labels.length ? labels[labels.length - 1] : publicSuffix;
return { publicSuffix, tld };
/**
 * Break a hostname down into its components.
 *
 * This is a thin pass-through to tldts' `parse`: the input is handed over
 * untouched and the parser's result object is returned as-is. Callers in this
 * package read fields such as `publicSuffix` from the result.
 *
 * @param domain - Hostname to analyse.
 * @returns The result object produced by `parse` for `domain`.
 */
export function getDomainParts(domain: string) {
  return parse(domain);
}
export function isLikelyDomain(input: string): boolean {
return /^[a-z0-9.-]+$/i.test(input) && input.includes(".");
// A single hostname label: 1-63 characters drawn from [a-z0-9-], with no
// hyphen in the first or last position. Digits and hyphens are permitted in
// every label (including the last one) so punycoded "xn--" labels pass.
const DNS_LABEL_PATTERN = /^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$/;

/**
 * Purely syntactic "does this look like a hostname?" check. Nothing is
 * resolved: no DNS, RDAP or WHOIS traffic is generated.
 *
 * The input is trimmed and lower-cased, then accepted only when all of the
 * following hold:
 * - the whole name is 1-253 characters long;
 * - it consists of at least two dot-separated labels;
 * - every label is 1-63 characters of `[a-z0-9-]` without a leading or
 *   trailing hyphen;
 * - the final label is at least 2 characters long.
 *
 * @param value - Candidate hostname; `null`/`undefined` are treated as "".
 * @returns `true` when `value` is shaped like a domain name, else `false`.
 */
export function isLikelyDomain(value: string): boolean {
  const host = (value ?? "").trim().toLowerCase();
  if (host.length === 0 || host.length > 253) return false;

  const labels = host.split(".");
  // A lone label such as "localhost" has no dot and is rejected.
  if (labels.length < 2) return false;

  const finalIndex = labels.length - 1;
  return labels.every(
    (label, index) =>
      DNS_LABEL_PATTERN.test(label) &&
      // Only the right-most label carries the 2-character minimum.
      (index !== finalIndex || label.length >= 2),
  );
}
export function punyToUnicode(domain: string): string {
@@ -25,6 +28,25 @@ export function punyToUnicode(domain: string): string {
}
}
/**
 * Reduce arbitrary input (a bare domain or a full URL) to its registrable
 * domain, i.e. eTLD+1.
 *
 * The trimmed input is handed to tldts' `parse`; the `domain` field of the
 * result is returned lower-cased.
 *
 * @param input - Domain name or URL; `null`/`undefined` are treated as "".
 * @returns The registrable domain, or `null` when the input is blank, is an
 *   IP address, is not flagged as ICANN by the parser, or has no `domain`.
 */
export function toRegistrableDomain(input: string): string | null {
  const candidate = (input ?? "").trim();
  if (!candidate) return null;

  const { isIp, isIcann, domain } = parse(candidate);

  // IP literals, non-ICANN suffixes and results without a registrable domain
  // are all rejected the same way.
  if (isIp || !isIcann || !domain) return null;

  return domain.toLowerCase();
}
// Common WHOIS availability phrases seen across registries/registrars
const WHOIS_AVAILABLE_PATTERNS: RegExp[] = [
/\bno match\b/i,