From 36cf6d6b38ceb39b72ffad962b19c08f8e898eca Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Wed, 24 Sep 2025 18:24:47 -0400 Subject: [PATCH] Enhance project structure: add AGENTS.md for repository guidelines, update .gitignore to exclude TypeScript build info, and refine package.json scripts for improved build and publish processes. --- .gitignore | 1 + AGENTS.md | 37 +++ package.json | 7 +- src/{ => api/__tests__}/lookup.smoke.test.ts | 2 +- src/{ => api}/lookup.ts | 27 +- src/index.ts | 4 +- .../__tests__/dates.test.ts} | 12 +- src/lib/__tests__/domain.test.ts | 13 + src/lib/async.ts | 21 ++ src/lib/dates.ts | 98 +++++++ src/lib/domain.ts | 50 ++++ src/lib/text.ts | 79 ++++++ .../__tests__/normalize.test.ts} | 2 +- src/{rdap.ts => rdap/bootstrap.ts} | 36 +-- src/rdap/client.ts | 35 +++ src/{normalize-rdap.ts => rdap/normalize.ts} | 5 +- src/{types.d.ts => types.ts} | 3 + src/utils.ts | 248 ------------------ src/whois.ts | 114 -------- .../__tests__/normalize.test.ts} | 14 +- src/whois/catalog.ts | 60 +++++ src/whois/client.ts | 66 +++++ src/whois/discovery.ts | 56 ++++ .../normalize.ts} | 5 +- tsconfig.build.json | 12 + tsconfig.tsbuildinfo | 1 - 26 files changed, 566 insertions(+), 442 deletions(-) create mode 100644 AGENTS.md rename src/{ => api/__tests__}/lookup.smoke.test.ts (92%) rename src/{ => api}/lookup.ts (80%) rename src/{utils.test.ts => lib/__tests__/dates.test.ts} (66%) create mode 100644 src/lib/__tests__/domain.test.ts create mode 100644 src/lib/async.ts create mode 100644 src/lib/dates.ts create mode 100644 src/lib/domain.ts create mode 100644 src/lib/text.ts rename src/{normalize-rdap.test.ts => rdap/__tests__/normalize.test.ts} (97%) rename src/{rdap.ts => rdap/bootstrap.ts} (58%) create mode 100644 src/rdap/client.ts rename src/{normalize-rdap.ts => rdap/normalize.ts} (98%) rename src/{types.d.ts => types.ts} (92%) delete mode 100644 src/utils.ts delete mode 100644 src/whois.ts rename src/{normalize-whois.test.ts => 
whois/__tests__/normalize.test.ts} (91%) create mode 100644 src/whois/catalog.ts create mode 100644 src/whois/client.ts create mode 100644 src/whois/discovery.ts rename src/{normalize-whois.ts => whois/normalize.ts} (98%) create mode 100644 tsconfig.build.json delete mode 100644 tsconfig.tsbuildinfo diff --git a/.gitignore b/.gitignore index b947077..f4e2c6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ node_modules/ dist/ +*.tsbuildinfo diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0c0fe2f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,37 @@ +# Repository Guidelines + +## Project Structure & Module Organization +- `src/api/`: Public lookup orchestration (`lookup.ts`). +- `src/rdap/`: RDAP bootstrap, client, and normalization (`bootstrap.ts`, `client.ts`, `normalize.ts`). +- `src/whois/`: WHOIS TCP client, discovery, referral, normalization, catalog. +- `src/lib/`: Shared utilities (`dates.ts`, `async.ts`, `domain.ts`, `text.ts`). +- `src/types.ts`: Public types. `src/index.ts` re-exports API and types. +- Tests: per-module `__tests__/` folders with `*.test.ts` (e.g., `src/lib/__tests__/dates.test.ts`). +- `dist/`: Build output (generated). Do not edit. +- `cli.mjs`: Local CLI for manual checks. + +## Build, Test, and Development Commands +- `npm run build`: Clean and compile with `tsc -p tsconfig.build.json` (excludes tests); outputs to `dist/`. +- `npm test`: Compile tests, then run Node’s test runner on `dist/**/*.test.js`. +- `npm run lint`: Biome format+lint with autofix per `biome.json`. +- Example CLI: `npm run build && node cli.mjs example.com`. + +## Coding Style & Naming Conventions +- TypeScript strict; ES2022 ESM (`tsconfig.json`). +- Biome-enforced: spaces indentation; double quotes; organized imports. +- Filenames: lowercase, kebab-case when multi-word; the directory carries the namespace (e.g., `rdap/normalize.ts`, not `normalize-rdap.ts`). +- Identifiers: camelCase; avoid abbreviations; explicit return types for exported functions. + +## Testing Guidelines +- Framework: Node `node:test`. 
+- Tests live under `src/**/__tests__` and are deterministic/offline by default. +- Smoke tests gated by `SMOKE=1` (e.g., `SMOKE=1 npm test`). +- Run all tests: `npm test`. + +## Commit & Pull Request Guidelines +- Commits: imperative, concise summaries (e.g., “Refactor lookup: tighten error handling”). +- PRs: include what/why, linked issues, and test notes; ensure `npm run lint && npm test` pass. + +## Release & Security Notes +- Publish only `dist/`; `prepublishOnly` runs the build. Tests are excluded via `tsconfig.build.json` and `files` in `package.json`. +- Node >= 18.17 with global `fetch`. WHOIS uses TCP 43; be mindful of registry rate limits. diff --git a/package.json b/package.json index 1cbf14f..7140580 100644 --- a/package.json +++ b/package.json @@ -17,9 +17,11 @@ "dist" ], "scripts": { - "build": "tsc", + "clean": "rm -rf dist", + "build": "npm run clean && tsc -p tsconfig.build.json", "test": "tsc && node --test dist/**/*.test.js", - "lint": "biome check --write" + "lint": "biome check --write", + "prepublishOnly": "npm run build" }, "dependencies": { "psl": "1.15.0" @@ -30,6 +32,7 @@ "@types/psl": "1.1.3", "typescript": "5.9.2" }, + "engines": { "node": ">=18.17" } diff --git a/src/lookup.smoke.test.ts b/src/api/__tests__/lookup.smoke.test.ts similarity index 92% rename from src/lookup.smoke.test.ts rename to src/api/__tests__/lookup.smoke.test.ts index 5e58869..dded28b 100644 --- a/src/lookup.smoke.test.ts +++ b/src/api/__tests__/lookup.smoke.test.ts @@ -1,6 +1,6 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { lookupDomain } from "./lookup.js"; +import { lookupDomain } from "../lookup.js"; // Run only when SMOKE=1 to avoid flakiness and network in CI by default const shouldRun = process.env.SMOKE === "1"; diff --git a/src/lookup.ts b/src/api/lookup.ts similarity index 80% rename from src/lookup.ts rename to src/api/lookup.ts index 41602a1..af99095 100644 --- a/src/lookup.ts +++ b/src/api/lookup.ts @@ -1,13 +1,16 
@@ -import { normalizeRdap } from "./normalize-rdap.js"; -import { normalizeWhois } from "./normalize-whois.js"; -import { fetchRdapDomain, getRdapBaseUrlsForTld } from "./rdap.js"; -import type { DomainRecord, LookupOptions, LookupResult } from "./types.js"; -import { getDomainParts, isLikelyDomain, toISO } from "./utils.js"; +import { toISO } from "../lib/dates.js"; +import { getDomainParts, isLikelyDomain } from "../lib/domain.js"; +import { getRdapBaseUrlsForTld } from "../rdap/bootstrap.js"; +import { fetchRdapDomain } from "../rdap/client.js"; +import { normalizeRdap } from "../rdap/normalize.js"; +import type { DomainRecord, LookupOptions, LookupResult } from "../types.js"; +import { WHOIS_TLD_EXCEPTIONS } from "../whois/catalog.js"; +import { whoisQuery } from "../whois/client.js"; import { extractWhoisReferral, ianaWhoisServerForTld, - whoisQuery, -} from "./whois.js"; +} from "../whois/discovery.js"; +import { normalizeWhois } from "../whois/normalize.js"; /** * High-level lookup that prefers RDAP and falls back to WHOIS. 
@@ -79,11 +82,11 @@ export async function lookupDomain( /no match|not found/i.test(res.text) && opts?.followWhoisReferral !== false ) { - const candidates = [ - `whois.nic.${publicSuffix.toLowerCase()}`, - // Widely used by many second-level public suffix registries - "whois.centralnic.com", - ]; + const candidates: string[] = []; + const ps = publicSuffix.toLowerCase(); + // Prefer explicit exceptions when known + const exception = WHOIS_TLD_EXCEPTIONS[ps]; + if (exception) candidates.push(exception); for (const server of candidates) { try { const alt = await whoisQuery(server, domain, opts); diff --git a/src/index.ts b/src/index.ts index bd8451c..894e703 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1,2 @@ -export * from "./lookup.js"; -// export * from "./types.js"; +export * from "./api/lookup.js"; +export type * from "./types.js"; diff --git a/src/utils.test.ts b/src/lib/__tests__/dates.test.ts similarity index 66% rename from src/utils.test.ts rename to src/lib/__tests__/dates.test.ts index c6c1677..498c248 100644 --- a/src/utils.test.ts +++ b/src/lib/__tests__/dates.test.ts @@ -1,6 +1,6 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { extractTld, isLikelyDomain, toISO } from "./utils.js"; +import { toISO } from "../dates.js"; test("toISO parses ISO and common whois formats", () => { const iso = toISO("2023-01-02T03:04:05Z"); @@ -20,13 +20,3 @@ test("toISO parses ISO and common whois formats", () => { const mdy = toISO("Jan 02 2023"); assert.equal(mdy, "2023-01-02T00:00:00Z"); }); - -test("extractTld basic", () => { - assert.equal(extractTld("example.com"), "com"); - assert.equal(extractTld("sub.example.co.uk"), "uk"); -}); - -test("isLikelyDomain", () => { - assert.equal(isLikelyDomain("example.com"), true); - assert.equal(isLikelyDomain("not a domain"), false); -}); diff --git a/src/lib/__tests__/domain.test.ts b/src/lib/__tests__/domain.test.ts new file mode 100644 index 0000000..4a4901a --- /dev/null +++ 
b/src/lib/__tests__/domain.test.ts @@ -0,0 +1,13 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { extractTld, isLikelyDomain } from "../domain.js"; + +test("extractTld basic", () => { + assert.equal(extractTld("example.com"), "com"); + assert.equal(extractTld("sub.example.co.uk"), "uk"); +}); + +test("isLikelyDomain", () => { + assert.equal(isLikelyDomain("example.com"), true); + assert.equal(isLikelyDomain("not a domain"), false); +}); diff --git a/src/lib/async.ts b/src/lib/async.ts new file mode 100644 index 0000000..265b49f --- /dev/null +++ b/src/lib/async.ts @@ -0,0 +1,21 @@ +export function withTimeout( + promise: Promise, + timeoutMs: number, + reason = "Timeout", +): Promise { + if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return promise; + let timer: ReturnType | undefined; + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error(reason)), timeoutMs); + }); + return Promise.race([ + promise.finally(() => { + if (timer !== undefined) clearTimeout(timer); + }), + timeout, + ]); +} + +export function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/src/lib/dates.ts b/src/lib/dates.ts new file mode 100644 index 0000000..716dc3a --- /dev/null +++ b/src/lib/dates.ts @@ -0,0 +1,98 @@ +// Lightweight date parsing helpers to avoid external dependencies. +// We aim to parse common RDAP and WHOIS date representations and return a UTC ISO string. 
+export function toISO( + dateLike: string | number | Date | undefined | null, +): string | undefined { + if (dateLike == null) return undefined; + if (dateLike instanceof Date) return toIsoFromDate(dateLike); + if (typeof dateLike === "number") return toIsoFromDate(new Date(dateLike)); + const raw = String(dateLike).trim(); + if (!raw) return undefined; + // Try several structured formats seen in WHOIS outputs (treat as UTC when no TZ provided) + const tryFormats = [ + // 2023-01-02 03:04:05Z or without Z + /^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(?:Z)?$/, + // 2023/01/02 03:04:05 + /^(\d{4})\/(\d{2})\/(\d{2})[ T](\d{2}):(\d{2}):(\d{2})$/, + // 02-Jan-2023 + /^(\d{2})-([A-Za-z]{3})-(\d{4})$/, + // Jan 02 2023 + /^([A-Za-z]{3})\s+(\d{1,2})\s+(\d{4})$/, + ]; + for (const re of tryFormats) { + const m = raw.match(re); + if (!m) continue; + const d = parseWithRegex(m, re); + if (d) return toIsoFromDate(d); + } + // Fallback to native Date parsing (handles ISO and RFC2822 with TZ) + const native = new Date(raw); + if (!Number.isNaN(native.getTime())) return toIsoFromDate(native); + return undefined; +} + +function toIsoFromDate(d: Date): string | undefined { + try { + return new Date( + Date.UTC( + d.getUTCFullYear(), + d.getUTCMonth(), + d.getUTCDate(), + d.getUTCHours(), + d.getUTCMinutes(), + d.getUTCSeconds(), + 0, + ), + ) + .toISOString() + .replace(/\.\d{3}Z$/, "Z"); + } catch { + return undefined; + } +} + +function parseWithRegex(m: RegExpMatchArray, _re: RegExp): Date | undefined { + const monthMap: Record = { + jan: 0, + feb: 1, + mar: 2, + apr: 3, + may: 4, + jun: 5, + jul: 6, + aug: 7, + sep: 8, + oct: 9, + nov: 10, + dec: 11, + }; + try { + // If the matched string contains time components, parse as Y-M-D H:M:S + if (m[0].includes(":")) { + const [_, y, mo, d, hh, mm, ss] = m; + return new Date( + Date.UTC( + Number(y), + Number(mo) - 1, + Number(d), + Number(hh), + Number(mm), + Number(ss), + ), + ); + } + // If the matched string contains 
hyphens, treat as DD-MMM-YYYY + if (m[0].includes("-")) { + const [_, dd, monStr, yyyy] = m; + const mon = monthMap[monStr.toLowerCase()]; + return new Date(Date.UTC(Number(yyyy), mon, Number(dd))); + } + // Otherwise treat as MMM DD YYYY + const [_, monStr, dd, yyyy] = m; + const mon = monthMap[monStr.toLowerCase()]; + return new Date(Date.UTC(Number(yyyy), mon, Number(dd))); + } catch { + // fall through to undefined + } + return undefined; +} diff --git a/src/lib/domain.ts b/src/lib/domain.ts new file mode 100644 index 0000000..a67c3b7 --- /dev/null +++ b/src/lib/domain.ts @@ -0,0 +1,50 @@ +import psl from "psl"; + +export function extractTld(domain: string): string { + const lower = domain.trim().toLowerCase(); + try { + const parsed = psl.parse?.(lower) as { tld?: string }; + const suffix = parsed?.tld; + if (suffix) { + const labels = String(suffix).split(".").filter(Boolean); + if (labels.length) return labels[labels.length - 1]; + } + } catch { + // ignore and fall back + } + const parts = lower.split(".").filter(Boolean); + return parts[parts.length - 1] ?? lower; +} + +export function getDomainParts(domain: string): { + publicSuffix: string; + tld: string; +} { + const lower = domain.toLowerCase().trim(); + let publicSuffix: string | undefined; + try { + const parsed = psl.parse?.(lower) as { tld?: string }; + publicSuffix = parsed?.tld; + } catch { + // ignore + } + if (!publicSuffix) { + const parts = lower.split(".").filter(Boolean); + publicSuffix = parts.length ? parts[parts.length - 1] : lower; + } + const labels = publicSuffix.split(".").filter(Boolean); + const tld = labels.length ? 
labels[labels.length - 1] : publicSuffix; + return { publicSuffix, tld }; +} + +export function isLikelyDomain(input: string): boolean { + return /^[a-z0-9.-]+$/i.test(input) && input.includes("."); +} + +export function punyToUnicode(domain: string): string { + try { + return domain.normalize("NFC"); + } catch { + return domain; + } +} diff --git a/src/lib/text.ts b/src/lib/text.ts new file mode 100644 index 0000000..bb8319e --- /dev/null +++ b/src/lib/text.ts @@ -0,0 +1,79 @@ +export function uniq(arr: T[] | undefined | null): T[] | undefined { + if (!arr) return undefined; + return Array.from(new Set(arr)); +} + +export function parseKeyValueLines(text: string): Record { + const map = new Map(); + const lines = text.split(/\r?\n/); + let lastKey: string | undefined; + for (const rawLine of lines) { + const line = rawLine.replace(/\s+$/, ""); + if (!line.trim()) continue; + // Bracketed form: [Key] value (common in .jp and some ccTLDs) + const bracket = line.match(/^\s*\[([^\]]+)\]\s*(.*)$/); + if (bracket) { + const key = bracket[1].trim().toLowerCase(); + const value = bracket[2].trim(); + const list = map.get(key) ?? []; + if (value) list.push(value); + map.set(key, list); + lastKey = key; + continue; + } + // Colon form: Key: value + const idx = line.indexOf(":"); + if (idx !== -1) { + const key = line.slice(0, idx).trim().toLowerCase(); + const value = line.slice(idx + 1).trim(); + if (!key) { + lastKey = undefined; + continue; + } + const list = map.get(key) ?? []; + if (value) list.push(value); + map.set(key, list); + lastKey = key; + continue; + } + // Continuation line: starts with indentation after a key appeared + if (lastKey && /^\s+/.test(line)) { + const value = line.trim(); + if (value) { + const list = map.get(lastKey) ?? 
[]; + list.push(value); + map.set(lastKey, list); + } + } + // Otherwise ignore non key-value lines + } + return Object.fromEntries(map); +} + +export function parseCsv(value: string | undefined): string[] | undefined { + if (!value) return undefined; + return value + .split(/[,\s]+/) + .map((s) => s.trim()) + .filter(Boolean); +} + +export function asString(value: unknown): string | undefined { + return typeof value === "string" ? value : undefined; +} + +export function asStringArray(value: unknown): string[] | undefined { + return Array.isArray(value) + ? (value.filter((x) => typeof x === "string") as string[]) + : undefined; +} + +export function asDateLike(value: unknown): string | number | Date | undefined { + if ( + typeof value === "string" || + typeof value === "number" || + value instanceof Date + ) + return value; + return undefined; +} diff --git a/src/normalize-rdap.test.ts b/src/rdap/__tests__/normalize.test.ts similarity index 97% rename from src/normalize-rdap.test.ts rename to src/rdap/__tests__/normalize.test.ts index f228a4b..3ab52f5 100644 --- a/src/normalize-rdap.test.ts +++ b/src/rdap/__tests__/normalize.test.ts @@ -1,6 +1,6 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { normalizeRdap } from "./normalize-rdap.js"; +import { normalizeRdap } from "../normalize.js"; test("normalizeRdap maps registrar, contacts, nameservers, events, dnssec", () => { const rdap = { diff --git a/src/rdap.ts b/src/rdap/bootstrap.ts similarity index 58% rename from src/rdap.ts rename to src/rdap/bootstrap.ts index 56586fa..6215fc0 100644 --- a/src/rdap.ts +++ b/src/rdap/bootstrap.ts @@ -1,6 +1,6 @@ -import { DEFAULT_TIMEOUT_MS } from "./config.js"; -import type { LookupOptions } from "./types.js"; -import { withTimeout } from "./utils.js"; +import { DEFAULT_TIMEOUT_MS } from "../config.js"; +import { withTimeout } from "../lib/async.js"; +import type { LookupOptions } from "../types.js"; // Use global fetch (Node 18+). 
For large JSON we keep it simple. @@ -48,33 +48,3 @@ export async function getRdapBaseUrlsForTld( } return Array.from(new Set(bases)); } - -/** - * Fetch RDAP JSON for a domain from a specific RDAP base URL. - * Throws on HTTP >= 400 (includes RDAP error JSON payloads). - */ -export async function fetchRdapDomain( - domain: string, - baseUrl: string, - options?: LookupOptions, -): Promise<{ url: string; json: unknown }> { - const url = new URL( - `domain/${encodeURIComponent(domain)}`, - baseUrl, - ).toString(); - const res = await withTimeout( - fetch(url, { - method: "GET", - headers: { accept: "application/rdap+json, application/json" }, - signal: options?.signal, - }), - options?.timeoutMs ?? DEFAULT_TIMEOUT_MS, - "RDAP lookup timeout", - ); - if (!res.ok) { - const bodyText = await res.text(); - throw new Error(`RDAP ${res.status}: ${bodyText.slice(0, 500)}`); - } - const json = await res.json(); - return { url, json }; -} diff --git a/src/rdap/client.ts b/src/rdap/client.ts new file mode 100644 index 0000000..7721f01 --- /dev/null +++ b/src/rdap/client.ts @@ -0,0 +1,35 @@ +import { DEFAULT_TIMEOUT_MS } from "../config.js"; +import { withTimeout } from "../lib/async.js"; +import type { LookupOptions } from "../types.js"; + +// Use global fetch (Node 18+). For large JSON we keep it simple. + +/** + * Fetch RDAP JSON for a domain from a specific RDAP base URL. + * Throws on HTTP >= 400 (includes RDAP error JSON payloads). + */ +export async function fetchRdapDomain( + domain: string, + baseUrl: string, + options?: LookupOptions, +): Promise<{ url: string; json: unknown }> { + const url = new URL( + `domain/${encodeURIComponent(domain)}`, + baseUrl, + ).toString(); + const res = await withTimeout( + fetch(url, { + method: "GET", + headers: { accept: "application/rdap+json, application/json" }, + signal: options?.signal, + }), + options?.timeoutMs ?? 
DEFAULT_TIMEOUT_MS, + "RDAP lookup timeout", + ); + if (!res.ok) { + const bodyText = await res.text(); + throw new Error(`RDAP ${res.status}: ${bodyText.slice(0, 500)}`); + } + const json = await res.json(); + return { url, json }; +} diff --git a/src/normalize-rdap.ts b/src/rdap/normalize.ts similarity index 98% rename from src/normalize-rdap.ts rename to src/rdap/normalize.ts index 6951d00..1b228db 100644 --- a/src/normalize-rdap.ts +++ b/src/rdap/normalize.ts @@ -1,10 +1,11 @@ +import { toISO } from "../lib/dates.js"; +import { asDateLike, asString, asStringArray, uniq } from "../lib/text.js"; import type { Contact, DomainRecord, Nameserver, RegistrarInfo, -} from "./types.js"; -import { asDateLike, asString, asStringArray, toISO, uniq } from "./utils.js"; +} from "../types.js"; type RdapDoc = Record; diff --git a/src/types.d.ts b/src/types.ts similarity index 92% rename from src/types.d.ts rename to src/types.ts index ef9ac88..1339615 100644 --- a/src/types.d.ts +++ b/src/types.ts @@ -84,6 +84,9 @@ export interface LookupOptions { whoisOnly?: boolean; // don't attempt RDAP followWhoisReferral?: boolean; // follow referral server (default true) customBootstrapUrl?: string; // override IANA bootstrap + // WHOIS discovery and query tuning + whoisHints?: Record; // override/add authoritative WHOIS per TLD + maxWhoisHops?: number; // max referral hops to follow (default 2) signal?: AbortSignal; } diff --git a/src/utils.ts b/src/utils.ts deleted file mode 100644 index 340cb39..0000000 --- a/src/utils.ts +++ /dev/null @@ -1,248 +0,0 @@ -import psl from "psl"; - -// Lightweight date parsing helpers to avoid external dependencies. -// We aim to parse common RDAP and WHOIS date representations and return a UTC ISO string. 
-export function toISO( - dateLike: string | number | Date | undefined | null, -): string | undefined { - if (dateLike == null) return undefined; - if (dateLike instanceof Date) return toIsoFromDate(dateLike); - if (typeof dateLike === "number") return toIsoFromDate(new Date(dateLike)); - const raw = String(dateLike).trim(); - if (!raw) return undefined; - // Try several structured formats seen in WHOIS outputs (treat as UTC when no TZ provided) - const tryFormats = [ - // 2023-01-02 03:04:05Z or without Z - /^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(?:Z)?$/, - // 2023/01/02 03:04:05 - /^(\d{4})\/(\d{2})\/(\d{2})[ T](\d{2}):(\d{2}):(\d{2})$/, - // 02-Jan-2023 - /^(\d{2})-([A-Za-z]{3})-(\d{4})$/, - // Jan 02 2023 - /^([A-Za-z]{3})\s+(\d{1,2})\s+(\d{4})$/, - ]; - for (const re of tryFormats) { - const m = raw.match(re); - if (!m) continue; - const d = parseWithRegex(m, re); - if (d) return toIsoFromDate(d); - } - // Fallback to native Date parsing (handles ISO and RFC2822 with TZ) - const native = new Date(raw); - if (!Number.isNaN(native.getTime())) return toIsoFromDate(native); - return undefined; -} - -function toIsoFromDate(d: Date): string | undefined { - try { - return new Date( - Date.UTC( - d.getUTCFullYear(), - d.getUTCMonth(), - d.getUTCDate(), - d.getUTCHours(), - d.getUTCMinutes(), - d.getUTCSeconds(), - 0, - ), - ) - .toISOString() - .replace(/\.\d{3}Z$/, "Z"); - } catch { - return undefined; - } -} - -function parseWithRegex(m: RegExpMatchArray, _re: RegExp): Date | undefined { - const monthMap: Record = { - jan: 0, - feb: 1, - mar: 2, - apr: 3, - may: 4, - jun: 5, - jul: 6, - aug: 7, - sep: 8, - oct: 9, - nov: 10, - dec: 11, - }; - try { - // If the matched string contains time components, parse as Y-M-D H:M:S - if (m[0].includes(":")) { - const [_, y, mo, d, hh, mm, ss] = m; - return new Date( - Date.UTC( - Number(y), - Number(mo) - 1, - Number(d), - Number(hh), - Number(mm), - Number(ss), - ), - ); - } - // If the matched string contains 
hyphens, treat as DD-MMM-YYYY - if (m[0].includes("-")) { - const [_, dd, monStr, yyyy] = m; - const mon = monthMap[monStr.toLowerCase()]; - return new Date(Date.UTC(Number(yyyy), mon, Number(dd))); - } - // Otherwise treat as MMM DD YYYY - const [_, monStr, dd, yyyy] = m; - const mon = monthMap[monStr.toLowerCase()]; - return new Date(Date.UTC(Number(yyyy), mon, Number(dd))); - } catch { - // fall through to undefined - } - return undefined; -} - -export function uniq(arr: T[] | undefined | null): T[] | undefined { - if (!arr) return undefined; - return Array.from(new Set(arr)); -} - -export function parseKeyValueLines(text: string): Record { - const map = new Map(); - const lines = text.split(/\r?\n/); - let lastKey: string | undefined; - for (const rawLine of lines) { - const line = rawLine.replace(/\s+$/, ""); - if (!line.trim()) continue; - // Bracketed form: [Key] value (common in .jp and some ccTLDs) - const bracket = line.match(/^\s*\[([^\]]+)\]\s*(.*)$/); - if (bracket) { - const key = bracket[1].trim().toLowerCase(); - const value = bracket[2].trim(); - const list = map.get(key) ?? []; - if (value) list.push(value); - map.set(key, list); - lastKey = key; - continue; - } - // Colon form: Key: value - const idx = line.indexOf(":"); - if (idx !== -1) { - const key = line.slice(0, idx).trim().toLowerCase(); - const value = line.slice(idx + 1).trim(); - if (!key) { - lastKey = undefined; - continue; - } - const list = map.get(key) ?? []; - if (value) list.push(value); - map.set(key, list); - lastKey = key; - continue; - } - // Continuation line: starts with indentation after a key appeared - if (lastKey && /^\s+/.test(line)) { - const value = line.trim(); - if (value) { - const list = map.get(lastKey) ?? 
[]; - list.push(value); - map.set(lastKey, list); - } - } - // Otherwise ignore non key-value lines - } - return Object.fromEntries(map); -} - -export function parseCsv(value: string | undefined): string[] | undefined { - if (!value) return undefined; - return value - .split(/[,\s]+/) - .map((s) => s.trim()) - .filter(Boolean); -} - -export function punyToUnicode(domain: string): string { - try { - return domain.normalize("NFC"); - } catch { - return domain; - } -} - -export function withTimeout( - promise: Promise, - timeoutMs: number, - reason = "Timeout", -): Promise { - if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return promise; - let timer: ReturnType | undefined; - const timeout = new Promise((_, reject) => { - timer = setTimeout(() => reject(new Error(reason)), timeoutMs); - }); - return Promise.race([ - promise.finally(() => { - if (timer !== undefined) clearTimeout(timer); - }), - timeout, - ]); -} - -export function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -export function extractTld(domain: string): string { - const lower = domain.trim().toLowerCase(); - try { - const parsed = psl.parse?.(lower) as { tld?: string }; - const suffix = parsed?.tld; - if (suffix) { - const labels = String(suffix).split(".").filter(Boolean); - if (labels.length) return labels[labels.length - 1]; - } - } catch { - // ignore and fall back - } - const parts = lower.split(".").filter(Boolean); - return parts[parts.length - 1] ?? lower; -} - -export function getDomainParts(domain: string): { publicSuffix: string; tld: string } { - const lower = domain.toLowerCase().trim(); - let publicSuffix: string | undefined; - try { - const parsed = psl.parse?.(lower) as { tld?: string }; - publicSuffix = parsed?.tld; - } catch { - // ignore - } - if (!publicSuffix) { - const parts = lower.split(".").filter(Boolean); - publicSuffix = parts.length ? 
parts[parts.length - 1] : lower; - } - const labels = publicSuffix.split(".").filter(Boolean); - const tld = labels.length ? labels[labels.length - 1] : publicSuffix; - return { publicSuffix, tld }; -} - -export function isLikelyDomain(input: string): boolean { - return /^[a-z0-9.-]+$/i.test(input) && input.includes("."); -} - -export function asString(value: unknown): string | undefined { - return typeof value === "string" ? value : undefined; -} - -export function asStringArray(value: unknown): string[] | undefined { - return Array.isArray(value) - ? (value.filter((x) => typeof x === "string") as string[]) - : undefined; -} - -export function asDateLike(value: unknown): string | number | Date | undefined { - if ( - typeof value === "string" || - typeof value === "number" || - value instanceof Date - ) - return value; - return undefined; -} diff --git a/src/whois.ts b/src/whois.ts deleted file mode 100644 index c63d09f..0000000 --- a/src/whois.ts +++ /dev/null @@ -1,114 +0,0 @@ -import { createConnection } from "node:net"; -import { DEFAULT_TIMEOUT_MS } from "./config.js"; -import type { LookupOptions } from "./types.js"; -import { withTimeout } from "./utils.js"; - -export interface WhoisQueryResult { - serverQueried: string; - text: string; -} - -/** - * Perform a WHOIS query against an RFC 3912 server over TCP 43. - * Returns the raw text and the server used. - */ -export async function whoisQuery( - server: string, - query: string, - options?: LookupOptions, -): Promise { - const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS; - const port = 43; - const host = server.replace(/^whois:\/\//i, ""); - const text = await withTimeout( - queryTcp(host, port, query, options), - timeoutMs, - "WHOIS timeout", - ); - return { serverQueried: server, text }; -} - -// Low-level WHOIS TCP client. Some registries require CRLF after the domain query. 
-function queryTcp( - host: string, - port: number, - query: string, - options?: LookupOptions, -): Promise { - return new Promise((resolve, reject) => { - const socket = createConnection({ host, port }); - let data = ""; - let done = false; - const cleanup = () => { - if (done) return; - done = true; - socket.destroy(); - }; - socket.setTimeout((options?.timeoutMs ?? DEFAULT_TIMEOUT_MS) - 1000, () => { - cleanup(); - reject(new Error("WHOIS socket timeout")); - }); - socket.on("error", (err) => { - cleanup(); - reject(err); - }); - socket.on("data", (chunk) => { - data += chunk.toString("utf8"); - }); - socket.on("end", () => { - cleanup(); - resolve(data); - }); - socket.on("connect", () => { - socket.write(`${query}\r\n`); - }); - }); -} - -/** - * Best-effort discovery of the authoritative WHOIS server for a TLD via IANA root DB. - */ -export async function ianaWhoisServerForTld( - tld: string, - options?: LookupOptions, -): Promise { - const EXCEPTIONS: Record = { - com: "whois.verisign-grs.com", - net: "whois.verisign-grs.com", - org: "whois.pir.org", - }; - const url = `https://www.iana.org/domains/root/db/${encodeURIComponent(tld)}.html`; - try { - const res = await withTimeout( - fetch(url, { method: "GET" }), - options?.timeoutMs ?? DEFAULT_TIMEOUT_MS, - ); - if (!res.ok) return undefined; - const html = await res.text(); - const m = - html.match(/Whois Server:\s*]*>([^<]+)<\/a>/i) || - html.match(/Whois Server:\s*([^<\n]+)/i); - const server = m?.[1]?.trim(); - if (!server) - return EXCEPTIONS[tld.toLowerCase()] ?? `whois.nic.${tld.toLowerCase()}`; - return server.replace(/^https?:\/\//i, "").replace(/\/$/, ""); - } catch { - return EXCEPTIONS[tld.toLowerCase()] ?? `whois.nic.${tld.toLowerCase()}`; - } -} - -/** - * Extract registrar referral WHOIS server from a WHOIS response, if present. 
- */ -export function extractWhoisReferral(text: string): string | undefined { - const patterns = [ - /^Registrar WHOIS Server:\s*(.+)$/im, - /^Whois Server:\s*(.+)$/im, - /^ReferralServer:\s*whois:\/\/(.+)$/im, - ]; - for (const re of patterns) { - const m = text.match(re); - if (m?.[1]) return m[1].trim(); - } - return undefined; -} diff --git a/src/normalize-whois.test.ts b/src/whois/__tests__/normalize.test.ts similarity index 91% rename from src/normalize-whois.test.ts rename to src/whois/__tests__/normalize.test.ts index 81fa07d..da795dd 100644 --- a/src/normalize-whois.test.ts +++ b/src/whois/__tests__/normalize.test.ts @@ -1,18 +1,6 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { normalizeWhois } from "./normalize-whois.js"; - -function _runCase(label: string, tld: string, text: string) { - const rec = normalizeWhois( - `example.${tld}`, - tld, - text, - "whois.test", - "2025-01-01T00:00:00Z", - ); - assert.equal(rec.tld, tld, label); - assert.equal(rec.source, "whois"); -} +import { normalizeWhois } from "../normalize.js"; test("WHOIS .de (DENIC-like) nserver lines", () => { const text = ` diff --git a/src/whois/catalog.ts b/src/whois/catalog.ts new file mode 100644 index 0000000..4befb1d --- /dev/null +++ b/src/whois/catalog.ts @@ -0,0 +1,60 @@ +// Centralized WHOIS data catalog. 
+// - tldExceptions: curated non-standard authoritative WHOIS servers by TLD +// - centralnicZones: known CentralNic-operated second-level public suffix zones + +export const WHOIS_CATALOG = { + tldExceptions: { + // gTLDs + com: "whois.verisign-grs.com", + net: "whois.verisign-grs.com", + org: "whois.pir.org", + info: "whois.afilias.net", + biz: "whois.nic.biz", + edu: "whois.educause.edu", + gov: "whois.dotgov.gov", + + // ccTLDs and others + de: "whois.denic.de", + jp: "whois.jprs.jp", + fr: "whois.nic.fr", + it: "whois.nic.it", + pl: "whois.dns.pl", + nl: "whois.domain-registry.nl", + be: "whois.dns.be", + se: "whois.iis.se", + no: "whois.norid.no", + fi: "whois.fi", + cz: "whois.nic.cz", + es: "whois.nic.es", + br: "whois.registro.br", + ca: "whois.cira.ca", + dk: "whois.dk-hostmaster.dk", + hk: "whois.hkirc.hk", + sg: "whois.sgnic.sg", + in: "whois.registry.in", + nz: "whois.srs.net.nz", + ch: "whois.nic.ch", + li: "whois.nic.li", + io: "whois.nic.io", + ai: "whois.nic.ai", + ru: "whois.tcinet.ru", + su: "whois.tcinet.ru", + "xn--p1ai": "whois.tcinet.ru", // .рф + + // CentralNic-operated second-level zones (treat as exceptions here for simplicity) + "uk.com": "whois.centralnic.com", + "uk.net": "whois.centralnic.com", + "gb.com": "whois.centralnic.com", + "gb.net": "whois.centralnic.com", + "eu.com": "whois.centralnic.com", + "us.com": "whois.centralnic.com", + "se.com": "whois.centralnic.com", + "de.com": "whois.centralnic.com", + "br.com": "whois.centralnic.com", + "ru.com": "whois.centralnic.com", + "cn.com": "whois.centralnic.com", + "sa.com": "whois.centralnic.com", + } as Record, +} as const; + +export const WHOIS_TLD_EXCEPTIONS = WHOIS_CATALOG.tldExceptions; diff --git a/src/whois/client.ts b/src/whois/client.ts new file mode 100644 index 0000000..e661f3d --- /dev/null +++ b/src/whois/client.ts @@ -0,0 +1,66 @@ +import { createConnection } from "node:net"; +import { DEFAULT_TIMEOUT_MS } from "../config.js"; +import { withTimeout } from 
"../lib/async.js"; +import type { LookupOptions } from "../types.js"; + +export interface WhoisQueryResult { + serverQueried: string; + text: string; +} + +/** + * Perform a WHOIS query against an RFC 3912 server over TCP 43. + * Returns the raw text and the server used. + */ +export async function whoisQuery( + server: string, + query: string, + options?: LookupOptions, +): Promise { + const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const port = 43; + const host = server.replace(/^whois:\/\//i, ""); + const text = await withTimeout( + queryTcp(host, port, query, options), + timeoutMs, + "WHOIS timeout", + ); + return { serverQueried: server, text }; +} + +// Low-level WHOIS TCP client. Some registries require CRLF after the domain query. +function queryTcp( + host: string, + port: number, + query: string, + options?: LookupOptions, +): Promise { + return new Promise((resolve, reject) => { + const socket = createConnection({ host, port }); + let data = ""; + let done = false; + const cleanup = () => { + if (done) return; + done = true; + socket.destroy(); + }; + socket.setTimeout((options?.timeoutMs ?? DEFAULT_TIMEOUT_MS) - 1000, () => { + cleanup(); + reject(new Error("WHOIS socket timeout")); + }); + socket.on("error", (err) => { + cleanup(); + reject(err); + }); + socket.on("data", (chunk) => { + data += chunk.toString("utf8"); + }); + socket.on("end", () => { + cleanup(); + resolve(data); + }); + socket.on("connect", () => { + socket.write(`${query}\r\n`); + }); + }); +} diff --git a/src/whois/discovery.ts b/src/whois/discovery.ts new file mode 100644 index 0000000..ecb8100 --- /dev/null +++ b/src/whois/discovery.ts @@ -0,0 +1,56 @@ +import type { LookupOptions } from "../types.js"; +import { WHOIS_TLD_EXCEPTIONS } from "./catalog.js"; +import { whoisQuery } from "./client.js"; + +/** + * Best-effort discovery of the authoritative WHOIS server for a TLD via IANA root DB. 
+ */ +export async function ianaWhoisServerForTld( + tld: string, + options?: LookupOptions, +): Promise { + const key = tld.toLowerCase(); + // 1) Explicit hint override + const hint = options?.whoisHints?.[key]; + if (hint) return normalizeServer(hint); + + // 2) IANA WHOIS authoritative discovery over TCP 43 + try { + const res = await whoisQuery("whois.iana.org", key, options); + const txt = res.text; + const m = + txt.match(/^whois:\s*(\S+)/im) || + txt.match(/^refer:\s*(\S+)/im) || + txt.match(/^whois server:\s*(\S+)/im); + const server = m?.[1]; + if (server) return normalizeServer(server); + } catch { + // fallthrough to exceptions/guess + } + + // 3) Curated exceptions + const exception = WHOIS_TLD_EXCEPTIONS[key]; + if (exception) return normalizeServer(exception); + + return undefined; +} + +/** + * Extract registrar referral WHOIS server from a WHOIS response, if present. + */ +export function extractWhoisReferral(text: string): string | undefined { + const patterns = [ + /^Registrar WHOIS Server:\s*(.+)$/im, + /^Whois Server:\s*(.+)$/im, + /^ReferralServer:\s*whois:\/\/(.+)$/im, + ]; + for (const re of patterns) { + const m = text.match(re); + if (m?.[1]) return m[1].trim(); + } + return undefined; +} + +function normalizeServer(server: string): string { + return server.replace(/^whois:\/\//i, "").replace(/\/$/, ""); +} diff --git a/src/normalize-whois.ts b/src/whois/normalize.ts similarity index 98% rename from src/normalize-whois.ts rename to src/whois/normalize.ts index 38f7237..36661e9 100644 --- a/src/normalize-whois.ts +++ b/src/whois/normalize.ts @@ -1,10 +1,11 @@ +import { toISO } from "../lib/dates.js"; +import { parseKeyValueLines, uniq } from "../lib/text.js"; import type { Contact, DomainRecord, Nameserver, RegistrarInfo, -} from "./types.js"; -import { parseKeyValueLines, toISO, uniq } from "./utils.js"; +} from "../types.js"; /** * Convert raw WHOIS text into our normalized DomainRecord. 
diff --git a/tsconfig.build.json b/tsconfig.build.json new file mode 100644 index 0000000..a9404d7 --- /dev/null +++ b/tsconfig.build.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "declaration": true, + "declarationMap": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**"] +} diff --git a/tsconfig.tsbuildinfo b/tsconfig.tsbuildinfo deleted file mode 100644 index 23fb83c..0000000 --- a/tsconfig.tsbuildinfo +++ /dev/null @@ -1 +0,0 @@ -{"root":["./src/config.ts","./src/index.ts","./src/lookup.smoke.test.ts","./src/lookup.ts","./src/normalize-rdap.test.ts","./src/normalize-rdap.ts","./src/normalize-whois.test.ts","./src/normalize-whois.ts","./src/rdap.ts","./src/types.d.ts","./src/utils.test.ts","./src/utils.ts","./src/whois.ts"],"version":"5.9.2"} \ No newline at end of file