1
mirror of https://github.com/jakejarvis/domainstack.io.git synced 2025-12-02 19:33:48 -05:00

refactor: streamline domain handling by consolidating normalization and registrability checks

This commit is contained in:
2025-11-24 17:55:01 -05:00
parent 5e3fa8c2e8
commit eb7c47af72
6 changed files with 122 additions and 91 deletions

View File

@@ -3,7 +3,6 @@ import type { Metadata } from "next";
import { notFound, redirect } from "next/navigation";
import { DomainReportView } from "@/components/domain/domain-report-view";
import { analytics } from "@/lib/analytics/server";
import { normalizeDomainInput } from "@/lib/domain";
import { toRegistrableDomain } from "@/lib/domain-server";
import { getQueryClient, trpc } from "@/trpc/server";
@@ -17,10 +16,9 @@ export async function generateMetadata({
}): Promise<Metadata> {
const { domain: raw } = await params;
const decoded = decodeURIComponent(raw);
const normalized = normalizeDomainInput(decoded);
const isRegistrable = toRegistrableDomain(normalized);
if (!isRegistrable) {
const registrable = toRegistrableDomain(decoded);
if (!registrable) {
// workaround, should match metadata from not-found.tsx
return {
title: "Not Found",
@@ -30,11 +28,11 @@ export async function generateMetadata({
return {
title: {
absolute: `${normalized} — Domain Report`,
absolute: `${registrable} — Domain Report`,
},
description: `Domainstack report for ${normalized}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
description: `Domainstack report for ${registrable}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
alternates: {
canonical: `/${normalized}`,
canonical: `/${registrable}`,
},
};
}
@@ -46,30 +44,29 @@ export default async function DomainPage({
}) {
const { domain: raw } = await params;
const decoded = decodeURIComponent(raw);
const normalized = normalizeDomainInput(decoded);
const isRegistrable = toRegistrableDomain(normalized);
if (!isRegistrable) notFound();
const registrable = toRegistrableDomain(decoded);
if (!registrable) notFound();
// Canonicalize URL to the normalized domain (middleware should already handle most cases)
if (normalized !== decoded) {
redirect(`/${encodeURIComponent(normalized)}`);
// Canonicalize URL to the registrable domain (middleware should already handle most cases)
if (registrable !== decoded) {
redirect(`/${encodeURIComponent(registrable)}`);
}
// Track server-side page view
analytics.track("report_viewed", { domain: normalized });
analytics.track("report_viewed", { domain: registrable });
// Minimal prefetch: registration only, let sections stream progressively
// Use getQueryClient() to ensure consistent query client across the request
const queryClient = getQueryClient();
void queryClient.prefetchQuery(
trpc.domain.getRegistration.queryOptions({ domain: normalized }),
trpc.domain.getRegistration.queryOptions({ domain: registrable }),
);
return (
<div className="container mx-auto max-w-4xl px-4 py-6">
<HydrationBoundary state={dehydrate(queryClient)}>
<DomainReportView domain={normalized} />
<DomainReportView domain={registrable} />
</HydrationBoundary>
</div>
);

View File

@@ -1,14 +1,21 @@
import { toRegistrableDomain as toRegistrableDomainRdapper } from "rdapper";
import { cache } from "react";
import { BLACKLISTED_SUFFIXES } from "@/lib/constants/domain-validation";
import { normalizeDomainInput } from "@/lib/domain";
// A simple wrapper around rdapper's toRegistrableDomain that:
// 1. is cached for per-request deduplication
// 2. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
// A wrapper around rdapper's toRegistrableDomain that:
// 1. normalizes user input (strips schemes, paths, ports, auth, www., etc.)
// 2. is cached for per-request deduplication
// 3. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
export const toRegistrableDomain = cache(function toRegistrableDomain(
input: string,
): string | null {
const value = (input ?? "").trim().toLowerCase();
// First normalize the input to extract a clean hostname
// This handles user input with schemes, paths, ports, auth, trailing dots, www., etc.
const normalized = normalizeDomainInput(input);
if (!normalized) return null;
const value = normalized.trim().toLowerCase();
if (value === "") return null;
// Shortcut: exact suffixes such as ".css.map" that frequently appear

View File

@@ -24,6 +24,54 @@ describe("normalizeDomainInput", () => {
"ex-ample.com",
);
});
it("handles malformed protocols (single slash)", () => {
  // A scheme followed by a single slash is expected to normalize to the bare host.
  const actual = normalizeDomainInput("http:/example.com");
  expect(actual).toBe("example.com");
});
it("handles malformed protocols (triple slash)", () => {
  // Extra slashes after the scheme are expected to be ignored.
  const actual = normalizeDomainInput("http:///example.com");
  expect(actual).toBe("example.com");
});
it("handles malformed protocols (multiple colons)", () => {
  // Repeated colons after the scheme, plus a trailing path, should reduce to the host.
  const input = "https:::example.com/path";
  expect(normalizeDomainInput(input)).toBe("example.com");
});
it("rejects IPv6 literals", () => {
  // Bracketed IPv6 literals (bare, with a port, or inside a URL) are expected
  // to normalize to the empty string.
  const ipv6Inputs = ["[::1]", "[::1]:8080", "http://[2001:db8::1]/path"];
  for (const literal of ipv6Inputs) {
    expect(normalizeDomainInput(literal)).toBe("");
  }
});
it("handles spaces and whitespace", () => {
  // Surrounding whitespace and a space before a path should both be tolerated.
  const paddedInputs = [" example.com ", "example.com /path"];
  for (const padded of paddedInputs) {
    expect(normalizeDomainInput(padded)).toBe("example.com");
  }
});
it("strips www from subdomains", () => {
  // The leading "www." label is expected to be dropped regardless of letter case.
  const wwwInputs = ["www.example.com", "WWW.EXAMPLE.COM"];
  for (const host of wwwInputs) {
    expect(normalizeDomainInput(host)).toBe("example.com");
  }
});
it("preserves non-www subdomains", () => {
  // Subdomains other than "www" are expected to come back unchanged.
  const subdomainHosts = ["api.example.com", "sub.domain.example.com"];
  for (const host of subdomainHosts) {
    expect(normalizeDomainInput(host)).toBe(host);
  }
});
it("handles query parameters and fragments", () => {
  // A query string, a fragment, or both should be discarded, leaving only the host.
  const decoratedInputs = [
    "example.com?query=value",
    "example.com#fragment",
    "example.com?q=1#frag",
  ];
  for (const decorated of decoratedInputs) {
    expect(normalizeDomainInput(decorated)).toBe("example.com");
  }
});
it("returns empty string for empty input", () => {
  // Empty and whitespace-only input are both expected to produce "".
  const blankInputs = ["", " "];
  for (const blank of blankInputs) {
    expect(normalizeDomainInput(blank)).toBe("");
  }
});
});
describe("isValidDomain", () => {

View File

@@ -1,52 +1,79 @@
// Utilities for handling user-provided domain input
// Matches input that begins with "http" or "https" (case-insensitive) followed by
// ONE OR MORE colons and/or slashes in any combination, so well-formed prefixes
// ("https://example.com") and malformed ones ("http:/example.com",
// "http:///example.com", "https:::example.com") are all recognized.
// Capture group 1 is everything up to the next slash, i.e. the authority. It may
// still contain userinfo, a port, or a query/fragment, which must be stripped
// separately by the code that consumes the match.
const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
/**
* Normalize arbitrary user input into a bare registrable domain string.
* Normalize arbitrary user input into a bare hostname string.
* Accepts values like:
* - "example.com"
* - "www.example.com."
* - "https://example.com/path?x#y"
* - "http://user:pass@example.com:8080/"
* - "http:/example.com" (malformed protocol)
* - " EXAMPLE.COM "
* Returns a lowercased hostname without scheme, path, auth, port, or trailing dot.
* Returns a lowercased hostname without scheme, path, auth, port, trailing dot, or www. prefix.
* Returns empty string for invalid/unparseable input or IPv6 literals.
*/
export function normalizeDomainInput(input: string): string {
let value = (input ?? "").trim();
if (value === "") return "";
// If it looks like a URL (has a scheme), use URL parsing
const hasScheme = /:\/\//.test(value);
if (hasScheme) {
// Reject IPv6 literals early (e.g., "[::1]", "[::1]:8080")
// These are not supported and would cause issues in URL parsing
if (value.includes("[") || value.includes("]")) {
return "";
}
// Try to extract authority (host) from scheme-prefixed input
// This handles both valid and malformed protocols
const schemeMatch = value.match(SCHEME_PREFIX_REGEX);
if (schemeMatch) {
// Extract authority from the scheme match
value = schemeMatch[1];
} else if (/:\/\//.test(value)) {
// Has scheme-like pattern but didn't match our regex (e.g., "fake+scheme://...")
// Try URL parsing first
try {
const url = new URL(value);
// URL applies IDNA (punycode) and strips auth/port/path for hostname
value = url.hostname;
} catch {
// If invalid URL with scheme, strip leading scheme-like prefix manually
// Fallback: strip scheme-like prefix manually
value = value.replace(/^\w+:\/\//, "");
// Remove credentials if present
value = value.replace(/^[^@]+@/, "");
// Remove path/query/fragment
value = value.split("/")[0].split("?")[0].split("#")[0];
}
} else {
// No scheme: try URL parsing with implicit http:// to get punycoded hostname
// No scheme detected: try URL parsing with implicit http:// to get punycoded hostname
try {
const url = new URL(`http://${value}`);
value = url.hostname;
} catch {
// Fallback: remove any credentials, path, query, or fragment accidentally included
value = value.replace(/^[^@]+@/, "");
value = value.split("/")[0].split("?")[0].split("#")[0];
// Fallback: treat as raw authority and parse manually
}
}
// Strip port if present
value = value.replace(/:\d+$/, "");
// Strip query and fragment (in case they weren't already removed)
value = value.split(/[?#]/)[0];
// Strip User Info (credentials)
const atIndex = value.lastIndexOf("@");
if (atIndex !== -1) {
value = value.slice(atIndex + 1);
}
// Strip port
value = value.split(":")[0];
// Remove any path components that might remain
value = value.split("/")[0];
// Strip trailing dot
value = value.replace(/\.$/, "");
// Trim any remaining whitespace
value = value.trim();
// Remove common leading www.
value = value.replace(/^www\./i, "");
@@ -54,7 +81,7 @@ export function normalizeDomainInput(input: string): string {
}
/**
* Basic domain validity check (hostname-like), not performing DNS or RDAP.
* An even more basic domain validity check (hostname-like); it does not perform any DNS or RDAP lookups.
*/
export function isValidDomain(value: string): boolean {
const v = (value ?? "").trim();

View File

@@ -2,10 +2,6 @@ import type { NextRequest } from "next/server";
import { NextResponse } from "next/server";
import { toRegistrableDomain } from "@/lib/domain-server";
// Matches input that begins with "http" or "https" (case-insensitive) followed by
// one or more colons and/or slashes in any combination.
// Capture group 1 is everything up to the next slash: the authority
// (host plus any userinfo and port; a query/fragment may also still be attached).
export const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
export type ProxyAction =
| { type: "match" }
| { type: "redirect"; destination: string }
@@ -40,57 +36,15 @@ export function getProxyAction(path: string): ProxyAction {
// ignore decoding failures
}
let candidate = decodedInput;
// 3. Extract authority (host) candidate
// If scheme present, extract authority from it.
// Otherwise, treat the whole string as potential authority start.
const schemeMatch = candidate.match(SCHEME_PREFIX_REGEX);
let authority = schemeMatch ? schemeMatch[1] : candidate;
// 4. Cleanup: Strip query, fragment, path (if not already stripped by regex)
// Note: Regex above stops at first slash, so path is already gone if scheme matched.
// If scheme didn't match, we manually strip path.
if (!schemeMatch) {
authority = authority.split("/")[0];
}
// Strip query and fragment (order doesn't matter as we take the first occurrence of either)
authority = authority.split(/[?#]/)[0];
authority = authority.trim();
// 5. Strip User Info
const atIndex = authority.lastIndexOf("@");
if (atIndex !== -1) {
authority = authority.slice(atIndex + 1);
}
// 6. Strip Port
// IPv6 literals in brackets (e.g. [::1]) are not supported.
if (authority.includes("[") || authority.includes("]")) {
return null;
}
// Safe to split on colon as valid domains don't contain colons
authority = authority.split(":")[0];
candidate = authority.trim();
if (!candidate) {
return null;
}
// 7. Validate and Normalize
// This will return null for invalid domains, including IPs if rdapper handles them as such.
const registrable = toRegistrableDomain(candidate);
// 3. Validate and extract the registrable domain
const registrable = toRegistrableDomain(decodedInput);
if (!registrable) {
return null;
}
// 8. Redirect if necessary
// 4. Redirect if necessary
// We compare the originally decoded input against the final canonical domain.
// Any difference (path, query, scheme, case, whitespace, userinfo, port) triggers a redirect.
// Any difference (path, query, scheme, case, whitespace, userinfo, port, subdomain) triggers a redirect.
if (decodedInput !== registrable) {
return {
type: "redirect",

View File

@@ -1,6 +1,5 @@
import { TRPCError } from "@trpc/server";
import z from "zod";
import { normalizeDomainInput } from "@/lib/domain";
import { toRegistrableDomain } from "@/lib/domain-server";
import {
BlobUrlResponseSchema,
@@ -30,8 +29,7 @@ import {
const DomainInputSchema = z
.object({ domain: z.string().min(1) })
.transform(({ domain }) => {
const normalized = normalizeDomainInput(domain);
const registrable = toRegistrableDomain(normalized);
const registrable = toRegistrableDomain(domain);
if (!registrable) {
throw new TRPCError({
code: "BAD_REQUEST",