You've already forked domainstack.io
mirror of
https://github.com/jakejarvis/domainstack.io.git
synced 2025-12-02 19:33:48 -05:00
refactor: streamline domain handling by consolidating normalization and registrability checks
This commit is contained in:
@@ -3,7 +3,6 @@ import type { Metadata } from "next";
|
||||
import { notFound, redirect } from "next/navigation";
|
||||
import { DomainReportView } from "@/components/domain/domain-report-view";
|
||||
import { analytics } from "@/lib/analytics/server";
|
||||
import { normalizeDomainInput } from "@/lib/domain";
|
||||
import { toRegistrableDomain } from "@/lib/domain-server";
|
||||
import { getQueryClient, trpc } from "@/trpc/server";
|
||||
|
||||
@@ -17,10 +16,9 @@ export async function generateMetadata({
|
||||
}): Promise<Metadata> {
|
||||
const { domain: raw } = await params;
|
||||
const decoded = decodeURIComponent(raw);
|
||||
const normalized = normalizeDomainInput(decoded);
|
||||
|
||||
const isRegistrable = toRegistrableDomain(normalized);
|
||||
if (!isRegistrable) {
|
||||
const registrable = toRegistrableDomain(decoded);
|
||||
if (!registrable) {
|
||||
// workaround, should match metadata from not-found.tsx
|
||||
return {
|
||||
title: "Not Found",
|
||||
@@ -30,11 +28,11 @@ export async function generateMetadata({
|
||||
|
||||
return {
|
||||
title: {
|
||||
absolute: `${normalized} — Domain Report`,
|
||||
absolute: `${registrable} — Domain Report`,
|
||||
},
|
||||
description: `Domainstack report for ${normalized}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
|
||||
description: `Domainstack report for ${registrable}: WHOIS lookup, DNS & SSL scan, HTTP headers, hosting & email provider data, and SEO metadata.`,
|
||||
alternates: {
|
||||
canonical: `/${normalized}`,
|
||||
canonical: `/${registrable}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -46,30 +44,29 @@ export default async function DomainPage({
|
||||
}) {
|
||||
const { domain: raw } = await params;
|
||||
const decoded = decodeURIComponent(raw);
|
||||
const normalized = normalizeDomainInput(decoded);
|
||||
|
||||
const isRegistrable = toRegistrableDomain(normalized);
|
||||
if (!isRegistrable) notFound();
|
||||
const registrable = toRegistrableDomain(decoded);
|
||||
if (!registrable) notFound();
|
||||
|
||||
// Canonicalize URL to the normalized domain (middleware should already handle most cases)
|
||||
if (normalized !== decoded) {
|
||||
redirect(`/${encodeURIComponent(normalized)}`);
|
||||
// Canonicalize URL to the registrable domain (middleware should already handle most cases)
|
||||
if (registrable !== decoded) {
|
||||
redirect(`/${encodeURIComponent(registrable)}`);
|
||||
}
|
||||
|
||||
// Track server-side page view
|
||||
analytics.track("report_viewed", { domain: normalized });
|
||||
analytics.track("report_viewed", { domain: registrable });
|
||||
|
||||
// Minimal prefetch: registration only, let sections stream progressively
|
||||
// Use getQueryClient() to ensure consistent query client across the request
|
||||
const queryClient = getQueryClient();
|
||||
void queryClient.prefetchQuery(
|
||||
trpc.domain.getRegistration.queryOptions({ domain: normalized }),
|
||||
trpc.domain.getRegistration.queryOptions({ domain: registrable }),
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="container mx-auto max-w-4xl px-4 py-6">
|
||||
<HydrationBoundary state={dehydrate(queryClient)}>
|
||||
<DomainReportView domain={normalized} />
|
||||
<DomainReportView domain={registrable} />
|
||||
</HydrationBoundary>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
import { toRegistrableDomain as toRegistrableDomainRdapper } from "rdapper";
|
||||
import { cache } from "react";
|
||||
import { BLACKLISTED_SUFFIXES } from "@/lib/constants/domain-validation";
|
||||
import { normalizeDomainInput } from "@/lib/domain";
|
||||
|
||||
// A simple wrapper around rdapper's toRegistrableDomain that:
|
||||
// 1. is cached for per-request deduplication
|
||||
// 2. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
|
||||
// A wrapper around rdapper's toRegistrableDomain that:
|
||||
// 1. normalizes user input (strips schemes, paths, ports, auth, www., etc.)
|
||||
// 2. is cached for per-request deduplication
|
||||
// 3. checks if the domain is blacklisted by BLACKLISTED_SUFFIXES in constants/domain-validation.ts
|
||||
export const toRegistrableDomain = cache(function toRegistrableDomain(
|
||||
input: string,
|
||||
): string | null {
|
||||
const value = (input ?? "").trim().toLowerCase();
|
||||
// First normalize the input to extract a clean hostname
|
||||
// This handles user input with schemes, paths, ports, auth, trailing dots, www., etc.
|
||||
const normalized = normalizeDomainInput(input);
|
||||
if (!normalized) return null;
|
||||
|
||||
const value = normalized.trim().toLowerCase();
|
||||
if (value === "") return null;
|
||||
|
||||
// Shortcut: exact suffixes such as ".css.map" that frequently appear
|
||||
|
||||
@@ -24,6 +24,54 @@ describe("normalizeDomainInput", () => {
|
||||
"ex-ample.com",
|
||||
);
|
||||
});
|
||||
|
||||
it("handles malformed protocols (single slash)", () => {
|
||||
expect(normalizeDomainInput("http:/example.com")).toBe("example.com");
|
||||
});
|
||||
|
||||
it("handles malformed protocols (triple slash)", () => {
|
||||
expect(normalizeDomainInput("http:///example.com")).toBe("example.com");
|
||||
});
|
||||
|
||||
it("handles malformed protocols (multiple colons)", () => {
|
||||
expect(normalizeDomainInput("https:::example.com/path")).toBe(
|
||||
"example.com",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects IPv6 literals", () => {
|
||||
expect(normalizeDomainInput("[::1]")).toBe("");
|
||||
expect(normalizeDomainInput("[::1]:8080")).toBe("");
|
||||
expect(normalizeDomainInput("http://[2001:db8::1]/path")).toBe("");
|
||||
});
|
||||
|
||||
it("handles spaces and whitespace", () => {
|
||||
expect(normalizeDomainInput(" example.com ")).toBe("example.com");
|
||||
expect(normalizeDomainInput("example.com /path")).toBe("example.com");
|
||||
});
|
||||
|
||||
it("strips www from subdomains", () => {
|
||||
expect(normalizeDomainInput("www.example.com")).toBe("example.com");
|
||||
expect(normalizeDomainInput("WWW.EXAMPLE.COM")).toBe("example.com");
|
||||
});
|
||||
|
||||
it("preserves non-www subdomains", () => {
|
||||
expect(normalizeDomainInput("api.example.com")).toBe("api.example.com");
|
||||
expect(normalizeDomainInput("sub.domain.example.com")).toBe(
|
||||
"sub.domain.example.com",
|
||||
);
|
||||
});
|
||||
|
||||
it("handles query parameters and fragments", () => {
|
||||
expect(normalizeDomainInput("example.com?query=value")).toBe("example.com");
|
||||
expect(normalizeDomainInput("example.com#fragment")).toBe("example.com");
|
||||
expect(normalizeDomainInput("example.com?q=1#frag")).toBe("example.com");
|
||||
});
|
||||
|
||||
it("returns empty string for empty input", () => {
|
||||
expect(normalizeDomainInput("")).toBe("");
|
||||
expect(normalizeDomainInput(" ")).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
describe("isValidDomain", () => {
|
||||
|
||||
@@ -1,52 +1,79 @@
|
||||
// Utilities for handling user-provided domain input
|
||||
|
||||
// Matches a leading "http" or "https" followed by one or more colons and/or slashes
|
||||
// Captures the authority (host + userinfo + port)
|
||||
// This handles malformed protocols like "http:/example.com" or "http:///example.com"
|
||||
const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
|
||||
|
||||
/**
|
||||
* Normalize arbitrary user input into a bare registrable domain string.
|
||||
* Normalize arbitrary user input into a bare hostname string.
|
||||
* Accepts values like:
|
||||
* - "example.com"
|
||||
* - "www.example.com."
|
||||
* - "https://example.com/path?x#y"
|
||||
* - "http://user:pass@example.com:8080/"
|
||||
* - "http:/example.com" (malformed protocol)
|
||||
* - " EXAMPLE.COM "
|
||||
* Returns a lowercased hostname without scheme, path, auth, port, or trailing dot.
|
||||
* Returns a lowercased hostname without scheme, path, auth, port, trailing dot, or www. prefix.
|
||||
* Returns empty string for invalid/unparseable input or IPv6 literals.
|
||||
*/
|
||||
export function normalizeDomainInput(input: string): string {
|
||||
let value = (input ?? "").trim();
|
||||
if (value === "") return "";
|
||||
|
||||
// If it looks like a URL (has a scheme), use URL parsing
|
||||
const hasScheme = /:\/\//.test(value);
|
||||
if (hasScheme) {
|
||||
// Reject IPv6 literals early (e.g., "[::1]", "[::1]:8080")
|
||||
// These are not supported and would cause issues in URL parsing
|
||||
if (value.includes("[") || value.includes("]")) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Try to extract authority (host) from scheme-prefixed input
|
||||
// This handles both valid and malformed protocols
|
||||
const schemeMatch = value.match(SCHEME_PREFIX_REGEX);
|
||||
if (schemeMatch) {
|
||||
// Extract authority from the scheme match
|
||||
value = schemeMatch[1];
|
||||
} else if (/:\/\//.test(value)) {
|
||||
// Has scheme-like pattern but didn't match our regex (e.g., "fake+scheme://...")
|
||||
// Try URL parsing first
|
||||
try {
|
||||
const url = new URL(value);
|
||||
// URL applies IDNA (punycode) and strips auth/port/path for hostname
|
||||
value = url.hostname;
|
||||
} catch {
|
||||
// If invalid URL with scheme, strip leading scheme-like prefix manually
|
||||
// Fallback: strip scheme-like prefix manually
|
||||
value = value.replace(/^\w+:\/\//, "");
|
||||
// Remove credentials if present
|
||||
value = value.replace(/^[^@]+@/, "");
|
||||
// Remove path/query/fragment
|
||||
value = value.split("/")[0].split("?")[0].split("#")[0];
|
||||
}
|
||||
} else {
|
||||
// No scheme: try URL parsing with implicit http:// to get punycoded hostname
|
||||
// No scheme detected: try URL parsing with implicit http:// to get punycoded hostname
|
||||
try {
|
||||
const url = new URL(`http://${value}`);
|
||||
value = url.hostname;
|
||||
} catch {
|
||||
// Fallback: remove any credentials, path, query, or fragment accidentally included
|
||||
value = value.replace(/^[^@]+@/, "");
|
||||
value = value.split("/")[0].split("?")[0].split("#")[0];
|
||||
// Fallback: treat as raw authority and parse manually
|
||||
}
|
||||
}
|
||||
|
||||
// Strip port if present
|
||||
value = value.replace(/:\d+$/, "");
|
||||
// Strip query and fragment (in case they weren't already removed)
|
||||
value = value.split(/[?#]/)[0];
|
||||
|
||||
// Strip User Info (credentials)
|
||||
const atIndex = value.lastIndexOf("@");
|
||||
if (atIndex !== -1) {
|
||||
value = value.slice(atIndex + 1);
|
||||
}
|
||||
|
||||
// Strip port
|
||||
value = value.split(":")[0];
|
||||
|
||||
// Remove any path components that might remain
|
||||
value = value.split("/")[0];
|
||||
|
||||
// Strip trailing dot
|
||||
value = value.replace(/\.$/, "");
|
||||
|
||||
// Trim any remaining whitespace
|
||||
value = value.trim();
|
||||
|
||||
// Remove common leading www.
|
||||
value = value.replace(/^www\./i, "");
|
||||
|
||||
@@ -54,7 +81,7 @@ export function normalizeDomainInput(input: string): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic domain validity check (hostname-like), not performing DNS or RDAP.
|
||||
* An even more basic domain validity check (hostname-like), not performing DNS or RDAP.
|
||||
*/
|
||||
export function isValidDomain(value: string): boolean {
|
||||
const v = (value ?? "").trim();
|
||||
|
||||
@@ -2,10 +2,6 @@ import type { NextRequest } from "next/server";
|
||||
import { NextResponse } from "next/server";
|
||||
import { toRegistrableDomain } from "@/lib/domain-server";
|
||||
|
||||
// Matches beginning "http:" or "https:" followed by any number of slashes/colons
|
||||
// Captures the authority (host + userinfo + port)
|
||||
export const SCHEME_PREFIX_REGEX = /^https?[:/]+([^/]+)/i;
|
||||
|
||||
export type ProxyAction =
|
||||
| { type: "match" }
|
||||
| { type: "redirect"; destination: string }
|
||||
@@ -40,57 +36,15 @@ export function getProxyAction(path: string): ProxyAction {
|
||||
// ignore decoding failures
|
||||
}
|
||||
|
||||
let candidate = decodedInput;
|
||||
|
||||
// 3. Extract authority (host) candidate
|
||||
// If scheme present, extract authority from it.
|
||||
// Otherwise, treat the whole string as potential authority start.
|
||||
const schemeMatch = candidate.match(SCHEME_PREFIX_REGEX);
|
||||
let authority = schemeMatch ? schemeMatch[1] : candidate;
|
||||
|
||||
// 4. Cleanup: Strip query, fragment, path (if not already stripped by regex)
|
||||
// Note: Regex above stops at first slash, so path is already gone if scheme matched.
|
||||
// If scheme didn't match, we manually strip path.
|
||||
if (!schemeMatch) {
|
||||
authority = authority.split("/")[0];
|
||||
}
|
||||
|
||||
// Strip query and fragment (order doesn't matter as we take the first occurrence of either)
|
||||
authority = authority.split(/[?#]/)[0];
|
||||
|
||||
authority = authority.trim();
|
||||
|
||||
// 5. Strip User Info
|
||||
const atIndex = authority.lastIndexOf("@");
|
||||
if (atIndex !== -1) {
|
||||
authority = authority.slice(atIndex + 1);
|
||||
}
|
||||
|
||||
// 6. Strip Port
|
||||
// IPv6 literals in brackets (e.g. [::1]) are not supported.
|
||||
if (authority.includes("[") || authority.includes("]")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Safe to split on colon as valid domains don't contain colons
|
||||
authority = authority.split(":")[0];
|
||||
|
||||
candidate = authority.trim();
|
||||
|
||||
if (!candidate) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 7. Validate and Normalize
|
||||
// This will return null for invalid domains, including IPs if rdapper handles them as such.
|
||||
const registrable = toRegistrableDomain(candidate);
|
||||
// 3. Validate and extract the registrable domain
|
||||
const registrable = toRegistrableDomain(decodedInput);
|
||||
if (!registrable) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 8. Redirect if necessary
|
||||
// 4. Redirect if necessary
|
||||
// We compare the originally decoded input against the final canonical domain.
|
||||
// Any difference (path, query, scheme, case, whitespace, userinfo, port) triggers a redirect.
|
||||
// Any difference (path, query, scheme, case, whitespace, userinfo, port, subdomain) triggers a redirect.
|
||||
if (decodedInput !== registrable) {
|
||||
return {
|
||||
type: "redirect",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { TRPCError } from "@trpc/server";
|
||||
import z from "zod";
|
||||
import { normalizeDomainInput } from "@/lib/domain";
|
||||
import { toRegistrableDomain } from "@/lib/domain-server";
|
||||
import {
|
||||
BlobUrlResponseSchema,
|
||||
@@ -30,8 +29,7 @@ import {
|
||||
const DomainInputSchema = z
|
||||
.object({ domain: z.string().min(1) })
|
||||
.transform(({ domain }) => {
|
||||
const normalized = normalizeDomainInput(domain);
|
||||
const registrable = toRegistrableDomain(normalized);
|
||||
const registrable = toRegistrableDomain(domain);
|
||||
if (!registrable) {
|
||||
throw new TRPCError({
|
||||
code: "BAD_REQUEST",
|
||||
|
||||
Reference in New Issue
Block a user