1
mirror of https://github.com/jakejarvis/hoot.git synced 2025-10-18 20:14:25 -04:00

Refactor middleware to improve domain handling and redirect logic (#88)

This commit is contained in:
2025-10-11 00:09:04 -04:00
committed by GitHub
parent cc409a2cef
commit 382303de59
3 changed files with 91 additions and 42 deletions

View File

@@ -40,7 +40,7 @@ export default async function DomainPage({
const isRegistrable = toRegistrableDomain(normalized);
if (!isRegistrable) notFound();
// Canonicalize URL to the normalized domain
// Canonicalize URL to the normalized domain (middleware should already handle most cases)
if (normalized !== decoded) {
redirect(`/${encodeURIComponent(normalized)}`);
}

View File

@@ -1,7 +1,3 @@
// Server-only domain helpers using rdapper
// Note: Do not import this file in client components.
import "server-only";
import { toRegistrableDomain as toRegistrableDomainRdapper } from "rdapper";
import { BLACKLISTED_SUFFIXES } from "@/lib/constants";

View File

@@ -1,5 +1,6 @@
import type { NextRequest } from "next/server";
import { NextResponse } from "next/server";
import { toRegistrableDomain } from "@/lib/domain-server";
// Matches beginning "http:" or "https:" followed by any number of slashes, e.g.:
// "https://", "https:/", "https:////" etc.
@@ -10,7 +11,13 @@ export function middleware(request: NextRequest) {
const path = request.nextUrl.pathname;
// Fast path: only act on non-root paths
if (path.length <= 1) return NextResponse.next();
if (path.length <= 1) {
return NextResponse.next({
headers: {
"x-middleware-verdict": "ignore",
},
});
}
// Remove the leading "/" so we can inspect the raw string the user pasted after the host
const afterSlash = path.slice(1);
@@ -23,57 +30,103 @@ export function middleware(request: NextRequest) {
// ignore decoding failures; fall back to raw
}
// Minimal: handle scheme-less single-segment '/www.<host>' redirects
if (!candidate.includes("/") && /^www\./i.test(candidate)) {
const host = candidate.replace(/^www\./i, "").replace(/\.$/, "");
if (host) {
const url = request.nextUrl.clone();
url.pathname = `/${encodeURIComponent(host.toLowerCase())}`;
url.search = "";
url.hash = "";
return NextResponse.redirect(url);
}
// If the candidate contains a scheme, extract authority; otherwise normalize the raw candidate the same way
const match = candidate.match(HTTP_PREFIX_CAPTURE_AUTHORITY);
let authority = match ? match[1] : candidate;
// Strip any query or fragment that may be present
const queryIndex = authority.indexOf("?");
const fragmentIndex = authority.indexOf("#");
let cutoffIndex = -1;
if (queryIndex !== -1 && fragmentIndex !== -1) {
cutoffIndex = Math.min(queryIndex, fragmentIndex);
} else {
cutoffIndex = queryIndex !== -1 ? queryIndex : fragmentIndex;
}
if (cutoffIndex !== -1) authority = authority.slice(0, cutoffIndex);
// For scheme-less inputs, drop any path portion after the first slash
if (!match) {
const slashIndex = authority.indexOf("/");
if (slashIndex !== -1) authority = authority.slice(0, slashIndex);
}
// Match the pattern at the top for pasted URLs with scheme
const match = candidate.match(HTTP_PREFIX_CAPTURE_AUTHORITY);
if (!match) return NextResponse.next();
authority = authority.trim();
// May include userinfo@host:port; we only want the host
let authority = match[1];
// Strip userinfo@ if present
// Remove userinfo if present
const atIndex = authority.lastIndexOf("@");
if (atIndex !== -1) authority = authority.slice(atIndex + 1);
// Strip port if present
const colonIndex = authority.indexOf(":");
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
// Detect bracketed IPv6 literal and only strip port if a colon appears after the closing ']'.
if (authority.startsWith("[")) {
const closingBracketIndex = authority.indexOf("]");
if (closingBracketIndex !== -1) {
const colonAfterBracketIndex = authority.indexOf(
":",
closingBracketIndex + 1,
);
if (colonAfterBracketIndex !== -1) {
authority = authority.slice(0, colonAfterBracketIndex);
} else {
// keep the bracketed host intact when no port is present
authority = authority.slice(0, closingBracketIndex + 1);
}
} else {
// Malformed bracket: fall back to first colon behavior
const colonIndex = authority.indexOf(":");
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
}
} else {
const colonIndex = authority.indexOf(":");
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
}
// Trim whitespace before last checks
authority = authority.trim();
candidate = authority.trim();
// Normalize common "www." prefix
if (/^www\./i.test(authority)) authority = authority.slice(4);
if (!candidate) {
return NextResponse.next({
headers: {
"x-middleware-verdict": "ignore",
},
});
}
// Skip IP addresses entirely (unsupported)
const isIPv4Like = /^(?:\d{1,3}\.){3}\d{1,3}$/.test(authority);
if (isIPv4Like) return NextResponse.next();
// Determine registrable apex and subdomain presence
const registrable = toRegistrableDomain(candidate);
if (!registrable) {
return NextResponse.next({
headers: {
"x-middleware-verdict": "ignore",
},
});
}
// The final bailout: if we end up with an empty string by here, it's not a valid domain
if (!authority) return NextResponse.next();
// Redirect to the apex when the input came from a full URL carrier, a subdomain is present, or the host differs from the registrable domain (case/trailing dot)
const shouldRedirectToApex = Boolean(match) || candidate !== registrable;
if (shouldRedirectToApex) {
const url = request.nextUrl.clone();
url.pathname = `/${encodeURIComponent(registrable)}`;
url.search = "";
url.hash = "";
return NextResponse.redirect(url, {
headers: {
"x-middleware-verdict": "redirect",
},
});
}
const url = request.nextUrl.clone();
const hostLower = authority.toLowerCase();
url.pathname = `/${encodeURIComponent(hostLower)}`;
url.search = ""; // remove any irrelevant query string from the pasted URL carrier path
url.hash = "";
return NextResponse.redirect(url);
// Otherwise, it's already a bare registrable domain — proceed
return NextResponse.next({
headers: {
"x-middleware-verdict": "ok",
},
});
}
export const config = {
runtime: "nodejs",
matcher: [
// Exclude API and Next internals/static assets for performance and to avoid side effects
"/((?!api|_next/static|_next/image|_next/webpack-hmr|_vercel|favicon.ico|robots.txt|sitemap.xml).*)",
"/((?!api|_next/static|_next/image|_next/webpack-hmr|_vercel|_proxy|favicon.ico|robots.txt|sitemap.xml).*)",
],
};