mirror of
https://github.com/jakejarvis/hoot.git
synced 2025-10-18 20:14:25 -04:00
Refactor middleware to improve domain handling and redirect logic (#88)
This commit is contained in:
@@ -40,7 +40,7 @@ export default async function DomainPage({
|
||||
const isRegistrable = toRegistrableDomain(normalized);
|
||||
if (!isRegistrable) notFound();
|
||||
|
||||
// Canonicalize URL to the normalized domain
|
||||
// Canonicalize URL to the normalized domain (middleware should already handle most cases)
|
||||
if (normalized !== decoded) {
|
||||
redirect(`/${encodeURIComponent(normalized)}`);
|
||||
}
|
||||
|
@@ -1,7 +1,3 @@
|
||||
// Server-only domain helpers using rdapper
|
||||
// Note: Do not import this file in client components.
|
||||
import "server-only";
|
||||
|
||||
import { toRegistrableDomain as toRegistrableDomainRdapper } from "rdapper";
|
||||
import { BLACKLISTED_SUFFIXES } from "@/lib/constants";
|
||||
|
||||
|
127
middleware.ts
127
middleware.ts
@@ -1,5 +1,6 @@
|
||||
import type { NextRequest } from "next/server";
|
||||
import { NextResponse } from "next/server";
|
||||
import { toRegistrableDomain } from "@/lib/domain-server";
|
||||
|
||||
// Matches beginning "http:" or "https:" followed by any number of slashes, e.g.:
|
||||
// "https://", "https:/", "https:////" etc.
|
||||
@@ -10,7 +11,13 @@ export function middleware(request: NextRequest) {
|
||||
const path = request.nextUrl.pathname;
|
||||
|
||||
// Fast path: only act on non-root paths
|
||||
if (path.length <= 1) return NextResponse.next();
|
||||
if (path.length <= 1) {
|
||||
return NextResponse.next({
|
||||
headers: {
|
||||
"x-middleware-verdict": "ignore",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Remove the leading "/" so we can inspect the raw string the user pasted after the host
|
||||
const afterSlash = path.slice(1);
|
||||
@@ -23,57 +30,103 @@ export function middleware(request: NextRequest) {
|
||||
// ignore decoding failures; fall back to raw
|
||||
}
|
||||
|
||||
// Minimal: handle scheme-less single-segment '/www.<host>' redirects
|
||||
if (!candidate.includes("/") && /^www\./i.test(candidate)) {
|
||||
const host = candidate.replace(/^www\./i, "").replace(/\.$/, "");
|
||||
if (host) {
|
||||
const url = request.nextUrl.clone();
|
||||
url.pathname = `/${encodeURIComponent(host.toLowerCase())}`;
|
||||
url.search = "";
|
||||
url.hash = "";
|
||||
return NextResponse.redirect(url);
|
||||
}
|
||||
// If the candidate contains a scheme, extract authority; otherwise normalize the raw candidate the same way
|
||||
const match = candidate.match(HTTP_PREFIX_CAPTURE_AUTHORITY);
|
||||
let authority = match ? match[1] : candidate;
|
||||
|
||||
// Strip any query or fragment that may be present
|
||||
const queryIndex = authority.indexOf("?");
|
||||
const fragmentIndex = authority.indexOf("#");
|
||||
let cutoffIndex = -1;
|
||||
if (queryIndex !== -1 && fragmentIndex !== -1) {
|
||||
cutoffIndex = Math.min(queryIndex, fragmentIndex);
|
||||
} else {
|
||||
cutoffIndex = queryIndex !== -1 ? queryIndex : fragmentIndex;
|
||||
}
|
||||
if (cutoffIndex !== -1) authority = authority.slice(0, cutoffIndex);
|
||||
|
||||
// For scheme-less inputs, drop any path portion after the first slash
|
||||
if (!match) {
|
||||
const slashIndex = authority.indexOf("/");
|
||||
if (slashIndex !== -1) authority = authority.slice(0, slashIndex);
|
||||
}
|
||||
|
||||
// Match the pattern at the top for pasted URLs with scheme
|
||||
const match = candidate.match(HTTP_PREFIX_CAPTURE_AUTHORITY);
|
||||
if (!match) return NextResponse.next();
|
||||
authority = authority.trim();
|
||||
|
||||
// May include userinfo@host:port; we only want the host
|
||||
let authority = match[1];
|
||||
|
||||
// Strip userinfo@ if present
|
||||
// Remove userinfo if present
|
||||
const atIndex = authority.lastIndexOf("@");
|
||||
if (atIndex !== -1) authority = authority.slice(atIndex + 1);
|
||||
|
||||
// Strip port if present
|
||||
const colonIndex = authority.indexOf(":");
|
||||
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
|
||||
// Detect bracketed IPv6 literal and only strip port if a colon appears after the closing ']'.
|
||||
if (authority.startsWith("[")) {
|
||||
const closingBracketIndex = authority.indexOf("]");
|
||||
if (closingBracketIndex !== -1) {
|
||||
const colonAfterBracketIndex = authority.indexOf(
|
||||
":",
|
||||
closingBracketIndex + 1,
|
||||
);
|
||||
if (colonAfterBracketIndex !== -1) {
|
||||
authority = authority.slice(0, colonAfterBracketIndex);
|
||||
} else {
|
||||
// keep the bracketed host intact when no port is present
|
||||
authority = authority.slice(0, closingBracketIndex + 1);
|
||||
}
|
||||
} else {
|
||||
// Malformed bracket: fall back to first colon behavior
|
||||
const colonIndex = authority.indexOf(":");
|
||||
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
|
||||
}
|
||||
} else {
|
||||
const colonIndex = authority.indexOf(":");
|
||||
if (colonIndex !== -1) authority = authority.slice(0, colonIndex);
|
||||
}
|
||||
|
||||
// Trim whitespace before last checks
|
||||
authority = authority.trim();
|
||||
candidate = authority.trim();
|
||||
|
||||
// Normalize common "www." prefix
|
||||
if (/^www\./i.test(authority)) authority = authority.slice(4);
|
||||
if (!candidate) {
|
||||
return NextResponse.next({
|
||||
headers: {
|
||||
"x-middleware-verdict": "ignore",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Skip IP addresses entirely (unsupported)
|
||||
const isIPv4Like = /^(?:\d{1,3}\.){3}\d{1,3}$/.test(authority);
|
||||
if (isIPv4Like) return NextResponse.next();
|
||||
// Determine registrable apex and subdomain presence
|
||||
const registrable = toRegistrableDomain(candidate);
|
||||
if (!registrable) {
|
||||
return NextResponse.next({
|
||||
headers: {
|
||||
"x-middleware-verdict": "ignore",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// The final bailout: if we end up with an empty string by here, it's not a valid domain
|
||||
if (!authority) return NextResponse.next();
|
||||
// If coming from a full URL carrier, any subdomain is present, or the host differs from registrable (case/trailing dot), redirect to apex
|
||||
const shouldRedirectToApex = Boolean(match) || candidate !== registrable;
|
||||
if (shouldRedirectToApex) {
|
||||
const url = request.nextUrl.clone();
|
||||
url.pathname = `/${encodeURIComponent(registrable)}`;
|
||||
url.search = "";
|
||||
url.hash = "";
|
||||
return NextResponse.redirect(url, {
|
||||
headers: {
|
||||
"x-middleware-verdict": "redirect",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const url = request.nextUrl.clone();
|
||||
const hostLower = authority.toLowerCase();
|
||||
url.pathname = `/${encodeURIComponent(hostLower)}`;
|
||||
url.search = ""; // remove any irrelevant query string from the pasted URL carrier path
|
||||
url.hash = "";
|
||||
return NextResponse.redirect(url);
|
||||
// Otherwise, it's already a bare registrable domain — proceed
|
||||
return NextResponse.next({
|
||||
headers: {
|
||||
"x-middleware-verdict": "ok",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
export const config = {
|
||||
runtime: "nodejs",
|
||||
matcher: [
|
||||
// Exclude API and Next internals/static assets for performance and to avoid side effects
|
||||
"/((?!api|_next/static|_next/image|_next/webpack-hmr|_vercel|favicon.ico|robots.txt|sitemap.xml).*)",
|
||||
"/((?!api|_next/static|_next/image|_next/webpack-hmr|_vercel|_proxy|favicon.ico|robots.txt|sitemap.xml).*)",
|
||||
],
|
||||
};
|
||||
|
Reference in New Issue
Block a user