1
mirror of https://github.com/jakejarvis/domainstack.io.git synced 2025-12-02 19:33:48 -05:00
Files
domainstack.io/lib/fetch-remote-asset.ts

331 lines
9.2 KiB
TypeScript

import { lookup as dnsLookup } from "node:dns/promises";
import { isIP } from "node:net";
import * as ipaddr from "ipaddr.js";
import { createLogger } from "@/lib/logger/server";
// Module-scoped logger, created with the "remote-asset" source label.
const logger = createLogger({ source: "remote-asset" });

// Static deny list consulted before any DNS work is attempted:
// exact hostnames first, then hostname suffixes.
const BLOCKED_HOSTNAMES = new Set<string>(["localhost"]);
const BLOCKED_SUFFIXES = [".local", ".internal", ".localhost"];

// Fallback limits, used whenever the caller leaves the matching option unset.
const DEFAULT_MAX_BYTES = 5 * 2 ** 20; // 5 MiB
const DEFAULT_TIMEOUT_MS = 8_000;
const DEFAULT_MAX_REDIRECTS = 3;
/**
 * Machine-readable failure categories carried by `RemoteAssetError.code`.
 *
 * - `invalid_url`: the URL could not be parsed, or it has no hostname.
 * - `protocol_not_allowed`: the scheme is not HTTPS (or HTTP when opted in).
 * - `host_not_allowed`: an allow list was supplied and the host is not on it.
 * - `host_blocked`: the hostname matches the static hostname/suffix blocklist.
 * - `dns_error`: the DNS lookup failed or returned no records.
 * - `private_ip`: the host is, or resolves to, an address rejected by
 *   `isBlockedIp` (anything not classified as `unicast`).
 * - `redirect_limit`: more redirects were encountered than permitted.
 * - `response_error`: a non-OK status, or a redirect without a `Location` header.
 * - `size_exceeded`: the declared or actual body size is above the byte limit.
 */
export type RemoteAssetErrorCode =
| "invalid_url"
| "protocol_not_allowed"
| "host_not_allowed"
| "host_blocked"
| "dns_error"
| "private_ip"
| "redirect_limit"
| "response_error"
| "size_exceeded";
/**
 * Error type raised for every expected failure while fetching a remote asset.
 *
 * `code` identifies the failure category; `status` carries the HTTP status
 * when the failure originated from a response.
 */
export class RemoteAssetError extends Error {
  public readonly code: RemoteAssetErrorCode;
  public readonly status?: number;

  constructor(code: RemoteAssetErrorCode, message: string, status?: number) {
    super(message);
    this.code = code;
    this.status = status;
    this.name = "RemoteAssetError";
  }
}
/** Options accepted by `fetchRemoteAsset`. */
export type FetchRemoteAssetOptions = {
/** Absolute URL, or relative to `currentUrl` when provided. */
url: string | URL;
/** Optional base URL used to resolve a relative `url`. */
currentUrl?: string | URL;
/** Additional headers (e.g., `User-Agent`); passed unchanged to every hop. */
headers?: HeadersInit;
/** Abort timeout per request/redirect hop (ms). Defaults to 8000. */
timeoutMs?: number;
/** Maximum bytes to buffer before aborting. Defaults to 5 MiB. */
maxBytes?: number;
/** Maximum redirects we will follow while re-checking the host. Defaults to 3. */
maxRedirects?: number;
/**
 * Additional allow list to further restrict hosts (still subject to the
 * default blocklist). Entries are trimmed and lower-cased, then compared for
 * exact equality with the hostname; an empty or omitted list disables the check.
 */
allowedHosts?: string[];
/** Allow HTTP (useful for favicons); defaults to HTTPS only. */
allowHttp?: boolean;
};
/** Successful outcome of `fetchRemoteAsset`. */
export type RemoteAssetResult = {
/** Response body, buffered in full (never larger than the byte limit). */
buffer: Buffer;
/** Value of the `content-type` response header, or `null` when absent. */
contentType: string | null;
/** URL that produced the body, i.e. the last URL requested after redirects. */
finalUrl: string;
/** HTTP status of the final response. */
status: number;
};
/**
 * Fetch a user-controlled asset while protecting against SSRF, redirect-based
 * host swapping, and unbounded memory usage.
 *
 * Redirects are followed manually: every hop, including the initial request,
 * is vetted with `ensureUrlAllowed` before it is requested. Bodies that will
 * not be read (redirects, error responses, oversized responses) are cancelled
 * so their connections are released promptly.
 *
 * NOTE: `timeoutMs` is enforced by `timedFetch`, whose timer is cleared once
 * `fetch` resolves; reading the body afterwards is bounded by `maxBytes`, not
 * by the timeout.
 * NOTE: `opts.headers` are sent unchanged on every hop, including hops to a
 * different host. Do not pass credentials here.
 *
 * @param opts - Target URL plus optional limits; see `FetchRemoteAssetOptions`.
 * @returns The buffered body, its content type, the final URL and the status.
 * @throws {RemoteAssetError} For invalid or disallowed URLs (including a
 *   malformed redirect `Location`), too many redirects, non-OK responses, and
 *   bodies larger than `maxBytes`.
 */
export async function fetchRemoteAsset(
  opts: FetchRemoteAssetOptions,
): Promise<RemoteAssetResult> {
  let currentUrl = toUrl(opts.url, opts.currentUrl);
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES;
  const maxRedirects = opts.maxRedirects ?? DEFAULT_MAX_REDIRECTS;
  const allowHttp = opts.allowHttp ?? false;
  const allowedHosts =
    opts.allowedHosts
      ?.map((host) => host.trim().toLowerCase())
      .filter(Boolean) ?? [];

  for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
    // Treat every hop (including the initial request) as untrusted and re-run
    // the hostname/IP vetting so redirects cannot smuggle us into a private net.
    await ensureUrlAllowed(currentUrl, { allowHttp, allowedHosts });

    const response = await timedFetch(currentUrl.toString(), {
      headers: opts.headers,
      timeoutMs,
    });

    if (isRedirect(response)) {
      // A redirect body is never read; release it before moving on.
      await discardResponseBody(response);

      if (redirectCount === maxRedirects) {
        throw new RemoteAssetError(
          "redirect_limit",
          `Too many redirects fetching ${currentUrl.toString()}`,
        );
      }

      // Follow the Location manually so we can validate the next host ourselves.
      const location = response.headers.get("location");
      if (!location) {
        throw new RemoteAssetError(
          "response_error",
          "Redirect response missing Location header",
        );
      }

      // `toUrl` turns a malformed Location into RemoteAssetError("invalid_url")
      // instead of letting the URL constructor's raw TypeError escape.
      currentUrl = toUrl(location, currentUrl);
      continue;
    }

    if (!response.ok) {
      await discardResponseBody(response);
      const error = new RemoteAssetError(
        "response_error",
        `Remote asset request failed with ${response.status}`,
        response.status,
      );
      logger.warn("response error", {
        url: currentUrl.toString(),
        reason: error.message,
      });
      throw error;
    }

    // Cheap early exit when the server admits the body is too large; the
    // streaming read below still enforces the limit for everything else.
    const declaredLength = response.headers.get("content-length");
    if (declaredLength) {
      const declared = Number(declaredLength);
      if (Number.isFinite(declared) && declared > maxBytes) {
        await discardResponseBody(response);
        const error = new RemoteAssetError(
          "size_exceeded",
          `Remote asset declared size ${declared} exceeds limit ${maxBytes}`,
        );
        logger.warn("size exceeded", {
          url: currentUrl.toString(),
          reason: error.message,
        });
        throw error;
      }
    }

    const buffer = await readBodyWithLimit(response, maxBytes);
    const contentType = response.headers.get("content-type");
    return {
      buffer,
      contentType,
      finalUrl: currentUrl.toString(),
      status: response.status,
    };
  }

  // Only reachable when the loop never runs (e.g. a negative `maxRedirects`).
  throw new RemoteAssetError("redirect_limit", "Exceeded redirect limit");
}

/** Best-effort cancellation of a response body that will not be consumed. */
async function discardResponseBody(response: Response): Promise<void> {
  try {
    await response.body?.cancel();
  } catch {
    // Cancellation is a courtesy to free the connection; failures are irrelevant.
  }
}
/**
 * Normalise the caller-supplied target into a `URL`.
 *
 * An existing `URL` instance is returned as-is. A string is parsed, relative
 * to `base` when one is given; parse failures are reported as
 * `RemoteAssetError("invalid_url")`.
 */
function toUrl(input: string | URL, base?: string | URL): URL {
  if (input instanceof URL) {
    return input;
  }

  try {
    if (base) {
      return new URL(input, base);
    }
    return new URL(input);
  } catch {
    const message = `Invalid URL: ${input}`;
    throw new RemoteAssetError("invalid_url", message);
  }
}
/**
 * Validate scheme + hostname, ensure DNS does not resolve to private ranges,
 * and respect the optional allow list.
 *
 * Checks run in this order: protocol, hostname presence, static blocklist,
 * caller allow list, IP-literal vetting, and finally DNS resolution, where
 * every returned address must pass `isBlockedIp`.
 *
 * NOTE: the addresses vetted here are not pinned. The caller hands the URL
 * string to `fetch`, which resolves the name again on its own.
 *
 * @param url - The URL about to be requested.
 * @param options - `allowHttp` permits `http:`; `allowedHosts` (already
 *   normalised to lower case by the caller) restricts hosts when non-empty.
 * @throws {RemoteAssetError} `protocol_not_allowed`, `invalid_url`,
 *   `host_blocked`, `host_not_allowed`, `private_ip` or `dns_error`.
 */
async function ensureUrlAllowed(
  url: URL,
  options: { allowHttp: boolean; allowedHosts: string[] },
): Promise<void> {
  const protocol = url.protocol.toLowerCase();
  // HTTPS is the default; only allow HTTP when explicitly opted-in.
  if (protocol !== "https:" && !(options.allowHttp && protocol === "http:")) {
    throw new RemoteAssetError(
      "protocol_not_allowed",
      `Protocol ${protocol} not allowed`,
    );
  }

  const hostname = url.hostname.trim().toLowerCase();
  if (!hostname) {
    throw new RemoteAssetError("invalid_url", "URL missing hostname");
  }

  // A fully-qualified name such as "localhost." keeps its trailing dot in
  // `url.hostname`; ignore it so the blocklist fast path cannot be sidestepped.
  const blocklistKey = hostname.replace(/\.+$/, "");
  if (
    BLOCKED_HOSTNAMES.has(blocklistKey) ||
    BLOCKED_SUFFIXES.some((suffix) => blocklistKey.endsWith(suffix))
  ) {
    logger.warn("blocked host", {
      url: url.toString(),
    });
    throw new RemoteAssetError("host_blocked", `Host ${hostname} is blocked`);
  }

  if (
    options.allowedHosts.length > 0 &&
    !options.allowedHosts.includes(hostname)
  ) {
    logger.warn("blocked host", {
      url: url.toString(),
    });
    throw new RemoteAssetError(
      "host_not_allowed",
      `Host ${hostname} is not in allow list`,
    );
  }

  // `url.hostname` wraps IPv6 literals in brackets ("[::1]"), which `isIP`
  // does not recognise. Strip them so IPv6 literals are vetted as addresses
  // instead of being sent to the resolver as if they were names.
  const ipLiteral =
    hostname.startsWith("[") && hostname.endsWith("]")
      ? hostname.slice(1, -1)
      : hostname;

  if (isIP(ipLiteral)) {
    if (isBlockedIp(ipLiteral)) {
      logger.warn("blocked private ip", {
        url: url.toString(),
      });
      throw new RemoteAssetError(
        "private_ip",
        `IP ${hostname} is not reachable`,
      );
    }
    // A literal address needs no DNS resolution.
    return;
  }

  let records: Array<{ address: string; family: number }>;
  try {
    records = await dnsLookup(hostname, { all: true });
  } catch (err) {
    logger.error("unexpected lookup error", err, {
      url: url.toString(),
    });
    throw new RemoteAssetError(
      "dns_error",
      err instanceof Error ? err.message : "DNS lookup failed",
    );
  }

  if (!records || records.length === 0) {
    logger.warn("lookup returned no records", {
      url: url.toString(),
    });
    throw new RemoteAssetError("dns_error", "DNS lookup returned no records");
  }

  // Reject if ANY record is blocked: a mixed answer must not get through.
  if (records.some((record) => isBlockedIp(record.address))) {
    logger.warn("blocked private ip", {
      url: url.toString(),
    });
    throw new RemoteAssetError(
      "private_ip",
      `DNS for ${hostname} resolved to private address`,
    );
  }
}
/**
 * Issue a single GET with `fetch`, aborting it if no response arrives within
 * `opts.timeoutMs`.
 *
 * Redirects are left unfollowed (`redirect: "manual"`) so the caller can vet
 * each hop. The timer is cleared as soon as `fetch` settles, so it does not
 * cover reading the response body afterwards.
 */
async function timedFetch(
  url: string,
  opts: { headers?: HeadersInit; timeoutMs: number },
): Promise<Response> {
  const abortController = new AbortController();
  const timeoutHandle = setTimeout(() => {
    abortController.abort();
  }, opts.timeoutMs);

  try {
    const response = await fetch(url, {
      method: "GET",
      headers: opts.headers,
      redirect: "manual",
      signal: abortController.signal,
    });
    return response;
  } finally {
    // Always disarm the timer, whether the request succeeded or failed.
    clearTimeout(timeoutHandle);
  }
}
/** True when the response status is in the 3xx range (300 inclusive to 400 exclusive). */
function isRedirect(response: Response): boolean {
  const { status } = response;
  return status >= 300 && status < 400;
}
/**
 * Incrementally read the response body, aborting if it exceeds the byte limit.
 *
 * @param response - Response whose body has not been consumed yet.
 * @param maxBytes - Largest body size (in bytes) that is accepted; a body of
 *   exactly `maxBytes` is allowed.
 * @returns The complete body as a single `Buffer`.
 * @throws {RemoteAssetError} `size_exceeded` once more than `maxBytes` bytes
 *   have been received.
 */
async function readBodyWithLimit(
  response: Response,
  maxBytes: number,
): Promise<Buffer> {
  if (!response.body) {
    // No stream available (tiny body or mocked response); a simple check suffices.
    const buf = Buffer.from(await response.arrayBuffer());
    if (buf.byteLength > maxBytes) {
      throw new RemoteAssetError(
        "size_exceeded",
        `Remote asset exceeded ${maxBytes} bytes`,
      );
    }
    return buf;
  }

  const reader = response.body.getReader();
  // Chunks are kept as-is; `Buffer.concat` performs the single copy at the end.
  const chunks: Uint8Array[] = [];
  let received = 0;

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    if (!value) continue;

    received += value.byteLength;
    if (received > maxBytes) {
      // `cancel()` is asynchronous: it must be awaited inside the try block,
      // otherwise a failed cancellation escapes as an unhandled rejection.
      try {
        await reader.cancel();
      } catch {
        // Best effort; the size error below is what the caller needs to see.
      }
      // Abort as soon as the limit is crossed to avoid buffering unbounded data.
      throw new RemoteAssetError(
        "size_exceeded",
        `Remote asset exceeded ${maxBytes} bytes`,
      );
    }
    chunks.push(value);
  }

  return Buffer.concat(chunks, received);
}
/**
 * Decide whether an address must not be contacted.
 *
 * Only addresses that ipaddr.js classifies as `unicast` are let through;
 * every other range, and anything that fails to parse, is treated as blocked.
 */
function isBlockedIp(address: string): boolean {
  let range: string;
  try {
    range = ipaddr.parse(address).range();
  } catch {
    // Unparseable input is rejected rather than trusted.
    return true;
  }
  return range !== "unicast";
}