// Mirror of https://github.com/jakejarvis/hoot.git (synced 2025-10-18 22:34:25 -04:00)
// 163 lines · 4.9 KiB · TypeScript
import type { Browser } from "puppeteer-core";
|
|
import { getOrCreateCachedAsset } from "@/lib/cache";
|
|
import { SCREENSHOT_TTL_SECONDS, USER_AGENT } from "@/lib/constants";
|
|
import { addWatermarkToScreenshot, optimizeImageCover } from "@/lib/image";
|
|
import { launchChromium } from "@/lib/puppeteer";
|
|
import { ns } from "@/lib/redis";
|
|
import { uploadImage } from "@/lib/storage";
|
|
|
|
// Screenshot dimensions in pixels; used for the browser viewport, the image
// post-processing calls, and the cache/lock key suffix.
const VIEWPORT_WIDTH = 1200;
const VIEWPORT_HEIGHT = 630;

// Timeout (ms) passed to `page.goto` for the initial navigation.
const NAV_TIMEOUT_MS = 8000;

// `idleTime` and `timeout` (ms) passed to `page.waitForNetworkIdle` after navigation.
const IDLE_TIME_MS = 500;
const IDLE_TIMEOUT_MS = 3000;

// Retry defaults, used when the caller does not override them via `options`.
const CAPTURE_MAX_ATTEMPTS_DEFAULT = 3;
const CAPTURE_BACKOFF_BASE_MS_DEFAULT = 200;
const CAPTURE_BACKOFF_MAX_MS_DEFAULT = 1200;
|
|
|
|
/**
 * Resolves after roughly `ms` milliseconds, using `setTimeout`.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
|
|
|
/**
 * Computes an exponential backoff delay with up to 25% random jitter,
 * never exceeding `maxMs`.
 *
 * @param attemptIndex - Zero-based index of the attempt that just failed.
 * @param baseMs - Delay for the first retry; doubled for each later attempt.
 * @param maxMs - Upper bound applied to both the base delay and the final result.
 * @returns The delay in milliseconds.
 */
function backoffDelayMs(
  attemptIndex: number,
  baseMs: number,
  maxMs: number,
): number {
  const base = Math.min(maxMs, baseMs * 2 ** attemptIndex);
  // `base` is already clamped to `maxMs`, so the jitter is derived from it directly.
  const jitter = Math.floor(Math.random() * base * 0.25);
  return Math.min(base + jitter, maxMs);
}
|
|
|
|
/**
 * Builds the candidate homepage URLs for a domain, HTTPS first and then HTTP.
 *
 * @param domain - Host name, interpolated as-is after the scheme.
 * @returns The two candidate URLs in the order they should be tried.
 */
function buildHomepageUrls(domain: string): string[] {
  return [`https://${domain}`, `http://${domain}`];
}
|
|
|
|
/**
 * Returns the stored screenshot URL for a domain's homepage, producing and
 * uploading a new screenshot through `getOrCreateCachedAsset` when needed.
 *
 * The producer launches Chromium, tries `https://` and then `http://`, and
 * retries each URL up to `attempts` times with jittered exponential backoff.
 * Each capture is resized, watermarked and uploaded; the first successful
 * upload is returned. If every attempt fails the producer yields `{ url: null }`.
 *
 * NOTE(review): when the producer actually runs (e.g. only on a cache miss or
 * after acquiring the lock) is decided by `getOrCreateCachedAsset` — confirm
 * in `@/lib/cache`.
 *
 * @param domain - Domain whose homepage should be captured.
 * @param options - Optional retry tuning.
 * @param options.attempts - Max capture attempts per candidate URL (minimum 1).
 * @param options.backoffBaseMs - Base delay for the retry backoff.
 * @param options.backoffMaxMs - Cap for the retry backoff.
 * @returns The result of `getOrCreateCachedAsset`; `url` is `null` when no
 *   screenshot could be produced.
 */
export async function getOrCreateScreenshotBlobUrl(
  domain: string,
  options?: {
    attempts?: number;
    backoffBaseMs?: number;
    backoffMaxMs?: number;
  },
): Promise<{ url: string | null }> {
  const requestedAttempts = options?.attempts ?? CAPTURE_MAX_ATTEMPTS_DEFAULT;
  // `Math.max(1, NaN)` is NaN, which would make the retry loop run zero times;
  // fall back to the default so at least one capture is always attempted.
  const attempts = Number.isNaN(requestedAttempts)
    ? CAPTURE_MAX_ATTEMPTS_DEFAULT
    : Math.max(1, requestedAttempts);
  const backoffBaseMs =
    options?.backoffBaseMs ?? CAPTURE_BACKOFF_BASE_MS_DEFAULT;
  const backoffMaxMs = options?.backoffMaxMs ?? CAPTURE_BACKOFF_MAX_MS_DEFAULT;

  // Both keys are scoped by domain and viewport size.
  const dimensions = `${VIEWPORT_WIDTH}x${VIEWPORT_HEIGHT}`;
  const indexKey = ns("screenshot", "url", domain, dimensions);
  const lockKey = ns("lock", "screenshot", domain, dimensions);

  return await getOrCreateCachedAsset({
    indexKey,
    lockKey,
    ttlSeconds: SCREENSHOT_TTL_SECONDS,
    eventName: "screenshot_capture",
    baseMetrics: { domain, width: VIEWPORT_WIDTH, height: VIEWPORT_HEIGHT },
    purgeQueue: "screenshot",
    produceAndUpload: async () => {
      let browser: Browser | null = null;
      try {
        browser = await launchChromium();

        for (const url of buildHomepageUrls(domain)) {
          for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex++) {
            try {
              const rawPng = await captureViewportPng(browser, url);

              const png = await optimizeImageCover(
                rawPng,
                VIEWPORT_WIDTH,
                VIEWPORT_HEIGHT,
              );
              // An empty result counts as a failed attempt; move on to the
              // next attempt immediately (no backoff).
              if (!png || png.length === 0) continue;

              const withWatermark = await addWatermarkToScreenshot(
                png,
                VIEWPORT_WIDTH,
                VIEWPORT_HEIGHT,
              );

              const { url: storedUrl, key: fileKey } = await uploadImage({
                kind: "screenshot",
                domain,
                width: VIEWPORT_WIDTH,
                height: VIEWPORT_HEIGHT,
                buffer: withWatermark,
              });

              return {
                url: storedUrl,
                key: fileKey,
                metrics: {
                  source: url.startsWith("https://")
                    ? "direct_https"
                    : "direct_http",
                },
              };
            } catch {
              // Any failure in capture, processing or upload triggers a retry.
              // Only wait when another attempt for this URL will follow.
              if (attemptIndex < attempts - 1) {
                await sleep(
                  backoffDelayMs(attemptIndex, backoffBaseMs, backoffMaxMs),
                );
              }
            }
          }
          // All attempts for this URL failed; fall through to the next candidate.
        }

        return { url: null };
      } finally {
        if (browser) {
          try {
            await browser.close();
          } catch {
            // Best effort: a failed close must not mask the capture result.
          }
        }
      }
    },
  });
}

/**
 * Opens a new page, navigates to `url` and captures a viewport-sized PNG.
 * The page is always closed afterwards, even when navigation or capture throws.
 */
async function captureViewportPng(
  browser: Browser,
  url: string,
): Promise<Buffer> {
  const page = await browser.newPage();
  try {
    await page.setViewport({
      width: VIEWPORT_WIDTH,
      height: VIEWPORT_HEIGHT,
      deviceScaleFactor: 1,
    });
    await page.setUserAgent(USER_AGENT);
    await page.goto(url, {
      waitUntil: "domcontentloaded",
      timeout: NAV_TIMEOUT_MS,
    });

    try {
      await page.waitForNetworkIdle({
        idleTime: IDLE_TIME_MS,
        timeout: IDLE_TIMEOUT_MS,
      });
    } catch {
      // Best effort: if the network never goes idle, capture whatever rendered.
    }

    return (await page.screenshot({
      type: "png",
      fullPage: false,
    })) as Buffer;
  } finally {
    try {
      await page.close();
    } catch {
      // Best effort: ignore failures while closing the page.
    }
  }
}
|