mirror of
https://github.com/jakejarvis/hoot.git
synced 2025-10-18 20:14:25 -04:00
Add cached asset utility for Redis integration in favicon and screenshot services
This commit is contained in:
@@ -21,6 +21,7 @@ import {
|
||||
TwitterIcon,
|
||||
} from "@/components/brand-icons";
|
||||
import { KeyValue } from "@/components/domain/key-value";
|
||||
import { KeyValueGrid } from "@/components/domain/key-value-grid";
|
||||
import { KeyValueSkeleton } from "@/components/domain/key-value-skeleton";
|
||||
import { Section } from "@/components/domain/section";
|
||||
import { SocialPreview } from "@/components/domain/social-preview";
|
||||
@@ -119,7 +120,7 @@ export function SeoSection({
|
||||
<span>Meta Tags</span>
|
||||
<SubheadCount count={metaTagCount} color="orange" />
|
||||
</div>
|
||||
<div className="grid grid-cols-1 gap-2 md:grid-cols-2">
|
||||
<KeyValueGrid colsSm={2} colsMd={2}>
|
||||
{metaTagValues
|
||||
.filter((t) => t.value != null)
|
||||
.map((t) => (
|
||||
@@ -147,7 +148,7 @@ export function SeoSection({
|
||||
copyable
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</KeyValueGrid>
|
||||
</div>
|
||||
|
||||
<div className="mt-6 space-y-3">
|
||||
@@ -809,12 +810,12 @@ function SeoSkeleton() {
|
||||
Meta Tags
|
||||
<SubheadCountSkeleton />
|
||||
</div>
|
||||
<div className="grid grid-cols-1 gap-2 md:grid-cols-2">
|
||||
<KeyValueGrid colsSm={2} colsMd={2}>
|
||||
<KeyValueSkeleton label="Title" widthClass="w-[220px]" />
|
||||
<KeyValueSkeleton label="Description" widthClass="w-[260px]" />
|
||||
<KeyValueSkeleton label="Canonical" widthClass="w-[200px]" />
|
||||
<KeyValueSkeleton label="Image" widthClass="w-[260px]" />
|
||||
</div>
|
||||
</KeyValueGrid>
|
||||
</div>
|
||||
|
||||
{/* Open Graph */}
|
||||
|
124
lib/cache/cached-asset.ts
vendored
Normal file
124
lib/cache/cached-asset.ts
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
import { captureServer } from "@/lib/analytics/server";
import { acquireLockOrWaitForResult, ns, redis } from "@/lib/redis";
|
||||
|
||||
type CachedAssetOptions<TProduceMeta extends Record<string, unknown>> = {
|
||||
indexKey: string;
|
||||
lockKey: string;
|
||||
ttlSeconds: number;
|
||||
eventName: string;
|
||||
baseMetrics?: Record<string, unknown>;
|
||||
/**
|
||||
* Produce and upload the asset, returning { url, key } and any metrics to attach
|
||||
*/
|
||||
produceAndUpload: () => Promise<{
|
||||
url: string | null;
|
||||
key?: string;
|
||||
metrics?: TProduceMeta;
|
||||
}>;
|
||||
/**
|
||||
* Purge queue name (zset) for scheduling deletes by expiresAtMs
|
||||
* If provided and key is returned, will zadd(key, expiresAtMs)
|
||||
*/
|
||||
purgeQueue?: string;
|
||||
};
|
||||
|
||||
export async function getOrCreateCachedAsset<T extends Record<string, unknown>>(
|
||||
options: CachedAssetOptions<T>,
|
||||
): Promise<{ url: string | null }> {
|
||||
const {
|
||||
indexKey,
|
||||
lockKey,
|
||||
ttlSeconds,
|
||||
eventName,
|
||||
baseMetrics,
|
||||
produceAndUpload,
|
||||
purgeQueue,
|
||||
} = options;
|
||||
const startedAt = Date.now();
|
||||
|
||||
// 1) Check index
|
||||
try {
|
||||
const raw = (await redis.get(indexKey)) as { url?: unknown } | null;
|
||||
if (raw && typeof raw === "object") {
|
||||
const cachedUrl = (raw as { url?: unknown }).url;
|
||||
if (typeof cachedUrl === "string") {
|
||||
await captureServer(eventName, {
|
||||
...baseMetrics,
|
||||
source: "redis",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "hit",
|
||||
});
|
||||
return { url: cachedUrl };
|
||||
}
|
||||
if (cachedUrl === null) {
|
||||
await captureServer(eventName, {
|
||||
...baseMetrics,
|
||||
source: "redis",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "not_found",
|
||||
cache: "hit",
|
||||
});
|
||||
return { url: null };
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
|
||||
// 2) Acquire lock or wait
|
||||
// Reuse redis.ts helper rather than duplicating. Import here lazily to avoid cycles.
|
||||
const { acquireLockOrWaitForResult } = await import("@/lib/redis");
|
||||
const lockResult = await acquireLockOrWaitForResult<{ url: string | null }>({
|
||||
lockKey,
|
||||
resultKey: indexKey,
|
||||
lockTtl: Math.max(5, Math.min(120, ttlSeconds)),
|
||||
});
|
||||
|
||||
if (!lockResult.acquired) {
|
||||
const cached = lockResult.cachedResult;
|
||||
if (cached && typeof cached === "object" && "url" in cached) {
|
||||
const cachedUrl = (cached as { url: string | null }).url;
|
||||
await captureServer(eventName, {
|
||||
...baseMetrics,
|
||||
source: "redis_wait",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: cachedUrl ? "ok" : "not_found",
|
||||
cache: "wait",
|
||||
});
|
||||
return { url: cachedUrl };
|
||||
}
|
||||
return { url: null };
|
||||
}
|
||||
|
||||
// 3) Do work under lock
|
||||
try {
|
||||
const produced = await produceAndUpload();
|
||||
const expiresAtMs = Date.now() + ttlSeconds * 1000;
|
||||
|
||||
try {
|
||||
await redis.set(
|
||||
indexKey,
|
||||
{ url: produced.url, key: produced.key, expiresAtMs },
|
||||
{ ex: ttlSeconds },
|
||||
);
|
||||
if (purgeQueue && produced.key) {
|
||||
await redis.zadd(ns("purge", purgeQueue), {
|
||||
score: expiresAtMs,
|
||||
member: produced.key,
|
||||
});
|
||||
}
|
||||
} catch {}
|
||||
|
||||
await captureServer(eventName, {
|
||||
...baseMetrics,
|
||||
...(produced.metrics ?? {}),
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: produced.url ? "ok" : "not_found",
|
||||
cache: "store",
|
||||
});
|
||||
return { url: produced.url };
|
||||
} finally {
|
||||
try {
|
||||
await redis.del(lockKey);
|
||||
} catch {}
|
||||
}
|
||||
}
|
@@ -1,6 +1,11 @@
|
||||
export const USER_AGENT =
|
||||
process.env.HOOT_USER_AGENT || "hoot.sh/0.1 (+https://hoot.sh)";
|
||||
|
||||
// Cache TTLs
|
||||
export const FAVICON_TTL_SECONDS = 7 * 24 * 60 * 60; // 1 week
|
||||
export const SCREENSHOT_TTL_SECONDS = 7 * 24 * 60 * 60; // 1 week
|
||||
export const SOCIAL_PREVIEW_TTL_SECONDS = 7 * 24 * 60 * 60; // 1 week
|
||||
|
||||
export const DEFAULT_SUGGESTIONS = [
|
||||
"github.com",
|
||||
"reddit.com",
|
||||
|
@@ -4,30 +4,11 @@ import { createHmac } from "node:crypto";
|
||||
import { UTApi, UTFile } from "uploadthing/server";
|
||||
import type { StorageKind } from "@/lib/schemas";
|
||||
|
||||
const ONE_WEEK_SECONDS = 7 * 24 * 60 * 60;
|
||||
const UPLOAD_MAX_ATTEMPTS = 3;
|
||||
const UPLOAD_BACKOFF_BASE_MS = 100;
|
||||
const UPLOAD_BACKOFF_MAX_MS = 2000;
|
||||
|
||||
function toPositiveInt(value: unknown, fallback: number): number {
|
||||
const n = Number(value);
|
||||
return Number.isFinite(n) && n > 0 ? Math.floor(n) : fallback;
|
||||
}
|
||||
|
||||
export function getFaviconTtlSeconds(): number {
|
||||
return toPositiveInt(process.env.FAVICON_TTL_SECONDS, ONE_WEEK_SECONDS);
|
||||
}
|
||||
|
||||
export function getScreenshotTtlSeconds(): number {
|
||||
return toPositiveInt(process.env.SCREENSHOT_TTL_SECONDS, ONE_WEEK_SECONDS);
|
||||
}
|
||||
|
||||
export function getSocialPreviewTtlSeconds(): number {
|
||||
return toPositiveInt(
|
||||
process.env.SOCIAL_PREVIEW_TTL_SECONDS,
|
||||
ONE_WEEK_SECONDS,
|
||||
);
|
||||
}
|
||||
// TTLs now live in lib/constants.ts
|
||||
|
||||
/**
|
||||
* Deterministic, obfuscated hash for IDs and filenames
|
||||
|
@@ -1,12 +1,11 @@
|
||||
import { captureServer } from "@/lib/analytics/server";
|
||||
import { USER_AGENT } from "@/lib/constants";
|
||||
import { getOrCreateCachedAsset } from "@/lib/cache/cached-asset";
|
||||
import { FAVICON_TTL_SECONDS, USER_AGENT } from "@/lib/constants";
|
||||
import { convertBufferToImageCover } from "@/lib/image";
|
||||
import { acquireLockOrWaitForResult, ns, redis } from "@/lib/redis";
|
||||
import { getFaviconTtlSeconds, uploadImage } from "@/lib/storage";
|
||||
import { ns } from "@/lib/redis";
|
||||
import { uploadImage } from "@/lib/storage";
|
||||
|
||||
const DEFAULT_SIZE = 32;
|
||||
const REQUEST_TIMEOUT_MS = 1500; // per each method
|
||||
const LOCK_TTL_SECONDS = 30; // lock TTL for upload coordination
|
||||
|
||||
async function fetchWithTimeout(
|
||||
url: string,
|
||||
@@ -43,220 +42,61 @@ function buildSources(domain: string): string[] {
|
||||
export async function getOrCreateFaviconBlobUrl(
|
||||
domain: string,
|
||||
): Promise<{ url: string | null }> {
|
||||
const startedAt = Date.now();
|
||||
console.debug("[favicon] start", { domain, size: DEFAULT_SIZE });
|
||||
|
||||
const indexKey = ns("favicon", "url", domain, String(DEFAULT_SIZE));
|
||||
const lockKey = ns("lock", "favicon", domain, String(DEFAULT_SIZE));
|
||||
const ttl = FAVICON_TTL_SECONDS;
|
||||
|
||||
// 1) Check Redis index first (supports positive and negative cache)
|
||||
try {
|
||||
console.debug("[favicon] redis get", { key: indexKey });
|
||||
const raw = (await redis.get(indexKey)) as { url?: unknown } | null;
|
||||
if (raw && typeof raw === "object") {
|
||||
const cachedUrl = (raw as { url?: unknown }).url;
|
||||
if (typeof cachedUrl === "string") {
|
||||
console.info("[favicon] cache hit", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
url: cachedUrl,
|
||||
});
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
source: "redis",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "hit",
|
||||
});
|
||||
return { url: cachedUrl };
|
||||
}
|
||||
if (cachedUrl === null) {
|
||||
console.info("[favicon] negative cache hit", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
});
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
source: "redis",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "not_found",
|
||||
cache: "hit",
|
||||
});
|
||||
return { url: null };
|
||||
}
|
||||
}
|
||||
console.debug("[favicon] cache miss", { domain, size: DEFAULT_SIZE });
|
||||
} catch {
|
||||
// ignore and proceed to fetch
|
||||
}
|
||||
|
||||
// 2) Acquire lock or wait for another process to complete
|
||||
const lockResult = await acquireLockOrWaitForResult<{ url: string | null }>({
|
||||
return await getOrCreateCachedAsset({
|
||||
indexKey,
|
||||
lockKey,
|
||||
resultKey: indexKey,
|
||||
lockTtl: LOCK_TTL_SECONDS,
|
||||
});
|
||||
|
||||
if (!lockResult.acquired) {
|
||||
// Another process was working on it
|
||||
const cached = lockResult.cachedResult as { url?: unknown } | null;
|
||||
if (cached && "url" in (cached as object)) {
|
||||
if (typeof cached.url === "string") {
|
||||
console.info("[favicon] found result from other process", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
url: cached.url,
|
||||
});
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
source: "redis_wait",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "wait",
|
||||
});
|
||||
return { url: cached.url };
|
||||
}
|
||||
if (cached.url === null) {
|
||||
console.info("[favicon] found negative result from other process", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
});
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
source: "redis_wait",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "not_found",
|
||||
cache: "wait",
|
||||
});
|
||||
return { url: null };
|
||||
}
|
||||
}
|
||||
// Timeout or other process failed - return null
|
||||
console.warn("[favicon] wait timeout, no result", { domain });
|
||||
return { url: null };
|
||||
}
|
||||
|
||||
// 3) We acquired the lock - fetch/convert/upload
|
||||
try {
|
||||
const sources = buildSources(domain);
|
||||
for (const src of sources) {
|
||||
try {
|
||||
console.debug("[favicon] fetch source", { src });
|
||||
const res = await fetchWithTimeout(src);
|
||||
if (!res.ok) continue;
|
||||
const contentType = res.headers.get("content-type");
|
||||
const ab = await res.arrayBuffer();
|
||||
const buf = Buffer.from(ab);
|
||||
console.debug("[favicon] fetched source ok", {
|
||||
src,
|
||||
status: res.status,
|
||||
contentType,
|
||||
bytes: buf.length,
|
||||
});
|
||||
|
||||
const webp = await convertBufferToImageCover(
|
||||
buf,
|
||||
DEFAULT_SIZE,
|
||||
DEFAULT_SIZE,
|
||||
contentType,
|
||||
);
|
||||
if (!webp) continue;
|
||||
console.debug("[favicon] converted to webp", {
|
||||
size: DEFAULT_SIZE,
|
||||
bytes: webp.length,
|
||||
});
|
||||
|
||||
const source = (() => {
|
||||
if (src.includes("icons.duckduckgo.com")) return "duckduckgo";
|
||||
if (src.includes("www.google.com/s2/favicons")) return "google";
|
||||
if (src.startsWith("https://")) return "direct_https";
|
||||
if (src.startsWith("http://")) return "direct_http";
|
||||
return "unknown";
|
||||
})();
|
||||
|
||||
console.info("[favicon] uploading via uploadthing");
|
||||
const { url, key } = await uploadImage({
|
||||
kind: "favicon",
|
||||
domain,
|
||||
width: DEFAULT_SIZE,
|
||||
height: DEFAULT_SIZE,
|
||||
buffer: webp,
|
||||
});
|
||||
console.info("[favicon] uploaded", { url, key });
|
||||
|
||||
// Write Redis index and schedule purge
|
||||
ttlSeconds: ttl,
|
||||
eventName: "favicon_fetch",
|
||||
baseMetrics: { domain, size: DEFAULT_SIZE },
|
||||
purgeQueue: "favicon",
|
||||
produceAndUpload: async () => {
|
||||
const sources = buildSources(domain);
|
||||
for (const src of sources) {
|
||||
try {
|
||||
const ttl = getFaviconTtlSeconds();
|
||||
const expiresAtMs = Date.now() + ttl * 1000;
|
||||
console.debug("[favicon] redis set index", {
|
||||
key: indexKey,
|
||||
ttlSeconds: ttl,
|
||||
expiresAtMs,
|
||||
});
|
||||
await redis.set(
|
||||
indexKey,
|
||||
{ url, key, expiresAtMs },
|
||||
{
|
||||
ex: ttl,
|
||||
},
|
||||
const res = await fetchWithTimeout(src);
|
||||
if (!res.ok) continue;
|
||||
const contentType = res.headers.get("content-type");
|
||||
const ab = await res.arrayBuffer();
|
||||
const buf = Buffer.from(ab);
|
||||
const webp = await convertBufferToImageCover(
|
||||
buf,
|
||||
DEFAULT_SIZE,
|
||||
DEFAULT_SIZE,
|
||||
contentType,
|
||||
);
|
||||
console.debug("[favicon] redis zadd purge", { key, expiresAtMs });
|
||||
await redis.zadd(ns("purge", "favicon"), {
|
||||
score: expiresAtMs,
|
||||
member: key, // store UploadThing file key for deletion API
|
||||
if (!webp) continue;
|
||||
const { url, key } = await uploadImage({
|
||||
kind: "favicon",
|
||||
domain,
|
||||
width: DEFAULT_SIZE,
|
||||
height: DEFAULT_SIZE,
|
||||
buffer: webp,
|
||||
});
|
||||
const source = (() => {
|
||||
if (src.includes("icons.duckduckgo.com")) return "duckduckgo";
|
||||
if (src.includes("www.google.com/s2/favicons")) return "google";
|
||||
if (src.startsWith("https://")) return "direct_https";
|
||||
if (src.startsWith("http://")) return "direct_http";
|
||||
return "unknown";
|
||||
})();
|
||||
return {
|
||||
url,
|
||||
key,
|
||||
metrics: {
|
||||
source,
|
||||
upstream_status: res.status,
|
||||
upstream_content_type: contentType ?? null,
|
||||
},
|
||||
};
|
||||
} catch {
|
||||
// best effort
|
||||
// try next source
|
||||
}
|
||||
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
source,
|
||||
upstream_status: res.status,
|
||||
upstream_content_type: contentType ?? null,
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "store",
|
||||
});
|
||||
|
||||
return { url };
|
||||
} catch (err) {
|
||||
console.warn("[favicon] source failed; trying next", {
|
||||
src,
|
||||
error: (err as Error)?.message,
|
||||
});
|
||||
// try next source
|
||||
}
|
||||
}
|
||||
|
||||
await captureServer("favicon_fetch", {
|
||||
domain,
|
||||
size: DEFAULT_SIZE,
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "not_found",
|
||||
cache: "miss",
|
||||
});
|
||||
console.warn("[favicon] not found after trying all sources", { domain });
|
||||
// Negative cache the failure for the same TTL as success
|
||||
try {
|
||||
const ttl = getFaviconTtlSeconds();
|
||||
const expiresAtMs = Date.now() + ttl * 1000;
|
||||
await redis.set(indexKey, { url: null, expiresAtMs }, { ex: ttl });
|
||||
} catch {
|
||||
// best effort
|
||||
}
|
||||
return { url: null };
|
||||
} finally {
|
||||
// Release lock (best effort)
|
||||
try {
|
||||
await redis.del(lockKey);
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return { url: null };
|
||||
},
|
||||
});
|
||||
}
|
||||
|
@@ -1,11 +1,11 @@
|
||||
import { waitUntil } from "@vercel/functions";
|
||||
import type { Browser } from "puppeteer-core";
|
||||
import { captureServer } from "@/lib/analytics/server";
|
||||
import { USER_AGENT } from "@/lib/constants";
|
||||
import { getOrCreateCachedAsset } from "@/lib/cache/cached-asset";
|
||||
import { SCREENSHOT_TTL_SECONDS, USER_AGENT } from "@/lib/constants";
|
||||
import { addWatermarkToScreenshot, optimizeImageCover } from "@/lib/image";
|
||||
import { launchChromium } from "@/lib/puppeteer";
|
||||
import { acquireLockOrWaitForResult, ns, redis } from "@/lib/redis";
|
||||
import { getScreenshotTtlSeconds, uploadImage } from "@/lib/storage";
|
||||
import { ns } from "@/lib/redis";
|
||||
import { uploadImage } from "@/lib/storage";
|
||||
|
||||
const VIEWPORT_WIDTH = 1200;
|
||||
const VIEWPORT_HEIGHT = 630;
|
||||
@@ -15,7 +15,6 @@ const IDLE_TIMEOUT_MS = 3000;
|
||||
const CAPTURE_MAX_ATTEMPTS_DEFAULT = 3;
|
||||
const CAPTURE_BACKOFF_BASE_MS_DEFAULT = 200;
|
||||
const CAPTURE_BACKOFF_MAX_MS_DEFAULT = 1200;
|
||||
const LOCK_TTL_SECONDS = 30; // lock TTL for upload coordination
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
@@ -43,12 +42,6 @@ export async function getOrCreateScreenshotBlobUrl(
|
||||
backoffMaxMs?: number;
|
||||
},
|
||||
): Promise<{ url: string | null }> {
|
||||
const startedAt = Date.now();
|
||||
console.debug("[screenshot] start", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
});
|
||||
const attempts = Math.max(
|
||||
1,
|
||||
options?.attempts ?? CAPTURE_MAX_ATTEMPTS_DEFAULT,
|
||||
@@ -56,7 +49,6 @@ export async function getOrCreateScreenshotBlobUrl(
|
||||
const backoffBaseMs =
|
||||
options?.backoffBaseMs ?? CAPTURE_BACKOFF_BASE_MS_DEFAULT;
|
||||
const backoffMaxMs = options?.backoffMaxMs ?? CAPTURE_BACKOFF_MAX_MS_DEFAULT;
|
||||
|
||||
const indexKey = ns(
|
||||
"screenshot",
|
||||
"url",
|
||||
@@ -69,145 +61,63 @@ export async function getOrCreateScreenshotBlobUrl(
|
||||
domain,
|
||||
`${VIEWPORT_WIDTH}x${VIEWPORT_HEIGHT}`,
|
||||
);
|
||||
const ttl = SCREENSHOT_TTL_SECONDS;
|
||||
|
||||
// 1) Check Redis index first
|
||||
try {
|
||||
console.debug("[screenshot] redis get", { key: indexKey });
|
||||
const raw = (await redis.get(indexKey)) as { url?: unknown } | null;
|
||||
if (raw && typeof raw === "object" && typeof raw.url === "string") {
|
||||
console.info("[screenshot] cache hit", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
url: raw.url,
|
||||
});
|
||||
await captureServer("screenshot_capture", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
source: "redis",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "hit",
|
||||
});
|
||||
return { url: raw.url };
|
||||
}
|
||||
console.debug("[screenshot] cache miss", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
});
|
||||
} catch {
|
||||
// ignore and proceed
|
||||
}
|
||||
|
||||
// 2) Acquire lock or wait for another process to complete
|
||||
const lockResult = await acquireLockOrWaitForResult<{ url: string }>({
|
||||
return await getOrCreateCachedAsset({
|
||||
indexKey,
|
||||
lockKey,
|
||||
resultKey: indexKey,
|
||||
lockTtl: LOCK_TTL_SECONDS,
|
||||
});
|
||||
|
||||
if (!lockResult.acquired) {
|
||||
// Another process was working on it
|
||||
if (lockResult.cachedResult?.url) {
|
||||
console.info("[screenshot] found result from other process", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
url: lockResult.cachedResult.url,
|
||||
});
|
||||
await captureServer("screenshot_capture", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
source: "redis_wait",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "wait",
|
||||
});
|
||||
return { url: lockResult.cachedResult.url };
|
||||
}
|
||||
// Timeout or other process failed - return null
|
||||
console.warn("[screenshot] wait timeout, no result", { domain });
|
||||
return { url: null };
|
||||
}
|
||||
|
||||
// 3) We acquired the lock - attempt to capture
|
||||
try {
|
||||
let browser: Browser | null = null;
|
||||
try {
|
||||
browser = await launchChromium();
|
||||
console.debug("[screenshot] browser launched", { mode: "chromium" });
|
||||
|
||||
const tryUrls = buildHomepageUrls(domain);
|
||||
for (const url of tryUrls) {
|
||||
let lastError: unknown = null;
|
||||
for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex++) {
|
||||
try {
|
||||
const page = await browser.newPage();
|
||||
let rawPng: Buffer;
|
||||
ttlSeconds: ttl,
|
||||
eventName: "screenshot_capture",
|
||||
baseMetrics: { domain, width: VIEWPORT_WIDTH, height: VIEWPORT_HEIGHT },
|
||||
purgeQueue: "screenshot",
|
||||
produceAndUpload: async () => {
|
||||
let browser: Browser | null = null;
|
||||
try {
|
||||
browser = await launchChromium();
|
||||
const tryUrls = buildHomepageUrls(domain);
|
||||
for (const url of tryUrls) {
|
||||
let lastError: unknown = null;
|
||||
for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex++) {
|
||||
try {
|
||||
await page.setViewport({
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
deviceScaleFactor: 1,
|
||||
});
|
||||
await page.setUserAgent(USER_AGENT);
|
||||
|
||||
console.debug("[screenshot] navigating", {
|
||||
url,
|
||||
attempt: attemptIndex + 1,
|
||||
});
|
||||
await page.goto(url, {
|
||||
waitUntil: "domcontentloaded",
|
||||
timeout: NAV_TIMEOUT_MS,
|
||||
});
|
||||
|
||||
// Give chatty pages/CDNs a brief chance to settle without hanging
|
||||
const page = await browser.newPage();
|
||||
let rawPng: Buffer;
|
||||
try {
|
||||
await page.waitForNetworkIdle({
|
||||
idleTime: IDLE_TIME_MS,
|
||||
timeout: IDLE_TIMEOUT_MS,
|
||||
await page.setViewport({
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
deviceScaleFactor: 1,
|
||||
});
|
||||
} catch {}
|
||||
|
||||
console.debug("[screenshot] navigated", {
|
||||
url,
|
||||
attempt: attemptIndex + 1,
|
||||
});
|
||||
|
||||
rawPng = (await page.screenshot({
|
||||
type: "png",
|
||||
fullPage: false,
|
||||
})) as Buffer;
|
||||
} finally {
|
||||
try {
|
||||
await page.close();
|
||||
} catch {}
|
||||
}
|
||||
console.debug("[screenshot] raw screenshot bytes", {
|
||||
bytes: rawPng.length,
|
||||
});
|
||||
|
||||
const png = await optimizeImageCover(
|
||||
rawPng,
|
||||
VIEWPORT_WIDTH,
|
||||
VIEWPORT_HEIGHT,
|
||||
);
|
||||
if (png && png.length > 0) {
|
||||
console.debug("[screenshot] optimized png bytes", {
|
||||
bytes: png.length,
|
||||
});
|
||||
await page.setUserAgent(USER_AGENT);
|
||||
await page.goto(url, {
|
||||
waitUntil: "domcontentloaded",
|
||||
timeout: NAV_TIMEOUT_MS,
|
||||
});
|
||||
try {
|
||||
await page.waitForNetworkIdle({
|
||||
idleTime: IDLE_TIME_MS,
|
||||
timeout: IDLE_TIMEOUT_MS,
|
||||
});
|
||||
} catch {}
|
||||
rawPng = (await page.screenshot({
|
||||
type: "png",
|
||||
fullPage: false,
|
||||
})) as Buffer;
|
||||
} finally {
|
||||
try {
|
||||
await page.close();
|
||||
} catch {}
|
||||
}
|
||||
const png = await optimizeImageCover(
|
||||
rawPng,
|
||||
VIEWPORT_WIDTH,
|
||||
VIEWPORT_HEIGHT,
|
||||
);
|
||||
if (!png || png.length === 0) continue;
|
||||
const withWatermark = await addWatermarkToScreenshot(
|
||||
png,
|
||||
VIEWPORT_WIDTH,
|
||||
VIEWPORT_HEIGHT,
|
||||
);
|
||||
console.debug("[screenshot] watermarked bytes", {
|
||||
bytes: withWatermark.length,
|
||||
});
|
||||
console.info("[screenshot] uploading via uploadthing");
|
||||
const { url: storedUrl, key: fileKey } = await uploadImage({
|
||||
kind: "screenshot",
|
||||
domain,
|
||||
@@ -215,113 +125,39 @@ export async function getOrCreateScreenshotBlobUrl(
|
||||
height: VIEWPORT_HEIGHT,
|
||||
buffer: withWatermark,
|
||||
});
|
||||
console.info("[screenshot] uploaded", {
|
||||
return {
|
||||
url: storedUrl,
|
||||
key: fileKey,
|
||||
});
|
||||
|
||||
// Write Redis index and schedule purge
|
||||
try {
|
||||
const ttl = getScreenshotTtlSeconds();
|
||||
const expiresAtMs = Date.now() + ttl * 1000;
|
||||
console.debug("[screenshot] redis set index", {
|
||||
key: indexKey,
|
||||
ttlSeconds: ttl,
|
||||
expiresAtMs,
|
||||
});
|
||||
await redis.set(
|
||||
indexKey,
|
||||
{ url: storedUrl, key: fileKey, expiresAtMs },
|
||||
{
|
||||
ex: ttl,
|
||||
},
|
||||
);
|
||||
console.debug("[screenshot] redis zadd purge", {
|
||||
key: fileKey,
|
||||
expiresAtMs,
|
||||
});
|
||||
await redis.zadd(ns("purge", "screenshot"), {
|
||||
score: expiresAtMs,
|
||||
member: fileKey, // store UploadThing file key for deletion API
|
||||
});
|
||||
} catch {
|
||||
// best effort
|
||||
metrics: {
|
||||
source: url.startsWith("https://")
|
||||
? "direct_https"
|
||||
: "direct_http",
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
const delay = backoffDelayMs(
|
||||
attemptIndex,
|
||||
backoffBaseMs,
|
||||
backoffMaxMs,
|
||||
);
|
||||
if (attemptIndex < attempts - 1) {
|
||||
await sleep(delay);
|
||||
}
|
||||
|
||||
await captureServer("screenshot_capture", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
source: url.startsWith("https://")
|
||||
? "direct_https"
|
||||
: "direct_http",
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "ok",
|
||||
cache: "store",
|
||||
});
|
||||
|
||||
return { url: storedUrl };
|
||||
}
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
const delay = backoffDelayMs(
|
||||
attemptIndex,
|
||||
backoffBaseMs,
|
||||
backoffMaxMs,
|
||||
);
|
||||
console.warn("[screenshot] attempt failed", {
|
||||
url,
|
||||
attempt: attemptIndex + 1,
|
||||
delay_ms: delay,
|
||||
error: (err as Error)?.message,
|
||||
});
|
||||
if (attemptIndex < attempts - 1) {
|
||||
await sleep(delay);
|
||||
}
|
||||
}
|
||||
if (lastError) {
|
||||
// try next candidate url
|
||||
}
|
||||
}
|
||||
|
||||
// Exhausted attempts for this URL, move to next candidate
|
||||
if (lastError) {
|
||||
console.warn("[screenshot] all attempts failed for url", {
|
||||
url,
|
||||
attempts,
|
||||
error: (lastError as Error)?.message,
|
||||
});
|
||||
return { url: null };
|
||||
} finally {
|
||||
if (browser) {
|
||||
try {
|
||||
waitUntil(browser.close());
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
// fallthrough to not_found
|
||||
|
||||
console.error("[screenshot] capture failed", {
|
||||
domain,
|
||||
error: (err as Error)?.message,
|
||||
});
|
||||
} finally {
|
||||
if (browser) {
|
||||
try {
|
||||
waitUntil(browser.close());
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
await captureServer("screenshot_capture", {
|
||||
domain,
|
||||
width: VIEWPORT_WIDTH,
|
||||
height: VIEWPORT_HEIGHT,
|
||||
duration_ms: Date.now() - startedAt,
|
||||
outcome: "not_found",
|
||||
cache: "miss",
|
||||
});
|
||||
|
||||
console.warn("[screenshot] returning null", { domain });
|
||||
return { url: null };
|
||||
} finally {
|
||||
// Release lock (best effort)
|
||||
try {
|
||||
await redis.del(lockKey);
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
},
|
||||
});
|
||||
}
|
||||
|
@@ -1,14 +1,10 @@
|
||||
import { captureServer } from "@/lib/analytics/server";
|
||||
import { USER_AGENT } from "@/lib/constants";
|
||||
import { SOCIAL_PREVIEW_TTL_SECONDS, USER_AGENT } from "@/lib/constants";
|
||||
import { optimizeImageCover } from "@/lib/image";
|
||||
import { acquireLockOrWaitForResult, ns, redis } from "@/lib/redis";
|
||||
import type { SeoResponse } from "@/lib/schemas";
|
||||
import { parseHtmlMeta, parseRobotsTxt, selectPreview } from "@/lib/seo";
|
||||
import {
|
||||
deterministicHash,
|
||||
getSocialPreviewTtlSeconds,
|
||||
uploadImage,
|
||||
} from "@/lib/storage";
|
||||
import { makeImageFileName, uploadImage } from "@/lib/storage";
|
||||
|
||||
const HTML_TTL_SECONDS = 1 * 60 * 60; // 1 hour
|
||||
const ROBOTS_TTL_SECONDS = 12 * 60 * 60; // 12 hours
|
||||
@@ -174,20 +170,26 @@ async function getOrCreateSocialPreviewImageUrl(
|
||||
): Promise<{ url: string | null }> {
|
||||
const startedAt = Date.now();
|
||||
const lower = domain.toLowerCase();
|
||||
const hash = deterministicHash(imageUrl);
|
||||
const fileId = makeImageFileName(
|
||||
"social",
|
||||
lower,
|
||||
SOCIAL_WIDTH,
|
||||
SOCIAL_HEIGHT,
|
||||
imageUrl,
|
||||
);
|
||||
|
||||
const indexKey = ns(
|
||||
"seo",
|
||||
"image-url",
|
||||
lower,
|
||||
hash,
|
||||
fileId,
|
||||
`${SOCIAL_WIDTH}x${SOCIAL_HEIGHT}`,
|
||||
);
|
||||
const lockKey = ns(
|
||||
"lock",
|
||||
"seo-image",
|
||||
lower,
|
||||
hash,
|
||||
fileId,
|
||||
`${SOCIAL_WIDTH}x${SOCIAL_HEIGHT}`,
|
||||
);
|
||||
|
||||
@@ -264,7 +266,7 @@ async function getOrCreateSocialPreviewImageUrl(
|
||||
});
|
||||
|
||||
try {
|
||||
const ttl = getSocialPreviewTtlSeconds();
|
||||
const ttl = SOCIAL_PREVIEW_TTL_SECONDS;
|
||||
const expiresAtMs = Date.now() + ttl * 1000;
|
||||
await redis.set(indexKey, { url, key, expiresAtMs }, { ex: ttl });
|
||||
await redis.zadd(ns("purge", "social"), {
|
||||
|
Reference in New Issue
Block a user