File size: 2,576 Bytes
2c00ea8
 
 
 
 
564e576
aee936e
2c00ea8
 
 
aee936e
dc1fb76
2c00ea8
911e941
 
 
 
 
 
 
 
 
 
 
 
 
2c00ea8
aee936e
 
2c00ea8
dc1fb76
aee936e
 
 
 
 
 
 
 
 
 
2c00ea8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aee936e
2c00ea8
 
aee936e
 
 
 
 
2c00ea8
aee936e
 
54e4da8
 
 
 
 
 
 
 
 
 
 
 
2c00ea8
f98c542
 
 
 
 
 
 
2c00ea8
aee936e
 
54e4da8
aee936e
2c00ea8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import {
	chromium,
	devices,
	type Page,
	type BrowserContextOptions,
	type Response,
	type Browser,
} from "playwright";
import { PlaywrightBlocker } from "@cliqz/adblocker-playwright";
import { env } from "$env/dynamic/private";
import { logger } from "$lib/server/logger";
import { onExit } from "$lib/server/exitHandler";

/**
 * Builds the request blocker that `withPage` attaches to pages.
 *
 * When `PLAYWRIGHT_ADBLOCKER` is "true", the prebuilt ads-and-tracking engine is
 * fetched and then configured via `blockFonts`, `blockMedias`, `blockFrames` and
 * `blockImages` (plus `blockScripts` when `WEBSEARCH_JAVASCRIPT` is "false").
 * If anything in that setup fails, the error is logged and an empty blocker is
 * returned instead. For any other value of `PLAYWRIGHT_ADBLOCKER` the blocker is empty.
 */
async function createBlocker() {
	if (env.PLAYWRIGHT_ADBLOCKER !== "true") return PlaywrightBlocker.empty();

	try {
		const engine = await PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch);
		const mostBlocked = engine.blockFonts().blockMedias().blockFrames().blockImages();
		return env.WEBSEARCH_JAVASCRIPT === "false" ? mostBlocked.blockScripts() : mostBlocked;
	} catch (err) {
		logger.error(err, "Failed to initialize PlaywrightBlocker from prebuilt lists");
		return PlaywrightBlocker.empty();
	}
}

// Resolved once while the module is being evaluated (top-level await).
const blocker = await createBlocker();

/** Launch promise of the shared browser; `undefined` until first use and after a disconnect. */
let browserSingleton: Promise<Browser> | undefined;

/**
 * Launches a headless Chromium instance.
 *
 * The instance is closed through the `onExit` hook, and when it emits
 * "disconnected" a warning is logged and `browserSingleton` is cleared.
 *
 * @returns The launched browser.
 */
async function getBrowser() {
	const instance = await chromium.launch({ headless: true });

	const closeOnExit = () => instance.close();
	const handleDisconnect = () => {
		logger.warn("Browser closed");
		browserSingleton = undefined;
	};

	onExit(closeOnExit);
	instance.on("disconnected", handleDisconnect);

	return instance;
}

/**
 * Creates a new browser context on the shared browser, launching the browser
 * first if no launch is currently cached in `browserSingleton`.
 *
 * The context is based on the "Desktop Chrome" device descriptor with an
 * extra-wide 3840x1080 screen/viewport, reduced motion, downloads disabled and
 * a fixed "America/New_York" timezone and "en-US" locale.
 *
 * @returns A fresh browser context; the caller is responsible for closing it.
 * @throws Whatever `getBrowser()` or `browser.newContext()` rejects with.
 */
async function getPlaywrightCtx() {
	let launch = browserSingleton;
	if (!launch) {
		const pendingLaunch = getBrowser();
		launch = pendingLaunch;
		browserSingleton = pendingLaunch;
		// Do not keep a failed launch cached: without this, a single rejected
		// launch would make every later call fail until the process restarts.
		// The identity check avoids clobbering a newer launch stored meanwhile.
		pendingLaunch.catch(() => {
			if (browserSingleton === pendingLaunch) browserSingleton = undefined;
		});
	}
	// A launch failure still propagates to this caller through the await.
	const browser = await launch;

	const device = devices["Desktop Chrome"];
	const options: BrowserContextOptions = {
		...device,
		// Increasing width improves spatial clustering accuracy
		screen: {
			width: 3840,
			height: 1080,
		},
		viewport: {
			width: 3840,
			height: 1080,
		},
		reducedMotion: "reduce",
		acceptDownloads: false,
		timezoneId: "America/New_York",
		locale: "en-US",
	};
	return browser.newContext(options);
}

/**
 * Opens `url` in a page of a freshly created browser context, hands the page to
 * `callback`, and always closes the context afterwards.
 *
 * - When `PLAYWRIGHT_ADBLOCKER` is "true", the module's blocker is enabled on the page.
 * - Every request whose URL does not start with "https://" is aborted and logged.
 * - Navigation waits for the "load" event for at most `WEBSEARCH_TIMEOUT`
 *   milliseconds. A navigation failure is logged but not rethrown: `callback`
 *   still runs, with `response` left undefined.
 *
 * @param url Address to navigate to.
 * @param callback Receives the page and, if navigation produced one, the main response.
 * @returns Whatever `callback` resolves to.
 * @throws Errors from context/page creation or from `callback` itself.
 */
export async function withPage<T>(
	url: string,
	callback: (page: Page, response?: Response) => Promise<T>
): Promise<T> {
	// Parse the timeout once, with an explicit radix. A missing, non-numeric or
	// negative value falls back to 30000 ms (Playwright's documented default
	// navigation timeout) instead of handing NaN to `page.goto`.
	const fallbackTimeoutMs = 30000;
	const configuredTimeoutMs = Number.parseInt(env.WEBSEARCH_TIMEOUT ?? "", 10);
	const timeoutMs =
		Number.isNaN(configuredTimeoutMs) || configuredTimeoutMs < 0
			? fallbackTimeoutMs
			: configuredTimeoutMs;

	const ctx = await getPlaywrightCtx();

	try {
		const page = await ctx.newPage();
		if (env.PLAYWRIGHT_ADBLOCKER === "true") {
			await blocker.enableBlockingInPage(page);
		}

		// Only allow https traffic; everything else is aborted.
		await page.route("**", (route, request) => {
			const requestUrl = request.url();
			if (!requestUrl.startsWith("https://")) {
				logger.warn(`Blocked request to: ${requestUrl}`);
				return route.abort();
			}
			return route.continue();
		});

		let response: Response | undefined;
		try {
			// `goto` may resolve to null; normalise that to undefined for the callback.
			response = (await page.goto(url, { waitUntil: "load", timeout: timeoutMs })) ?? undefined;
		} catch (err) {
			// Best effort: the callback may still be able to use a partially loaded page.
			// The error is attached so non-timeout failures can be told apart.
			logger.warn(err, `Failed to load page within ${timeoutMs / 1000}s: ${url}`);
		}

		return await callback(page, response);
	} finally {
		await ctx.close();
	}
}