Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,468 Bytes
b17a5c8 9264459 2c00ea8 9264459 564e576 9264459 2c00ea8 564e576 6e18e46 ee5c213 e5f4e9a 2c00ea8 e5f4e9a 564e576 e5f4e9a 2a808d7 564e576 2a808d7 2c00ea8 e5f4e9a 6e18e46 e5f4e9a b17a5c8 2c00ea8 b17a5c8 2c00ea8 564e576 2c00ea8 564e576 2c00ea8 564e576 2c00ea8 564e576 2c00ea8 e5f4e9a 2c00ea8 564e576 2c00ea8 564e576 2c00ea8 564e576 2c00ea8 5459f31 564e576 e5f4e9a 2c00ea8 ee5c213 564e576 2c00ea8 5459f31 564e576 e5f4e9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import { defaultEmbeddingModel, embeddingModels } from "$lib/server/embeddingModels";
import type { Conversation } from "$lib/types/Conversation";
import type { Message } from "$lib/types/Message";
import type { WebSearch, WebSearchScrapedSource } from "$lib/types/WebSearch";
import type { Assistant } from "$lib/types/Assistant";
import type { MessageWebSearchUpdate } from "$lib/types/MessageUpdate";
import { search } from "./search/search";
import { scrape } from "./scrape/scrape";
import { findContextSources } from "./embed/embed";
import { removeParents } from "./markdown/tree";
import {
makeErrorUpdate,
makeFinalAnswerUpdate,
makeGeneralUpdate,
makeSourcesUpdate,
} from "./update";
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
import { MetricsServer } from "../metrics";
import { logger } from "$lib/server/logger";
/** Upper bound on how many search result pages `runWebSearch` hands to the scraper. */
const MAX_N_PAGES_TO_SCRAPE = 8;

/** Upper bound on how many non-empty scraped pages `runWebSearch` passes to `findContextSources`. */
const MAX_N_PAGES_TO_EMBED = 5;
/**
 * Runs the web-search pipeline for a conversation and streams progress updates.
 *
 * Steps, in order: resolve the embedding model, run the search, scrape the first
 * `MAX_N_PAGES_TO_SCRAPE` result pages, keep at most `MAX_N_PAGES_TO_EMBED` pages that
 * produced a non-empty markdown tree, and select context sources for the prompt.
 *
 * Any error raised inside the pipeline is caught, logged, and reported through an error
 * update; the generator then still emits a final-answer update and returns a `WebSearch`
 * with an empty query, no results and no context sources.
 *
 * @param conv - Conversation whose `embeddingModel` id selects the embedding model
 *   (falling back to `defaultEmbeddingModel` when the id is not found).
 * @param messages - Conversation messages; the content of the last one is used as the prompt.
 *   An empty array is reported as an error update instead of throwing.
 * @param ragSettings - Forwarded unchanged to `search`.
 * @param query - Forwarded unchanged to `search`.
 * @returns The `WebSearch` record (populated on success, empty on failure).
 */
export async function* runWebSearch(
	conv: Conversation,
	messages: Message[],
	ragSettings?: Assistant["rag"],
	query?: string
): AsyncGenerator<MessageWebSearchUpdate, WebSearch, undefined> {
	// Read the prompt defensively: this runs before the try block, so dereferencing a
	// missing last message here would escape the error-update path below.
	const prompt = messages[messages.length - 1]?.content ?? "";
	const createdAt = new Date();
	const updatedAt = new Date();

	MetricsServer.getMetrics().webSearch.requestCount.inc();

	try {
		// Surface the empty-input case through the same channel as every other failure.
		if (messages.length === 0) throw Error("No messages provided for web search");

		const embeddingModel =
			embeddingModels.find((m) => m.id === conv.embeddingModel) ?? defaultEmbeddingModel;
		if (!embeddingModel) {
			throw Error(`Embedding model ${conv.embeddingModel} not available anymore`);
		}

		// Search the web
		const { searchQuery, pages } = yield* search(messages, ragSettings, query);
		if (pages.length === 0) throw Error("No results found for this search query");

		// Scrape pages
		yield makeGeneralUpdate({ message: "Browsing search results" });
		const allScrapedPages = yield* mergeAsyncGenerators(
			pages.slice(0, MAX_N_PAGES_TO_SCRAPE).map(scrape(embeddingModel.chunkCharLength))
		);

		// Drop failed scrapes and pages without any content, then cap what gets embedded.
		const scrapedPages = allScrapedPages
			.filter((p): p is WebSearchScrapedSource => Boolean(p))
			.filter((p) => p.page.markdownTree.children.length > 0)
			.slice(0, MAX_N_PAGES_TO_EMBED);
		if (!scrapedPages.length) {
			throw Error(`No text found in the first ${MAX_N_PAGES_TO_SCRAPE} results`);
		}

		// Chunk the text of each of the elements and find the most similar chunks to the prompt
		yield makeGeneralUpdate({ message: "Extracting relevant information" });
		const rankedSources = await findContextSources(scrapedPages, prompt, embeddingModel);
		const contextSources = rankedSources.map((source) => ({
			...source,
			page: { ...source.page, markdownTree: removeParents(source.page.markdownTree) },
		}));
		yield makeSourcesUpdate(contextSources);

		const webSearch: WebSearch = {
			prompt,
			searchQuery,
			results: scrapedPages.map(({ page, ...source }) => ({
				...source,
				page: { ...page, markdownTree: removeParents(page.markdownTree) },
			})),
			contextSources,
			createdAt,
			updatedAt,
		};
		yield makeFinalAnswerUpdate();
		return webSearch;
	} catch (searchError) {
		const message = searchError instanceof Error ? searchError.message : String(searchError);
		logger.error(message);
		yield makeErrorUpdate({ message: "An error occurred", args: [message] });

		// Failure still produces a well-formed (empty) result so callers can persist it.
		const webSearch: WebSearch = {
			prompt,
			searchQuery: "",
			results: [],
			contextSources: [],
			createdAt,
			updatedAt,
		};
		yield makeFinalAnswerUpdate();
		return webSearch;
	}
}
|