Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import { collectDefaultMetrics, Registry, Counter, Summary } from "prom-client"; | |
import express from "express"; | |
import { logger } from "$lib/server/logger"; | |
import { env } from "$env/dynamic/private"; | |
import type { Model } from "$lib/types/Model"; | |
import { onExit } from "./exitHandler"; | |
import { promisify } from "util"; | |
interface Metrics { | |
model: { | |
conversationsTotal: Counter<Model["id"]>; | |
messagesTotal: Counter<Model["id"]>; | |
tokenCountTotal: Counter<Model["id"]>; | |
timePerOutputToken: Summary<Model["id"]>; | |
timeToFirstToken: Summary<Model["id"]>; | |
latency: Summary<Model["id"]>; | |
votesPositive: Counter<Model["id"]>; | |
votesNegative: Counter<Model["id"]>; | |
}; | |
webSearch: { | |
requestCount: Counter; | |
pageFetchCount: Counter; | |
pageFetchCountError: Counter; | |
pageFetchDuration: Summary; | |
embeddingDuration: Summary; | |
}; | |
tool: { | |
toolUseCount: Counter<string>; | |
toolUseCountError: Counter<string>; | |
toolUseDuration: Summary<string>; | |
timeToChooseTools: Summary; | |
}; | |
} | |
export class MetricsServer { | |
private static instance: MetricsServer; | |
private metrics: Metrics; | |
private constructor() { | |
const app = express(); | |
const port = Number(env.METRICS_PORT || "5565"); | |
if (isNaN(port) || port < 0 || port > 65535) { | |
logger.warn(`Invalid value for METRICS_PORT: ${env.METRICS_PORT}`); | |
} | |
if (env.METRICS_ENABLED !== "false" && env.METRICS_ENABLED !== "true") { | |
logger.warn(`Invalid value for METRICS_ENABLED: ${env.METRICS_ENABLED}`); | |
} | |
if (env.METRICS_ENABLED === "true") { | |
const server = app.listen(port, () => { | |
logger.info(`Metrics server listening on port ${port}`); | |
}); | |
const closeServer = promisify(server.close); | |
onExit(async () => { | |
logger.info("Disconnecting metrics server ..."); | |
await closeServer(); | |
logger.info("Server stopped ..."); | |
}); | |
} | |
const register = new Registry(); | |
collectDefaultMetrics({ register }); | |
this.metrics = { | |
model: { | |
conversationsTotal: new Counter({ | |
name: "model_conversations_total", | |
help: "Total number of conversations", | |
labelNames: ["model"], | |
registers: [register], | |
}), | |
messagesTotal: new Counter({ | |
name: "model_messages_total", | |
help: "Total number of messages", | |
labelNames: ["model"], | |
registers: [register], | |
}), | |
tokenCountTotal: new Counter({ | |
name: "model_token_count_total", | |
help: "Total number of tokens", | |
labelNames: ["model"], | |
registers: [register], | |
}), | |
timePerOutputToken: new Summary({ | |
name: "model_time_per_output_token_ms", | |
help: "Time per output token in ms", | |
labelNames: ["model"], | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
timeToFirstToken: new Summary({ | |
name: "model_time_to_first_token_ms", | |
help: "Time to first token", | |
labelNames: ["model"], | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
latency: new Summary({ | |
name: "model_latency_ms", | |
help: "Total latency until end of answer", | |
labelNames: ["model"], | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
votesPositive: new Counter({ | |
name: "model_votes_positive", | |
help: "Total number of positive votes on messages generated by the model", | |
labelNames: ["model"], | |
registers: [register], | |
}), | |
votesNegative: new Counter({ | |
name: "model_votes_negative", | |
help: "Total number of negative votes on messages generated by the model", | |
labelNames: ["model"], | |
registers: [register], | |
}), | |
}, | |
webSearch: { | |
requestCount: new Counter({ | |
name: "web_search_request_count", | |
help: "Total number of web search requests", | |
registers: [register], | |
}), | |
pageFetchCount: new Counter({ | |
name: "web_search_page_fetch_count", | |
help: "Total number of web search page fetches", | |
registers: [register], | |
}), | |
pageFetchCountError: new Counter({ | |
name: "web_search_page_fetch_count_error", | |
help: "Total number of web search page fetch errors", | |
registers: [register], | |
}), | |
pageFetchDuration: new Summary({ | |
name: "web_search_page_fetch_duration_ms", | |
help: "Web search page fetch duration", | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
embeddingDuration: new Summary({ | |
name: "web_search_embedding_duration_ms", | |
help: "Web search embedding duration", | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
}, | |
tool: { | |
toolUseCount: new Counter({ | |
name: "tool_use_count", | |
help: "Total number of tool uses", | |
labelNames: ["tool"], | |
registers: [register], | |
}), | |
toolUseCountError: new Counter({ | |
name: "tool_use_count_error", | |
help: "Total number of tool use errors", | |
labelNames: ["tool"], | |
registers: [register], | |
}), | |
toolUseDuration: new Summary({ | |
name: "tool_use_duration_ms", | |
help: "Tool use duration", | |
labelNames: ["tool"], | |
registers: [register], | |
maxAgeSeconds: 30 * 60, // longer duration since we use this to give feedback to the user | |
ageBuckets: 5, | |
}), | |
timeToChooseTools: new Summary({ | |
name: "time_to_choose_tools_ms", | |
help: "Time to choose tools", | |
labelNames: ["model"], | |
registers: [register], | |
maxAgeSeconds: 5 * 60, | |
ageBuckets: 5, | |
}), | |
}, | |
}; | |
app.get("/metrics", (req, res) => { | |
register.metrics().then((metrics) => { | |
res.set("Content-Type", "text/plain"); | |
res.send(metrics); | |
}); | |
}); | |
} | |
public static getInstance(): MetricsServer { | |
if (!MetricsServer.instance) { | |
MetricsServer.instance = new MetricsServer(); | |
} | |
return MetricsServer.instance; | |
} | |
public static getMetrics(): Metrics { | |
return MetricsServer.getInstance().metrics; | |
} | |
} | |