in server/src/main/java/com/epam/aidial/core/server/controller/DeploymentPostController.java [411:446]
/**
 * Collects token usage for the current request and stores it in the request context.
 *
 * <p>If the deployment is a {@link Model} and the response status equals {@code HttpStatus.OK},
 * usage is parsed from {@code responseBody}; when nothing can be parsed an empty {@link TokenUsage}
 * is used instead. The usage is then stored in the context, the rate limiter is increased, the cost
 * is calculated on a best-effort basis and the model statistics are updated.
 * If the deployment is a model but the status differs, nothing is collected.
 * For any other deployment type the usage is requested from the token stats tracker and whatever
 * that call yields is stored in the context.
 *
 * @param responseBody response body to parse the token usage from
 * @return the future returned by the token stats tracker, or an already succeeded future without
 *         a value when the deployment is a model and the response status is not {@code HttpStatus.OK}
 */
private Future<TokenUsage> collectTokenUsage(Buffer responseBody) {
    if (!(context.getDeployment() instanceof Model model)) {
        // andThen runs on both outcomes, so the context receives result() even when the lookup fails.
        return proxy.getTokenStatsTracker().getTokenStats(context)
                .andThen(result -> context.setTokenUsage(result.result()));
    }
    if (context.getResponse().getStatusCode() != HttpStatus.OK.getCode()) {
        return Future.succeededFuture();
    }

    TokenUsage tokenUsage = TokenUsageParser.parse(responseBody);
    if (tokenUsage == null) {
        Pricing pricing = model.getPricing();
        // Warn only when pricing is absent or token based.
        if (pricing == null || "token".equals(pricing.getUnit())) {
            warnMissingTokenUsage(model);
        }
        tokenUsage = new TokenUsage();
    }
    context.setTokenUsage(tokenUsage);

    // The returned future is not awaited: a failure is only logged.
    proxy.getRateLimiter().increase(context, context.getDeployment())
            .onFailure(error -> log.warn("Failed to increase limit. Trace: {}. Span: {}",
                    context.getTraceId(), context.getSpanId(), error));

    try {
        BigDecimal cost = ModelCostCalculator.calculate(context);
        tokenUsage.setCost(cost);
        tokenUsage.setAggCost(cost);
    } catch (Throwable e) {
        // Best effort: a failed cost calculation must not prevent the stats update below.
        log.warn("Failed to calculate cost for model={}. Trace: {}. Span: {}",
                model.getName(), context.getTraceId(), context.getSpanId(), e);
    }
    return proxy.getTokenStatsTracker().updateModelStats(context);
}

/**
 * Logs a warning that no token usage could be parsed from the response.
 *
 * <p>The upstream endpoint and the response body length are resolved null-safely, so that a
 * missing upstream or response body cannot make this diagnostic message throw and interrupt
 * the token accounting performed by the caller; {@code null} is logged in their place.
 *
 * @param model the model deployment that handled the request
 */
private void warnMissingTokenUsage(Model model) {
    var upstreamRoute = context.getUpstreamRoute();
    var upstream = upstreamRoute == null ? null : upstreamRoute.get();
    Object upstreamEndpoint = upstream == null ? null : upstream.getEndpoint();

    Buffer body = context.getResponseBody();
    Object bodyLength = body == null ? null : (Object) body.length();

    log.warn("Can't find token usage. Trace: {}. Span: {}. Project: {}. Deployment: {}. Endpoint: {}. Upstream: {}. Status: {}. Length: {}",
            context.getTraceId(), context.getSpanId(),
            context.getProject(), model.getName(),
            model.getEndpoint(),
            upstreamEndpoint,
            context.getResponse().getStatusCode(),
            bodyLength);
}