def set_usage()

in aidial_adapter_openai/utils/streaming.py [0:0]


    def set_usage(chunk: dict | None, resp: ChatCompletionResponse) -> dict:
        """Attach a token-usage section to *chunk* based on *resp*.

        Tokenizes the completion response and, on success, writes a
        ``"usage"`` dict (completion/prompt/total token counts) into the
        chunk. Tokenization failures are logged and swallowed so the
        overall response is never failed just because usage could not be
        computed — the chunk is then returned without a ``"usage"`` key.

        :param chunk: the streaming chunk to annotate; when falsy, a fresh
            copy of the shared empty chunk template is used instead.
        :param resp: the accumulated chat-completion response to tokenize.
        :return: the (possibly newly created) chunk dict.
        """
        # BUG FIX: the original `chunk or empty_chunk` aliased the shared
        # `empty_chunk` object, so `chunk["usage"] = ...` below mutated it,
        # leaking usage data into every later call that fell back to it.
        # A shallow copy yields an identical result without shared-state
        # mutation. (Assumes `empty_chunk` is a shared template dict, as
        # its name suggests — confirm at its definition site.)
        chunk = chunk if chunk else dict(empty_chunk)

        # Do not fail the whole response if tokenization has failed
        try:
            completion_tokens = tokenize_response(resp)
            prompt_tokens = get_prompt_tokens()
        except Exception as e:
            logger.exception(
                f"caught exception while tokenization: {type(e).__module__}.{type(e).__name__}. "
                "The tokenization has failed, therefore, the usage won't be reported."
            )
        else:
            # Only report usage when both token counts were computed.
            chunk["usage"] = {
                "completion_tokens": completion_tokens,
                "prompt_tokens": prompt_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
        return chunk