aidial_adapter_bedrock/embedding/cohere/response.py (18 lines of code) (raw):

from typing import List, Literal, Tuple

from pydantic import BaseModel

from aidial_adapter_bedrock.bedrock import Bedrock
from aidial_adapter_bedrock.utils.log_config import bedrock_logger as log


class CohereResponse(BaseModel):
    """Schema of the JSON body returned by a Cohere embedding invocation."""

    id: str
    response_type: Literal["embeddings_floats"]
    embeddings: List[List[float]]
    texts: List[str]

    # The Cohere API reference (https://docs.cohere.com/reference/embed)
    # says the input token count is returned in the `meta` field of the
    # response body. Bedrock, however, moved it to the response headers,
    # so it is deliberately not modelled here.


async def call_embedding_model(
    client: Bedrock, model: str, request: dict
) -> Tuple[List[List[float]], int]:
    """Invoke a Cohere embedding model and return its vectors and token usage.

    Args:
        client: Bedrock client used to perform the non-streaming invocation.
        model: Identifier of the model to invoke.
        request: Request payload forwarded to the model as-is.

    Returns:
        A pair ``(embeddings, input_tokens)``. ``input_tokens`` is read from
        the ``x-amzn-bedrock-input-token-count`` response header and is ``0``
        when that header is absent (a warning is logged in that case).
    """
    payload, response_headers = await client.ainvoke_non_streaming(
        model, request
    )

    # Validate the body first so a malformed response fails before any
    # header handling takes place.
    parsed = CohereResponse.parse_obj(payload)

    # A missing header falls back to "0", which is then reported below.
    raw_token_count = response_headers.get(
        "x-amzn-bedrock-input-token-count", "0"
    )
    token_count = int(raw_token_count)

    if not token_count:
        log.warning("Can't extract input tokens from embeddings response")

    return parsed.embeddings, token_count