aidial_adapter_bedrock/dial_api/response.py (48 lines of code) (raw):
from typing import List, Literal, Self
from aidial_sdk.embeddings import Embedding
from aidial_sdk.embeddings import Response as EmbeddingsResponse
from aidial_sdk.embeddings import Usage
from pydantic import BaseModel
from aidial_adapter_bedrock.embedding.encoding import vector_to_base64
class Capabilities(BaseModel):
    # Set of boolean capability flags attached to a model descriptor.
    # Every flag defaults to False, so a bare ``Capabilities()`` advertises
    # nothing; callers switch on only the flags they need.
    # NOTE(review): the exact meaning of each flag is defined by the API
    # consuming this schema and is not visible in this file.
    chat_completion: bool = False
    completion: bool = False
    embeddings: bool = False
    fine_tune: bool = False
    inference: bool = False
class ModelObject(BaseModel):
    # Descriptor of a single model: a fixed "model" type tag, the model's
    # capability flags, and its identifier.
    object: Literal["model"] = "model"
    capabilities: Capabilities = Capabilities()
    id: str

    @classmethod
    def chat_completions(cls, id: str) -> Self:
        """Build a descriptor for ``id`` with only ``chat_completion`` enabled."""
        flags = Capabilities(chat_completion=True)
        return cls(id=id, capabilities=flags)

    @classmethod
    def embeddings(cls, id: str) -> Self:
        """Build a descriptor for ``id`` with only ``embeddings`` enabled."""
        flags = Capabilities(embeddings=True)
        return cls(id=id, capabilities=flags)
class ModelsResponse(BaseModel):
    # Envelope for a collection of model descriptors: a fixed "list" type
    # tag plus the ModelObject entries themselves.
    object: Literal["list"] = "list"
    data: List[ModelObject]
def _encode_vector(
encoding_format: Literal["float", "base64"], vector: List[float]
) -> List[float] | str:
return vector_to_base64(vector) if encoding_format == "base64" else vector
def make_embeddings_response(
    model: str,
    encoding_format: Literal["float", "base64"],
    vectors: List[List[float]],
    prompt_tokens: int,
) -> EmbeddingsResponse:
    """Assemble an ``EmbeddingsResponse`` from raw embedding vectors.

    Args:
        model: Model name copied into the response.
        encoding_format: Representation applied to each vector through
            ``_encode_vector`` (``"float"`` or ``"base64"``).
        vectors: Embedding vectors; each one becomes an ``Embedding`` whose
            ``index`` is its position in this list.
        prompt_tokens: Token count reported as both ``prompt_tokens`` and
            ``total_tokens`` in the usage section.

    Returns:
        The populated ``EmbeddingsResponse``.
    """
    # Encode every vector up front, before any Embedding object is built.
    encoded = [_encode_vector(encoding_format, vector) for vector in vectors]

    items: List[Embedding] = []
    for position, payload in enumerate(encoded):
        items.append(Embedding(index=position, embedding=payload))

    # The total is reported as equal to the prompt token count.
    usage = Usage(prompt_tokens=prompt_tokens, total_tokens=prompt_tokens)
    return EmbeddingsResponse(model=model, data=items, usage=usage)