aidial_adapter_vertexai/vertex_ai.py (34 lines of code) (raw):

from aiocache import cached
from vertexai.preview.language_models import (
    ChatModel,
    CodeChatModel,
    TextEmbeddingModel,
)
from vertexai.preview.vision_models import ImageGenerationModel
from vertexai.vision_models import MultiModalEmbeddingModel

from aidial_adapter_vertexai.utils.concurrency import make_single_thread_async

# TODO: every loader below goes through `make_single_thread_async` because
# calling `from_pretrained` from different threads causes a deadlock:
# https://github.com/googleapis/python-aiplatform/issues/4342
# Once that issue is resolved, a plain `make_async` can be used instead.


@cached()
async def get_code_chat_model(model_id: str) -> CodeChatModel:
    """Load the `CodeChatModel` identified by `model_id`.

    The coroutine is wrapped in aiocache's `cached()` with default settings.
    """
    loader = CodeChatModel.from_pretrained
    model = await make_single_thread_async(loader, model_id)
    return model


@cached()
async def get_chat_model(model_id: str) -> ChatModel:
    """Load the `ChatModel` identified by `model_id`.

    The coroutine is wrapped in aiocache's `cached()` with default settings.
    """
    loader = ChatModel.from_pretrained
    model = await make_single_thread_async(loader, model_id)
    return model


@cached()
async def get_text_embedding_model(model_id: str) -> TextEmbeddingModel:
    """Load the `TextEmbeddingModel` identified by `model_id`.

    The coroutine is wrapped in aiocache's `cached()` with default settings.
    """
    loader = TextEmbeddingModel.from_pretrained
    model = await make_single_thread_async(loader, model_id)
    return model


@cached()
async def get_multi_modal_embedding_model(
    model_id: str,
) -> MultiModalEmbeddingModel:
    """Load the `MultiModalEmbeddingModel` identified by `model_id`.

    The coroutine is wrapped in aiocache's `cached()` with default settings.
    """
    loader = MultiModalEmbeddingModel.from_pretrained
    model = await make_single_thread_async(loader, model_id)
    return model


@cached()
async def get_image_generation_model(model_id: str) -> ImageGenerationModel:
    """Load the `ImageGenerationModel` identified by `model_id`.

    The coroutine is wrapped in aiocache's `cached()` with default settings.
    """
    loader = ImageGenerationModel.from_pretrained
    model = await make_single_thread_async(loader, model_id)
    return model