# Utils/llm/api.py
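"""Thin HTTP client layer over several LLM provider APIs.

ask_model() dispatches a chat request to the provider-specific helper for the
given Model and normalizes the response into {"content", "tokens",
"execute_time"}, or {"error": ...} once retries are exhausted.
"""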
import time
import requests
from datetime import datetime
from Utils.llm.config import API, Model, temperature
from Utils.llm.bedrock import request_bedrock_data


class APIException(Exception):
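    """Raised when a provider endpoint returns a non-2xx HTTP response."""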
def __init__(self, status_code, content):
self.status_code = status_code
self.content = content
        super().__init__(self.content)


def request_openai_format_data(system_prompt, messages, model):
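    """Call an OpenAI-compatible chat completions endpoint.

    Returns {"content": str, "tokens": {"input_tokens", "output_tokens"}},
    plus "reasoning_tokens" when the provider reports them.
    Raises APIException on non-2xx responses.
    """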
config = API[model]()
    # Some endpoints reject a separate system message; skip it when configured.
    skip_system = config.get("skip_system", False)
    headers = {
        'Content-Type': 'application/json',
        # Both auth header styles are sent so a single code path covers
        # providers expecting either an 'Api-Key' header or a Bearer token.
        'Api-Key': config["api_key"],
        "Authorization": f"Bearer {config['api_key']}",
    }
    payload = {
        'model': config["model_id"],
        'messages': ([] if skip_system else [{'role': 'system', 'content': system_prompt}]) + messages,
        'temperature': temperature,
    }
max_tokens = config.get("max_tokens")
if max_tokens is not None:
payload['max_tokens'] = max_tokens
response = requests.post(config["url"], headers=headers, json=payload, timeout=300)
if not response.ok:
raise APIException(response.status_code, response.content)
data = response.json()
result = {
"content": data["choices"][0]["message"]["content"],
"tokens": {
"input_tokens": data["usage"]["prompt_tokens"],
"output_tokens": data["usage"]["completion_tokens"],
}
}
if "reasoning_tokens" in data["usage"].get("completion_tokens_details", {}):
result["tokens"]["reasoning_tokens"] = data["usage"]["completion_tokens_details"]["reasoning_tokens"]
return result
def request_gemini_pro_data(system_prompt, messages):
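    """Call the Gemini Pro endpoint using bearer-token auth.

    Message roles are passed through unchanged, so callers are assumed to
    already use Gemini-style roles ("user"/"model"). Raises APIException on
    non-2xx responses.
    """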
config = API[Model.GeminiPro]()
headers = {
'Content-Type': 'application/json',
"Authorization": f"Bearer {config['api_key']}",
}
contents = [
{"role": message['role'], "parts": [{"text": message['content']}]}
for message in messages
]
payload = {
"contents": contents,
"system_instruction": {"parts": [{"text": system_prompt}]},
"generation_config": {
"maxOutputTokens": 8192,
"temperature": temperature,
},
"safetySettings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_ONLY_HIGH",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_ONLY_HIGH",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_ONLY_HIGH",
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_ONLY_HIGH",
}
],
}
response = requests.post(config["url"], headers=headers, json=payload, timeout=300)
if not response.ok:
raise APIException(response.status_code, response.content)
data = response.json()
return {
'content': data["candidates"][0]["content"]["parts"][0]["text"],
'tokens': {
"input_tokens": data["usageMetadata"]["promptTokenCount"],
"output_tokens": data["usageMetadata"]["candidatesTokenCount"],
}
    }


def request_google_ai_studio_data(system_prompt, messages, model):
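    """Call a Google AI Studio Gemini endpoint.

    No auth header is sent here, so the API key is presumably carried in the
    configured URL. Raises APIException on non-2xx responses.
    """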
config = API[model]()
headers = {
'Content-Type': 'application/json',
}
contents = [
{"role": message['role'], "parts": [{"text": message['content']}]}
for message in messages
]
payload = {
"contents": contents,
"system_instruction": {"role": "user", "parts": [{"text": system_prompt}]},
"generation_config": {
"maxOutputTokens": 8192,
"temperature": temperature,
"responseMimeType": "text/plain"
},
}
response = requests.post(config["url"], headers=headers, json=payload, timeout=300)
if not response.ok:
raise APIException(response.status_code, response.content)
data = response.json()
return {
'content': data["candidates"][0]["content"]["parts"][0]["text"],
'tokens': {
"input_tokens": data["usageMetadata"]["promptTokenCount"],
"output_tokens": data["usageMetadata"]["candidatesTokenCount"],
}
    }


def request_claude_data(system_prompt, messages, model):
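    """Call an Anthropic Claude endpoint.

    anthropic_version travels in the request body and auth is a bearer token,
    which is consistent with a Vertex-style deployment rather than the public
    Anthropic API. Raises APIException on non-2xx responses.
    """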
    config = API[model]()  # Claude Opus / Sonnet / Haiku (see ask_model routing)
headers = {
'Content-Type': 'application/json; charset=utf-8',
"Authorization": f"Bearer {config['api_key']}",
}
payload = {
"anthropic_version": config['version'],
"max_tokens": 4096,
"stream": False,
"temperature": temperature,
"system": system_prompt,
"messages": messages # [{"role": "user", "content": prompt}]
}
response = requests.post(config["url"], headers=headers, json=payload, timeout=300)
if not response.ok:
raise APIException(response.status_code, response.content)
data = response.json()
return {
'content': data["content"][0]["text"],
'tokens': {
"input_tokens": data["usage"]["input_tokens"],
"output_tokens": data["usage"]["output_tokens"],
}
    }


def ask_model(messages, system_prompt, model, attempt=1):
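    """Send one chat request, routing to the provider helper for `model`.

    Retry policy: HTTP 429 backs off 60s and retries with no attempt cap;
    other API errors wait 10s and timeouts retry immediately, both giving up
    after the third attempt with an {"error": ...} result.
    """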
start_time = time.time()
print(f'\tAttempt {attempt} at {datetime.now()}')
try:
        match model:
case Model.GeminiPro:
data = request_gemini_pro_data(system_prompt, messages)
case Model.GeminiPro_0801 | Model.Gemini_15_Pro_002 | Model.GeminiPro_1114 | Model.GeminiPro_1121:
data = request_google_ai_studio_data(system_prompt, messages, model)
case Model.Opus_3 | Model.Sonnet_35 | Model.Sonnet_35v2 | Model.Haiku_35:
data = request_claude_data(system_prompt, messages, model)
case Model.AmazonNovaPro:
data = request_bedrock_data(system_prompt, messages, model)
case _:
data = request_openai_format_data(system_prompt, messages, model)
execute_time = time.time() - start_time
return {
"content": data["content"],
"tokens": data["tokens"],
"execute_time": execute_time
}
    except APIException as e:
        print(f"Error: {e.status_code}")
        print(f"Error: {e.content}")
        if e.status_code == 429:
            # Rate limited: back off for a minute and retry. Note this path
            # has no attempt cap, so persistent 429s retry indefinitely.
            print('Will try in 1 minute...')
            time.sleep(60)
            return ask_model(messages, system_prompt, model, attempt + 1)
        if attempt > 2:
            return {
                "error": f'### Error: {e.content}\n'
            }
        print("\tTrying again...")
        time.sleep(10)
        return ask_model(messages, system_prompt, model, attempt + 1)
except requests.exceptions.Timeout:
if attempt > 2:
return {
"error": f'### Error: Timeout error\n'
}
print("\tRequest timed out. Trying again...")
return ask_model(messages, system_prompt, model, attempt + 1)
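

# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of driving ask_model(). It assumes Utils.llm.config holds
# credentials for the chosen model; Model.Sonnet_35 is just one of the members
# referenced in the match statement above.
if __name__ == "__main__":
    reply = ask_model(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        system_prompt="You are a concise assistant.",
        model=Model.Sonnet_35,
    )
    if "error" in reply:
        print(reply["error"])
    else:
        print(reply["content"])
        print(f'{reply["tokens"]} in {reply["execute_time"]:.1f}s')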