in Utils/get_tokens_and_time.py [0:0]
def extract_and_write_data(file_path, model, experiment):
    """Build one CSV line with token counts and execution time from a report.

    The file stem must have the form ``<category>_report_<attempt>``.
    ``<category>`` is split at its first underscore into a category name and
    an optional ``<repo>_<ci>_<size>`` suffix, where ``ci`` and ``size`` are
    one of ``low``, ``avg``, ``high`` or ``extra_high`` (``size`` may carry a
    single-digit suffix such as ``_2``). When the suffix is absent or does
    not match, repo, ci and size are all reported as ``'none'``.

    The file content is searched for a ``### Tokens: {...}`` line and a
    ``### Execution time: ...`` line. If either one is missing, a notice is
    printed and all four numeric columns are written as ``'0'``.

    Args:
        file_path: Path of the report file; it is read as UTF-8 text.
        model: Value written to the model column.
        experiment: Value written to the experiment column.

    Returns:
        A newline-terminated CSV line with the fields, in order: experiment,
        category name, ``repo_to_technology`` lookup for the repo (``'none'``
        when unknown), model, repo name, ci, size, attempt, input tokens,
        reasoning tokens, output tokens, execution time.
        ``None`` when the file cannot be processed; the reason is printed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as report:
            data = report.read()

        file_name = Path(file_path).stem
        # Validate the name explicitly instead of relying on a tuple-unpacking
        # ValueError, so the printed reason says what is wrong and where.
        category, separator, attempt = file_name.partition('_report_')
        if not separator or '_report_' in attempt:
            print(
                "Error while processing file: file name must contain "
                f"'_report_' exactly once ({file_path})"
            )
            return None

        tokens_regex = r"### Tokens: {'input_tokens': (\d+), 'output_tokens': (\d+)(?:, 'reasoning_tokens': (\d+))?}"
        execution_time_regex = r"### Execution time: ([\d.]+)"
        tokens_match = re.search(tokens_regex, data)
        execution_time_match = re.search(execution_time_regex, data)

        category_name, _, repo_and_complexity = category.partition('_')
        repo_name = ci = size = 'none'
        if repo_and_complexity:
            # The lazy leading group lets repo names contain underscores; the
            # first position where two complexity levels follow wins.
            regex = r"^(.+?)_((?:low|avg|high|extra_high))_((?:low|avg|high|extra_high)(?:_\d)?)"
            match = re.match(regex, repo_and_complexity)
            if match:
                repo_name, ci, size = match.groups()

        if tokens_match and execution_time_match:
            input_tokens, output_tokens, reasoning_tokens = tokens_match.groups()
            # 'reasoning_tokens' is optional in the report; default it to '0'.
            reasoning_tokens = reasoning_tokens or '0'
            execution_time = execution_time_match.group(1)
        else:
            print(f"No available data {file_name}.")
            input_tokens = output_tokens = execution_time = reasoning_tokens = '0'

        csv_line = f"{experiment},{category_name},{repo_to_technology.get(repo_name, 'none')},{model},{repo_name},{ci},{size},{attempt},{input_tokens},{reasoning_tokens},{output_tokens},{execution_time}\n"
        return csv_line
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable report must not
        # abort a batch run. Report which file failed and signal it with None.
        print(f'Error while processing file: {e} ({file_path})')
        return None