run_tasks.ipynb (279 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"id": "aa5d5348637ce0d3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T13:15:00.999373Z",
"start_time": "2024-12-06T13:14:58.711405Z"
}
},
"source": [
"from Utils.llm.config import Model\n",
"from Utils.prepare_data import main as prepare_tasks\n",
"from Utils.execute_test import main as execute\n",
"from Utils.auto_eval import main as evaluate"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "f143b437b5a83b6b",
"metadata": {},
"source": [
"# edit before start\n",
"model = Model.AmazonNovaPro\n",
"lang = \"JS\" # language specific folder name, if your files in Dataset/JS use \"JS\"\n",
"prepare_tasks(model, lang)"
],
"outputs": [],
"execution_count": null
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-12-06T13:23:50.683184Z",
"start_time": "2024-12-06T13:15:15.534235Z"
}
},
"cell_type": "code",
"source": [
"attempts = 1 # how much times each experiment will be launched\n",
"\n",
"# tasks to launch (will be launched only this task)\n",
"spot_launch_list = [\n",
" # 'GenerateReactApp.txt',\n",
"]\n",
"\n",
"# tasks to skip\n",
"skip_list = [\n",
" # 'GenerateReactApp.txt',\n",
"]\n",
"\n",
"model = Model.AmazonNovaPro\n",
"lang = \"JS\" # language specific folder name, if your files in Dataset/JS use JS\n",
"execute(model, lang, attempts, spot_launch_list, skip_list)\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting answers generation for AmazonNovaPro\n",
"Attempt #1, get answer for EvaluateCodeQuality_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:15:15.539939\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_ToDoApp_ReactJS_high_avg/EvaluateCodeQuality_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for EvaluateCodeQuality_ReactSignUp_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:15:42.901508\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_ReactSignUp_high_low/EvaluateCodeQuality_ReactSignUp_high_low_report_1.md\n",
"Attempt #1, get answer for EvaluateCodeQuality_AngularCosmoPage_avg_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:16:10.068723\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_AngularCosmoPage_avg_high/EvaluateCodeQuality_AngularCosmoPage_avg_high_report_1.md\n",
"Attempt #1, get answer for BusinessFunctionality_ReactSignUp_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:16:29.575815\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_ReactSignUp_high_low/BusinessFunctionality_ReactSignUp_high_low_report_1.md\n",
"Attempt #1, get answer for BusinessFunctionality_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:16:40.374661\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_ToDoApp_ReactJS_high_avg/BusinessFunctionality_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for BusinessFunctionality_AngularCosmoPage_avg_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:16:50.837716\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_AngularCosmoPage_avg_high/BusinessFunctionality_AngularCosmoPage_avg_high_report_1.md\n",
"Attempt #1, get answer for ReactToAngular_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:17:02.599880\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/ReactToAngular_ToDoApp_ReactJS_high_avg/ReactToAngular_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for VanillaToReact_Piano_NativeJS_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:17:28.660264\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/VanillaToReact_Piano_NativeJS_high_low/VanillaToReact_Piano_NativeJS_high_low_report_1.md\n",
"Attempt #1, get answer for UpdateAngular_ToDoApp_AngularJS_avg_avg_2.txt\n",
"\tAttempt 1 at 2024-12-06 16:18:06.511262\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/UpdateAngular_ToDoApp_AngularJS_avg_avg_2/UpdateAngular_ToDoApp_AngularJS_avg_avg_2_report_1.md\n",
"Attempt #1, get answer for AngularToReact_AngularCosmoPage_avg_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:18:32.536849\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/AngularToReact_AngularCosmoPage_avg_high/AngularToReact_AngularCosmoPage_avg_high_report_1.md\n",
"Attempt #1, get answer for UpdateReact_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:18:58.637348\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/UpdateReact_ToDoApp_ReactJS_high_avg/UpdateReact_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for jQueryToReact_ToDoApp_jQuery_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:19:29.939022\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/jQueryToReact_ToDoApp_jQuery_high_low/jQueryToReact_ToDoApp_jQuery_high_low_report_1.md\n",
"Attempt #1, get answer for ReactToAngular_ReactSignUp_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:20:01.909661\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/ReactToAngular_ReactSignUp_high_low/ReactToAngular_ReactSignUp_high_low_report_1.md\n",
"Attempt #1, get answer for GenerateProjectConfiguration.txt\n",
"\tAttempt 1 at 2024-12-06 16:20:20.963654\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_template_generation/result_2024-12-06_16-15-15.536491/GenerateProjectConfiguration/GenerateProjectConfiguration_report_1.md\n",
"Attempt #1, get answer for DescribeTechnicalImplementation_AngularCosmoPage_avg_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:20:39.443001\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_AngularCosmoPage_avg_high/DescribeTechnicalImplementation_AngularCosmoPage_avg_high_report_1.md\n",
"Attempt #1, get answer for DescribeTechnicalImplementation_ReactSignUp_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:20:51.700414\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_ReactSignUp_high_low/DescribeTechnicalImplementation_ReactSignUp_high_low_report_1.md\n",
"Attempt #1, get answer for DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:21:05.500165\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg/DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for WriteTestsForLegacyCode_ReactSignUp_high_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:21:32.224545\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_ReactSignUp_high_low/WriteTestsForLegacyCode_ReactSignUp_high_low_report_1.md\n",
"Attempt #1, get answer for WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg.txt\n",
"\tAttempt 1 at 2024-12-06 16:21:47.197898\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg/WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg_report_1.md\n",
"Attempt #1, get answer for WriteTestsForActualCode_ReactSelect_extra_high_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:22:12.007051\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForActualCode_ReactSelect_extra_high_high/WriteTestsForActualCode_ReactSelect_extra_high_high_report_1.md\n",
"Attempt #1, get answer for WriteTestsForLegacyCode_AngularCosmoPage_avg_high.txt\n",
"\tAttempt 1 at 2024-12-06 16:22:49.412792\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_AngularCosmoPage_avg_high/WriteTestsForLegacyCode_AngularCosmoPage_avg_high_report_1.md\n",
"Attempt #1, get answer for ModifyReactApp_ReactFetchAPI_avg_low.txt\n",
"\tAttempt 1 at 2024-12-06 16:23:15.738592\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/ModifyReactApp_ReactFetchAPI_avg_low/ModifyReactApp_ReactFetchAPI_avg_low_report_1.md\n",
"Attempt #1, get answer for GenerateBaseComponent.txt\n",
"\tAttempt 1 at 2024-12-06 16:23:24.976785\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/GenerateBaseComponent/GenerateBaseComponent_report_1.md\n",
"Attempt #1, get answer for GenerateReactApp.txt\n",
"\tAttempt 1 at 2024-12-06 16:23:34.519342\n",
"Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/GenerateReactApp/GenerateReactApp_report_1.md\n"
]
}
],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T13:24:29.282586Z",
"start_time": "2024-12-06T13:24:29.265858Z"
}
},
"cell_type": "code",
"source": [
"# create summary report\n",
"from Utils.get_tokens_and_time import main as summarize\n",
"\n",
"langs = [\n",
" 'JS',\n",
" # 'Java',\n",
"]\n",
"\n",
"models = [\n",
" Model.AmazonNovaPro,\n",
"]\n",
"\n",
"summarize(models=models, langs=langs)"
],
"id": "be266342cb183532",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Summary written successfully for AmazonNovaPro and lang JS to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/summary.csv\n"
]
}
],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T13:34:34.411572Z",
"start_time": "2024-12-06T13:26:50.968433Z"
}
},
"cell_type": "code",
"source": [
"# evaluate model results based on summary report\n",
"evaluate(model_name=Model.AmazonNovaPro)"
],
"id": "630544898b58b871",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluating scenario EvaluateCodeQuality_ReactSignUp_high_low\n",
"Grading scenario EvaluateCodeQuality_ReactSignUp_high_low\n",
"Evaluating scenario EvaluateCodeQuality_ToDoApp_ReactJS_high_avg\n",
"Grading scenario EvaluateCodeQuality_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario EvaluateCodeQuality_AngularCosmoPage_avg_high\n",
"Grading scenario EvaluateCodeQuality_AngularCosmoPage_avg_high\n",
"Evaluating scenario DescribeTechnicalImplementation_AngularCosmoPage_avg_high\n",
"Grading scenario DescribeTechnicalImplementation_AngularCosmoPage_avg_high\n",
"Evaluating scenario DescribeTechnicalImplementation_ReactSignUp_high_low\n",
"Grading scenario DescribeTechnicalImplementation_ReactSignUp_high_low\n",
"Evaluating scenario DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg\n",
"Grading scenario DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario ModifyReactApp_ReactFetchAPI_avg_low\n",
"Grading scenario ModifyReactApp_ReactFetchAPI_avg_low\n",
"Evaluating scenario GenerateBaseComponent\n",
"Grading scenario GenerateBaseComponent\n",
"Evaluating scenario GenerateReactApp\n",
"Grading scenario GenerateReactApp\n",
"Evaluating scenario BusinessFunctionality_ReactSignUp_high_low\n",
"Grading scenario BusinessFunctionality_ReactSignUp_high_low\n",
"Evaluating scenario BusinessFunctionality_ToDoApp_ReactJS_high_avg\n",
"Grading scenario BusinessFunctionality_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario BusinessFunctionality_AngularCosmoPage_avg_high\n",
"Grading scenario BusinessFunctionality_AngularCosmoPage_avg_high\n",
"Evaluating scenario ReactToAngular_ToDoApp_ReactJS_high_avg\n",
"Grading scenario ReactToAngular_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario jQueryToReact_ToDoApp_jQuery_high_low\n",
"Grading scenario jQueryToReact_ToDoApp_jQuery_high_low\n",
"Evaluating scenario AngularToReact_AngularCosmoPage_avg_high\n",
"Grading scenario AngularToReact_AngularCosmoPage_avg_high\n",
"Evaluating scenario UpdateReact_ToDoApp_ReactJS_high_avg\n",
"Grading scenario UpdateReact_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario UpdateAngular_ToDoApp_AngularJS_avg_avg_2\n",
"Grading scenario UpdateAngular_ToDoApp_AngularJS_avg_avg_2\n",
"Evaluating scenario ReactToAngular_ReactSignUp_high_low\n",
"Grading scenario ReactToAngular_ReactSignUp_high_low\n",
"Evaluating scenario VanillaToReact_Piano_NativeJS_high_low\n",
"Grading scenario VanillaToReact_Piano_NativeJS_high_low\n",
"File /Users/iliakorol/genai/AIRUN-LLM-Benchmark/Scenarios/Criteria/JS/solution_template_generation/GenerateProjectConfiguration_criteria.yaml does not exist.\n",
"Evaluating scenario WriteTestsForLegacyCode_ReactSignUp_high_low\n",
"Grading scenario WriteTestsForLegacyCode_ReactSignUp_high_low\n",
"Evaluating scenario WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg\n",
"Grading scenario WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg\n",
"Evaluating scenario WriteTestsForLegacyCode_AngularCosmoPage_avg_high\n",
"Grading scenario WriteTestsForLegacyCode_AngularCosmoPage_avg_high\n",
"Evaluating scenario WriteTestsForActualCode_ReactSelect_extra_high_high\n",
"Grading scenario WriteTestsForActualCode_ReactSelect_extra_high_high\n"
]
}
],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "code",
"source": "",
"id": "36e2b55a03a940b3",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}