run_tasks.ipynb (279 lines of code) (raw):

{ "cells": [ { "cell_type": "code", "id": "aa5d5348637ce0d3", "metadata": { "ExecuteTime": { "end_time": "2024-12-06T13:15:00.999373Z", "start_time": "2024-12-06T13:14:58.711405Z" } }, "source": [ "from Utils.llm.config import Model\n", "from Utils.prepare_data import main as prepare_tasks\n", "from Utils.execute_test import main as execute\n", "from Utils.auto_eval import main as evaluate" ], "outputs": [], "execution_count": 1 }, { "cell_type": "code", "id": "f143b437b5a83b6b", "metadata": {}, "source": [ "# edit before running\n", "model = Model.AmazonNovaPro\n", "lang = \"JS\" # language-specific folder name; if your files are in Dataset/JS, use \"JS\"\n", "prepare_tasks(model, lang)" ], "outputs": [], "execution_count": null }, { "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-12-06T13:23:50.683184Z", "start_time": "2024-12-06T13:15:15.534235Z" } }, "cell_type": "code", "source": [ "attempts = 1 # how many times each experiment will be launched\n", "\n", "# tasks to launch (only these tasks will be launched)\n", "spot_launch_list = [\n", " # 'GenerateReactApp.txt',\n", "]\n", "\n", "# tasks to skip\n", "skip_list = [\n", " # 'GenerateReactApp.txt',\n", "]\n", "\n", "model = Model.AmazonNovaPro\n", "lang = \"JS\" # language-specific folder name; if your files are in Dataset/JS, use \"JS\"\n", "execute(model, lang, attempts, spot_launch_list, skip_list)\n" ], "id": "initial_id", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting answers generation for AmazonNovaPro\n", "Attempt #1, get answer for EvaluateCodeQuality_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:15:15.539939\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_ToDoApp_ReactJS_high_avg/EvaluateCodeQuality_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for EvaluateCodeQuality_ReactSignUp_high_low.txt\n", "\tAttempt 1 at 
2024-12-06 16:15:42.901508\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_ReactSignUp_high_low/EvaluateCodeQuality_ReactSignUp_high_low_report_1.md\n", "Attempt #1, get answer for EvaluateCodeQuality_AngularCosmoPage_avg_high.txt\n", "\tAttempt 1 at 2024-12-06 16:16:10.068723\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_analysis/result_2024-12-06_16-15-15.536491/EvaluateCodeQuality_AngularCosmoPage_avg_high/EvaluateCodeQuality_AngularCosmoPage_avg_high_report_1.md\n", "Attempt #1, get answer for BusinessFunctionality_ReactSignUp_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:16:29.575815\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_ReactSignUp_high_low/BusinessFunctionality_ReactSignUp_high_low_report_1.md\n", "Attempt #1, get answer for BusinessFunctionality_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:16:40.374661\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_ToDoApp_ReactJS_high_avg/BusinessFunctionality_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for BusinessFunctionality_AngularCosmoPage_avg_high.txt\n", "\tAttempt 1 at 2024-12-06 16:16:50.837716\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_documentation/result_2024-12-06_16-15-15.536491/BusinessFunctionality_AngularCosmoPage_avg_high/BusinessFunctionality_AngularCosmoPage_avg_high_report_1.md\n", "Attempt #1, get answer for ReactToAngular_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:17:02.599880\n", "Output was written to 
/Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/ReactToAngular_ToDoApp_ReactJS_high_avg/ReactToAngular_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for VanillaToReact_Piano_NativeJS_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:17:28.660264\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/VanillaToReact_Piano_NativeJS_high_low/VanillaToReact_Piano_NativeJS_high_low_report_1.md\n", "Attempt #1, get answer for UpdateAngular_ToDoApp_AngularJS_avg_avg_2.txt\n", "\tAttempt 1 at 2024-12-06 16:18:06.511262\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/UpdateAngular_ToDoApp_AngularJS_avg_avg_2/UpdateAngular_ToDoApp_AngularJS_avg_avg_2_report_1.md\n", "Attempt #1, get answer for AngularToReact_AngularCosmoPage_avg_high.txt\n", "\tAttempt 1 at 2024-12-06 16:18:32.536849\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/AngularToReact_AngularCosmoPage_avg_high/AngularToReact_AngularCosmoPage_avg_high_report_1.md\n", "Attempt #1, get answer for UpdateReact_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:18:58.637348\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/UpdateReact_ToDoApp_ReactJS_high_avg/UpdateReact_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for jQueryToReact_ToDoApp_jQuery_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:19:29.939022\n", "Output was written to 
/Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/jQueryToReact_ToDoApp_jQuery_high_low/jQueryToReact_ToDoApp_jQuery_high_low_report_1.md\n", "Attempt #1, get answer for ReactToAngular_ReactSignUp_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:20:01.909661\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_migration/result_2024-12-06_16-15-15.536491/ReactToAngular_ReactSignUp_high_low/ReactToAngular_ReactSignUp_high_low_report_1.md\n", "Attempt #1, get answer for GenerateProjectConfiguration.txt\n", "\tAttempt 1 at 2024-12-06 16:20:20.963654\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/solution_template_generation/result_2024-12-06_16-15-15.536491/GenerateProjectConfiguration/GenerateProjectConfiguration_report_1.md\n", "Attempt #1, get answer for DescribeTechnicalImplementation_AngularCosmoPage_avg_high.txt\n", "\tAttempt 1 at 2024-12-06 16:20:39.443001\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_AngularCosmoPage_avg_high/DescribeTechnicalImplementation_AngularCosmoPage_avg_high_report_1.md\n", "Attempt #1, get answer for DescribeTechnicalImplementation_ReactSignUp_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:20:51.700414\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_ReactSignUp_high_low/DescribeTechnicalImplementation_ReactSignUp_high_low_report_1.md\n", "Attempt #1, get answer for DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:21:05.500165\n", "Output was written to 
/Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/code_explanation/result_2024-12-06_16-15-15.536491/DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg/DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for WriteTestsForLegacyCode_ReactSignUp_high_low.txt\n", "\tAttempt 1 at 2024-12-06 16:21:32.224545\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_ReactSignUp_high_low/WriteTestsForLegacyCode_ReactSignUp_high_low_report_1.md\n", "Attempt #1, get answer for WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg.txt\n", "\tAttempt 1 at 2024-12-06 16:21:47.197898\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg/WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg_report_1.md\n", "Attempt #1, get answer for WriteTestsForActualCode_ReactSelect_extra_high_high.txt\n", "\tAttempt 1 at 2024-12-06 16:22:12.007051\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForActualCode_ReactSelect_extra_high_high/WriteTestsForActualCode_ReactSelect_extra_high_high_report_1.md\n", "Attempt #1, get answer for WriteTestsForLegacyCode_AngularCosmoPage_avg_high.txt\n", "\tAttempt 1 at 2024-12-06 16:22:49.412792\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/test_generation/result_2024-12-06_16-15-15.536491/WriteTestsForLegacyCode_AngularCosmoPage_avg_high/WriteTestsForLegacyCode_AngularCosmoPage_avg_high_report_1.md\n", "Attempt #1, get answer for ModifyReactApp_ReactFetchAPI_avg_low.txt\n", "\tAttempt 1 at 2024-12-06 16:23:15.738592\n", "Output was written to 
/Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/ModifyReactApp_ReactFetchAPI_avg_low/ModifyReactApp_ReactFetchAPI_avg_low_report_1.md\n", "Attempt #1, get answer for GenerateBaseComponent.txt\n", "\tAttempt 1 at 2024-12-06 16:23:24.976785\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/GenerateBaseComponent/GenerateBaseComponent_report_1.md\n", "Attempt #1, get answer for GenerateReactApp.txt\n", "\tAttempt 1 at 2024-12-06 16:23:34.519342\n", "Output was written to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/component_generation/result_2024-12-06_16-15-15.536491/GenerateReactApp/GenerateReactApp_report_1.md\n" ] } ], "execution_count": 2 }, { "metadata": { "ExecuteTime": { "end_time": "2024-12-06T13:24:29.282586Z", "start_time": "2024-12-06T13:24:29.265858Z" } }, "cell_type": "code", "source": [ "# create summary report\n", "from Utils.get_tokens_and_time import main as summarize\n", "\n", "langs = [\n", " 'JS',\n", " # 'Java',\n", "]\n", "\n", "models = [\n", " Model.AmazonNovaPro,\n", "]\n", "\n", "summarize(models=models, langs=langs)" ], "id": "be266342cb183532", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary written successfully for AmazonNovaPro and lang JS to /Users/iliakorol/genai/AIRUN-LLM-Benchmark-Results/Output/AmazonNovaPro/JS/summary.csv\n" ] } ], "execution_count": 3 }, { "metadata": { "ExecuteTime": { "end_time": "2024-12-06T13:34:34.411572Z", "start_time": "2024-12-06T13:26:50.968433Z" } }, "cell_type": "code", "source": [ "# evaluate model results based on summary report\n", "evaluate(model_name=Model.AmazonNovaPro)" ], "id": "630544898b58b871", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Evaluating scenario EvaluateCodeQuality_ReactSignUp_high_low\n", "Grading scenario 
EvaluateCodeQuality_ReactSignUp_high_low\n", "Evaluating scenario EvaluateCodeQuality_ToDoApp_ReactJS_high_avg\n", "Grading scenario EvaluateCodeQuality_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario EvaluateCodeQuality_AngularCosmoPage_avg_high\n", "Grading scenario EvaluateCodeQuality_AngularCosmoPage_avg_high\n", "Evaluating scenario DescribeTechnicalImplementation_AngularCosmoPage_avg_high\n", "Grading scenario DescribeTechnicalImplementation_AngularCosmoPage_avg_high\n", "Evaluating scenario DescribeTechnicalImplementation_ReactSignUp_high_low\n", "Grading scenario DescribeTechnicalImplementation_ReactSignUp_high_low\n", "Evaluating scenario DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg\n", "Grading scenario DescribeTechnicalImplementation_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario ModifyReactApp_ReactFetchAPI_avg_low\n", "Grading scenario ModifyReactApp_ReactFetchAPI_avg_low\n", "Evaluating scenario GenerateBaseComponent\n", "Grading scenario GenerateBaseComponent\n", "Evaluating scenario GenerateReactApp\n", "Grading scenario GenerateReactApp\n", "Evaluating scenario BusinessFunctionality_ReactSignUp_high_low\n", "Grading scenario BusinessFunctionality_ReactSignUp_high_low\n", "Evaluating scenario BusinessFunctionality_ToDoApp_ReactJS_high_avg\n", "Grading scenario BusinessFunctionality_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario BusinessFunctionality_AngularCosmoPage_avg_high\n", "Grading scenario BusinessFunctionality_AngularCosmoPage_avg_high\n", "Evaluating scenario ReactToAngular_ToDoApp_ReactJS_high_avg\n", "Grading scenario ReactToAngular_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario jQueryToReact_ToDoApp_jQuery_high_low\n", "Grading scenario jQueryToReact_ToDoApp_jQuery_high_low\n", "Evaluating scenario AngularToReact_AngularCosmoPage_avg_high\n", "Grading scenario AngularToReact_AngularCosmoPage_avg_high\n", "Evaluating scenario UpdateReact_ToDoApp_ReactJS_high_avg\n", "Grading scenario 
UpdateReact_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario UpdateAngular_ToDoApp_AngularJS_avg_avg_2\n", "Grading scenario UpdateAngular_ToDoApp_AngularJS_avg_avg_2\n", "Evaluating scenario ReactToAngular_ReactSignUp_high_low\n", "Grading scenario ReactToAngular_ReactSignUp_high_low\n", "Evaluating scenario VanillaToReact_Piano_NativeJS_high_low\n", "Grading scenario VanillaToReact_Piano_NativeJS_high_low\n", "File /Users/iliakorol/genai/AIRUN-LLM-Benchmark/Scenarios/Criteria/JS/solution_template_generation/GenerateProjectConfiguration_criteria.yaml does not exist.\n", "Evaluating scenario WriteTestsForLegacyCode_ReactSignUp_high_low\n", "Grading scenario WriteTestsForLegacyCode_ReactSignUp_high_low\n", "Evaluating scenario WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg\n", "Grading scenario WriteTestsForLegacyCode_ToDoApp_ReactJS_high_avg\n", "Evaluating scenario WriteTestsForLegacyCode_AngularCosmoPage_avg_high\n", "Grading scenario WriteTestsForLegacyCode_AngularCosmoPage_avg_high\n", "Evaluating scenario WriteTestsForActualCode_ReactSelect_extra_high_high\n", "Grading scenario WriteTestsForActualCode_ReactSelect_extra_high_high\n" ] } ], "execution_count": 4 }, { "metadata": {}, "cell_type": "code", "source": "", "id": "36e2b55a03a940b3", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.6" } }, "nbformat": 4, "nbformat_minor": 5 }