databricks/notebooks/calculate_statistics.py (17 lines of code) (raw):

# Databricks notebook source
# Notebook entry point: reads the "job_ids" and "project_name" widgets, wires
# up a StatsCalculator, and then calls one of its avg_*_by_jobs methods per
# cell.

import json

from databricks.sdk.runtime import dbutils

from lib.repository.configs.service import load_config
from lib.repository.ground_truth.stats import StatsCalculator
from lib.spark_helper.ground_truth import GroundTruthFileStorage
from lib.spark_helper.predictions import TemporaryStorage
from lib.spark_helper.storage_service import SparkStorageService

# The "job_ids" widget carries a JSON-encoded value (presumably a list of job
# identifiers -- confirm against the job that launches this notebook).
job_ids = json.loads(dbutils.widgets.get("job_ids"))

# Project configuration is looked up by the name given in the "project_name"
# widget.
configs = load_config(project_name=dbutils.widgets.get("project_name"))

# Storage backends, both built from the same project configuration.
storage_service = SparkStorageService(configs)
ground_truth_storage = GroundTruthFileStorage(configs)
temporary_storage = TemporaryStorage(storage_service)

# The calculator is handed the temporary storage and the ground-truth storage.
stats = StatsCalculator(temporary_storage, ground_truth_storage)

# COMMAND ----------

# Kept as the last bare expression of the cell so the notebook can render the
# returned value.
stats.avg_summary_by_jobs(job_ids)

# COMMAND ----------

# Same pattern: bare expression left last in the cell.
stats.avg_category_by_jobs(job_ids)

# COMMAND ----------

# Same pattern: bare expression left last in the cell.
stats.avg_file_by_jobs(job_ids)

# COMMAND ----------

# Same pattern: bare expression left last in the cell.
stats.avg_file_and_category_by_jobs(job_ids)