databricks/notebooks/store_files.py (16 lines of code) (raw):
# Databricks notebook source
import json
import requests
from lib.repository.configs.service import load_config
from lib.spark_helper.files import FilesStorage
from databricks.sdk.runtime import dbutils
# Load the configuration for the project named by the "project_name" widget
# and build the storage helper used to persist the downloaded files.
configs = load_config(project_name=dbutils.widgets.get("project_name"))
files_storage = FilesStorage(configs)

# Job parameters arrive as a JSON string in the "badgerdoc_job_parameters"
# widget; the "files_data" entry lists the files to download.
job_parameters = json.loads(dbutils.widgets.get("badgerdoc_job_parameters"))

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled connection and the notebook job would never finish.
DOWNLOAD_TIMEOUT_SECONDS = (10, 120)

for file in job_parameters["files_data"]:
    file_id = int(file["file_id"])
    print(f"Saving file_id: {file_id}")

    # Use the "signed_url" key when present, otherwise fall back to
    # "s3_signed_url".
    url = file["signed_url"] if "signed_url" in file else file["s3_signed_url"]
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)

    # Fail fast on an HTTP error (e.g. an expired signed URL) instead of
    # storing the error response body as if it were the document.
    response.raise_for_status()

    # NOTE(review): both calls receive the same raw response bytes —
    # presumably store_text derives the text from them itself; confirm.
    files_storage.store_pdf(response.content, file_id)
    files_storage.store_text(response.content, file_id)