osci/datalake/blob/public.py (65 lines of code) (raw):
"""Copyright since 2020, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from datetime import datetime
from calendar import monthrange
from typing import List, Tuple
from .base import BlobArea
from osci.datalake.base import BasePublicArea
from osci.utils import normalize_company
import logging
import pandas as pd
import re
log = logging.getLogger(__name__)
class BlobPublicArea(BasePublicArea, BlobArea):
AREA_CONTAINER = 'public'
@property
def _report_base_path(self) -> str:
return 'report'
def get_report_path(self, report_name: str, date: datetime, company: str = None) -> str:
path = self._report_base_path
if company is not None:
company = normalize_company(name=company)
path = f'{path}/{company}'
return date.strftime(f'{path}/{report_name}/{company.upper() + "_" if company else ""}{report_name}_%Y-%m-%d.csv')
def get_report_url(self, report_name: str, date: datetime, company: str = None):
report_path = self.get_report_path(report_name=report_name, date=date, company=company)
return self.add_http_prefix(report_path)
def get_osci_change_excel_report_url(self, base_report_name: str, date: datetime, report_dir_name: str):
return self.add_http_prefix(self.get_osci_change_excel_report_path(base_report_name=base_report_name,
date=date,
report_dir_name=report_dir_name))
def save_report(self, report_df: pd.DataFrame, report_name: str, date: datetime, company: str = None):
"""Save report dataframe to csv
:param report_df: report dataframe
:param report_name: report name
:param date: date of report
:param company: company name
"""
self.write_pandas_dataframe_to_csv(df=report_df,
path=self.get_report_path(report_name=report_name,
date=date,
company=company))
def get_report(self, report_name: str, date: datetime, company: str = None) -> pd.DataFrame:
return self.read_pandas_dataframe_from_csv(path=self.get_report_path(report_name=report_name,
date=date,
company=company))
def get_osci_change_excel_report_path(self, base_report_name: str, report_dir_name: str, date: datetime):
return f"{self._report_base_path}/" \
f"{report_dir_name}/" \
f"{self.get_osci_change_excel_report_name(base_report_name, date)}"
def get_reports_for_last_days_of_month(self, report_name: str, date: datetime, company: str = None) -> List[Tuple[datetime, pd.DataFrame]]:
path = self._report_base_path
if company is not None:
company = normalize_company(name=company)
path = f'{path}/{company}'
path = date.strftime(f'{path}/{report_name}/{company.upper() + "_" if company else ""}{report_name}_%Y')
reports_set = []
for report in self.container_client.list_blobs(name_starts_with=path):
report_name = report['name']
match = re.search(pattern=r'\d{4}-\d{2}-\d{2}', string=report_name)
report_date = datetime.strptime(match.group(), '%Y-%m-%d').date()
date = datetime(year=report_date.year,
month=report_date.month,
day=monthrange(report_date.year, report_date.month)[1])
if report_date.strftime('%Y-%m-%d') == date.strftime('%Y-%m-%d'):
reports_set.append((report_date, self.read_pandas_dataframe_from_csv(path=report_name)))
return reports_set
@property
def _email_base_path(self) -> str:
return 'email'
def _get_email_path(self, date: datetime) -> str:
return date.strftime(f"{self._email_base_path}/%Y-%m-%d.html")
def save_email(self, email_body: str, date: datetime):
self.write_string_to_file(path=self._get_email_path(date=date), data=email_body, content_type='text/html')