osci/datalake/local/public.py (71 lines of code) (raw):

"""Copyright since 2020, EPAM Systems This file is part of OSCI. OSCI is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OSCI is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from datetime import datetime
from calendar import monthrange
from glob import glob
from pathlib import Path
from typing import List, Tuple

from .base import LocalSystemArea
from osci.datalake.base import BasePublicArea
from osci.utils import normalize_company

import re
import logging
import pandas as pd

log = logging.getLogger(__name__)


class LocalPublicArea(BasePublicArea, LocalSystemArea):
    """Report and e-mail storage under the ``public`` directory of the local file system.

    Paths are built from ``self.BASE_PATH`` (not defined in this class;
    presumably provided by one of the base classes) and ``BASE_AREA_DIR``.
    """

    BASE_AREA_DIR = 'public'

    @property
    def _report_base_path(self) -> Path:
        """Root directory of all reports: ``<BASE_PATH>/<BASE_AREA_DIR>/report``."""
        return self.BASE_PATH / self.BASE_AREA_DIR / 'report'

    def get_report_path(self, report_name: str, date: datetime, company: str = None) -> Path:
        """Return the CSV path of a report, creating its directory if needed.

        The layout is ``<report base>[/<company>]/<report_name>/`` and the file
        name is ``[<COMPANY>_]<report_name>_<YYYY-MM-DD>.csv``.

        :param report_name: name of the report (used as directory and file name part)
        :param date: report date, rendered as ``%Y-%m-%d`` in the file name
        :param company: optional company name; it is passed through
            ``normalize_company`` before being used in the path
        :return: path to the report file (the file itself is not created)
        """
        path = self._report_base_path
        if company is not None:
            company = normalize_company(name=company)
            path /= company
        path = path / report_name
        # Side effect: the directory is created so callers can write straight away.
        path.mkdir(parents=True, exist_ok=True)
        company_prefix = f'{company.upper()}_' if company else ''
        filename = f'{company_prefix}{report_name}_{date.strftime("%Y-%m-%d")}.csv'
        return path / filename

    def get_report_url(self, report_name: str, date: datetime, company: str = None):
        """Return the report path passed through ``add_fs_absolute_prefix``.

        ``add_fs_absolute_prefix`` is defined outside this class; its exact
        output format cannot be confirmed from here.
        """
        report_path = self.get_report_path(report_name=report_name, date=date, company=company)
        return self.add_fs_absolute_prefix(path=report_path)

    def get_osci_change_excel_report_url(self, base_report_name: str, date: datetime, report_dir_name: str):
        """Return the OSCI change excel report path passed through ``add_fs_absolute_prefix``."""
        report_path = self.get_osci_change_excel_report_path(base_report_name=base_report_name,
                                                             report_dir_name=report_dir_name,
                                                             date=date)
        return self.add_fs_absolute_prefix(report_path)

    def get_reports_for_last_days_of_month(self, report_name: str, date: datetime,
                                           company: str = None) -> List[Tuple[datetime, pd.DataFrame]]:
        """Load the reports of ``date``'s year that are dated on the last day of a month.

        Files are looked up with the glob pattern
        ``<report base>[/<company>]/<report_name>/[<COMPANY>_]<report_name>_<YYYY>*``.
        Only the year of ``date`` takes part in the lookup.

        :param report_name: name of the report
        :param date: any date within the year of interest
        :param company: optional company name (normalized with ``normalize_company``)
        :return: list of ``(report date, report data frame)`` tuples in the order
            returned by ``glob``. The first item is a ``datetime.date`` (result of
            ``.date()``), despite the ``datetime`` annotation kept for
            compatibility.
        """
        report_dir = self._report_base_path
        company_prefix = ''
        if company is not None:
            company = normalize_company(name=company)
            report_dir = report_dir / company
            if company:
                company_prefix = f'{company.upper()}_'

        # Format only the year: running the whole path through ``strftime``
        # would treat any '%' in a directory or report name as a format directive.
        file_pattern = f'{company_prefix}{report_name}_{date.strftime("%Y")}*'
        pattern = str(report_dir / report_name / file_pattern)

        date_regex = re.compile(r'\d{4}-\d{2}-\d{2}')
        reports = []
        for report_file in glob(pattern):
            # Look for the date in the file name only, so that a date-like
            # directory name earlier in the path cannot be picked up instead.
            match = date_regex.search(Path(report_file).name)
            if match is None:
                log.warning('Skip %s: no date found in the file name', report_file)
                continue
            try:
                report_date = datetime.strptime(match.group(), '%Y-%m-%d').date()
            except ValueError:
                log.warning('Skip %s: invalid date %s in the file name', report_file, match.group())
                continue
            last_day_of_month = monthrange(report_date.year, report_date.month)[1]
            if report_date.day == last_day_of_month:
                reports.append((report_date, pd.read_csv(report_file)))
        return reports

    def save_report(self, report_df: pd.DataFrame, report_name: str, date: datetime, company: str = None):
        """Write ``report_df`` as CSV (without the index) to the path from ``get_report_path``."""
        path = self.get_report_path(report_name, date, company)
        report_df.to_csv(path, index=False)

    def get_report(self, report_name: str, date: datetime, company: str = None) -> pd.DataFrame:
        """Read the CSV report located at the path from ``get_report_path``."""
        path = self.get_report_path(report_name, date, company)
        return pd.read_csv(path)

    def get_osci_change_excel_report_path(self, base_report_name: str, report_dir_name: str, date: datetime):
        """Return the path of the OSCI change excel report, creating its directory if needed.

        The file name comes from ``get_osci_change_excel_report_name``, which is
        defined outside this class.

        :param base_report_name: report name forwarded to the file name builder
        :param report_dir_name: directory under the report base path
        :param date: report date forwarded to the file name builder
        :return: ``<report base>/<report_dir_name>/<file name>``
        """
        path = self._report_base_path
        path = path / report_dir_name
        path.mkdir(parents=True, exist_ok=True)
        filename = self.get_osci_change_excel_report_name(base_report_name, date)
        return path / filename

    @property
    def _email_base_path(self) -> Path:
        """Root directory of saved e-mails: ``<BASE_PATH>/<BASE_AREA_DIR>/email``."""
        return self.BASE_PATH / self.BASE_AREA_DIR / 'email'

    def _get_email_path(self, date: datetime) -> Path:
        """Return ``<email base>/<YYYY-MM-DD>.html``, creating the directory if needed."""
        path = self._email_base_path
        path.mkdir(parents=True, exist_ok=True)
        return path / date.strftime("%Y-%m-%d.html")

    def save_email(self, email_body: str, date: datetime):
        """Write ``email_body`` as UTF-8 text to the e-mail file for ``date``."""
        with open(str(self._get_email_path(date=date)), 'w', encoding='utf-8') as f:
            f.write(email_body)