osci/datalake/reports/excel/osci_change.py (189 lines of code) (raw):
"""Copyright since 2020, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
import pandas as pd
from datetime import datetime, timedelta
from typing import Union, List, NamedTuple, Optional, Any, Callable, Dict, Iterable
from io import BytesIO
from functools import reduce
from xlsxwriter import Workbook
from xlsxwriter.worksheet import Worksheet
from xlsxwriter.format import Format
from osci.datalake import DataLake
from osci.datalake.schemas.public import OSCIChangeRankingExcelSchema, OSCIChangeRankingSchema
Row = int
Column = int
Rule = Dict[str, Any]
STAR_STAR_SUPERSCRIPT = '*'
NUMBER_SUPERSCRIPT: Callable[[int], str] = lambda num: str(num)
def reduce_rules(rules: Iterable[Rule]) -> Rule:
def _merge_rules(left: Rule, right: Rule) -> Rule:
return {**left, **right}
if rules:
return reduce(_merge_rules, rules)
return {}
class Position(NamedTuple):
row: Row
col: Column
class TableColumn(NamedTuple):
name: str
superscript_suffix: Optional[str]
df_key: str
format_rule: Rule
cell_scale: Optional[Union[int, float]]
class OSCIChangeExcelWriter:
schema = OSCIChangeRankingExcelSchema
plus_minus_format_rule: Rule = {'num_format': '+0;-0;—'}
numbers_format_rule: Rule = {'num_format': '0'}
superscript_format_rule: Rule = {'font_script': 1}
bold_format_rule: Rule = {'bold': True}
blue_font_color_rule: Rule = {'font_color': '#2E75B5'}
align_center_rule: Rule = {'align': 'center'}
@staticmethod
def border_rule_fabric(border_size: int) -> Dict[str, int]:
return {'border': border_size}
border_rule_1: Rule = {'border': 1}
position_change_format_rule: Rule = reduce_rules((border_rule_1,
bold_format_rule,
blue_font_color_rule,
plus_minus_format_rule,
align_center_rule))
change_format_rule: Rule = reduce_rules((border_rule_1,
blue_font_color_rule,
plus_minus_format_rule))
number_cell_format_rule: Rule = reduce_rules((border_rule_1, numbers_format_rule))
table_columns: List[TableColumn] = [
TableColumn(schema.position, None,
schema.position, reduce_rules((border_rule_1, bold_format_rule, align_center_rule)),
cell_scale=5),
TableColumn(schema.position_change, STAR_STAR_SUPERSCRIPT,
OSCIChangeRankingSchema.position_change, position_change_format_rule,
cell_scale=1.2),
TableColumn(schema.company, None,
OSCIChangeRankingSchema.company, border_rule_1,
cell_scale=4),
TableColumn(schema.active, NUMBER_SUPERSCRIPT(1),
OSCIChangeRankingSchema.active, number_cell_format_rule,
cell_scale=1.2),
TableColumn(schema.change_suffix, STAR_STAR_SUPERSCRIPT,
OSCIChangeRankingSchema.active_change, change_format_rule,
cell_scale=1.2),
TableColumn(schema.total, NUMBER_SUPERSCRIPT(2),
OSCIChangeRankingSchema.total, number_cell_format_rule,
cell_scale=1.2),
TableColumn(schema.change_suffix, STAR_STAR_SUPERSCRIPT,
OSCIChangeRankingSchema.total_change, change_format_rule,
cell_scale=1.2)
]
def __init__(self, sheet_name: str, from_date: datetime, to_date: datetime, top_size: int):
self.writer, self.buffer = DataLake().public.get_excel_writer()
self.workbook: Workbook = self.writer.book
self.worksheet: Worksheet = self.workbook.add_worksheet(sheet_name)
self.from_date = from_date
self.to_date = to_date
self.top_size = top_size
self.superscript_format = self.get_format(self.superscript_format_rule)
def get_format(self, *rules: Rule) -> Format:
return self.workbook.add_format(reduce_rules(rules))
def write(self, df):
header_position = Position(0, 1)
table_header_position = Position(header_position.row + 2, header_position.col)
comments_position = Position(table_header_position.row + 1,
table_header_position.col + len(self.table_columns) + 1)
table_position = Position(table_header_position.row + 1, table_header_position.col)
self._write_header(position=header_position)
self._write_table_header(start_from=table_header_position)
self._write_comments(start_from=comments_position)
self._write_table(df, start_from=table_position)
def _write_header(self, position: Position = Position(0, 1)):
self.worksheet.write(*position,
f'{self.from_date:%Y} (differences from {self.from_date:%B, %d} to {self.to_date:%B, %d})')
def _write_comments(self, start_from: Position = Position(3, 9)):
row = start_from.row
self.worksheet.write_rich_string(row, start_from.col,
self.superscript_format, NUMBER_SUPERSCRIPT(1),
' Active Contributors are those who authored 10 '
'or more pushes in the time period')
row += 1
self.worksheet.write_rich_string(row, start_from.col,
self.superscript_format, NUMBER_SUPERSCRIPT(2),
' Total Community counts those who authored 1 '
'or more pushes in the time period')
row += 1
self.worksheet.write_rich_string(row, start_from.col,
self.superscript_format, STAR_STAR_SUPERSCRIPT,
' Changes are relative to the metrics at the end of the previous month')
row += 2
self.worksheet.write(row, start_from.col,
f'The top {self.top_size} is calculated using the Active Contributors metric')
row += 1
self.worksheet.write(row, start_from.col,
'If two companies have equal Active Contributors, '
'their relative positions are determined by Total Community')
def _write_table_header(self, start_from: Position):
row, col = start_from
header_format = self.get_format(self.border_rule_1, self.bold_format_rule)
superscript_format = self.get_format(self.bold_format_rule, self.superscript_format_rule)
border_format = self.get_format(self.border_rule_1)
center_header_format = self.get_format(self.border_rule_1, self.bold_format_rule, self.align_center_rule)
for table_column in self.table_columns:
if table_column.superscript_suffix:
self.worksheet.write_rich_string(row, col, header_format, table_column.name,
superscript_format, table_column.superscript_suffix, border_format)
else:
self.worksheet.write(row, col, table_column.name,
center_header_format
if table_column.name in {self.schema.position, self.schema.position_change}
else header_format)
col_width = len(table_column.name) * table_column.cell_scale
self.worksheet.set_column(col, col, width=col_width)
col += 1
def _write_table(self, df: pd.DataFrame, start_from: Position):
row, start_col = start_from
for record in df.head(self.top_size).to_dict('records'):
col = start_col
for table_column in self.table_columns:
self.worksheet.write(row, col, record[table_column.df_key], self.get_format(table_column.format_rule))
col += 1
row += 1
def save(self) -> BytesIO:
self.writer.save()
return self.buffer
class OSCIChangeRankingExcel:
schema = OSCIChangeRankingExcelSchema
base_name = 'OSCI_Ranking'
dir_name = 'SolutionsHub_OSCI_change_ranking'
def __init__(self, to_date: datetime, from_date: Optional[datetime] = None, rows_limit: int = 100):
self.from_date = from_date if from_date else self.get_previous_date(date=to_date)
self.to_date = to_date
self.rows_limit = rows_limit
@staticmethod
def get_previous_date(date: datetime):
if date.month == 1:
return datetime(year=date.year, month=date.month, day=1)
return datetime(year=date.year, month=date.month, day=1) - timedelta(days=1)
@property
def name(self) -> str:
return DataLake().public.get_osci_change_excel_report_name(base_report_name=self.base_name,
date=self.to_date)
@property
def url(self) -> str:
return DataLake().public.get_osci_change_excel_report_url(base_report_name=self.base_name,
report_dir_name=self.dir_name,
date=self.to_date)
@property
def path(self) -> str:
return DataLake().public.get_osci_change_excel_report_path(base_report_name=self.base_name,
report_dir_name=self.dir_name,
date=self.to_date)
def save(self, df: pd.DataFrame):
DataLake().public.write_bytes_to_file(path=self.path,
buffer=self._write(df))
def _write(self, change_ranking_df: pd.DataFrame) -> BytesIO:
writer = OSCIChangeExcelWriter(sheet_name=self.base_name,
from_date=self.from_date,
to_date=self.to_date,
top_size=self.rows_limit)
df = change_ranking_df.reset_index().rename(columns={OSCIChangeRankingSchema.position: self.schema.position})
df[self.schema.position] += 1
df[self.schema.position_change] *= -1
df = df.fillna(0)
writer.write(df)
return writer.save()