in osci/transformers/rankers/employees_ranking.py [0:0]
def get_amount_employees_monthly(df: DataFrame,
                                 author_email_field: str,
                                 datetime_field: str,
                                 result_employee_field: str = 'Employees',
                                 result_month_field: str = 'Month') -> DataFrame:
    """Get amount of employees (that have any activity) monthly for company

    An employee is identified by the commit author email and is counted once
    per month, regardless of how many rows they have in that month.

    :param df: PushEventsCommits
    :param author_email_field: Commit author email field
    :param datetime_field: Event created at datetime field
    :param result_employee_field: Field in output df that contains the amount of employees
    :param result_month_field: Field in output df that contains the month (formatted as ``yyyy-MM``)
    :return: DataFrame with one row per month holding the number of distinct
        author emails seen in that month, sorted by month
    """
    # Reduce every row to an (author email, month) pair.
    monthly_activity = df.select(author_email_field, datetime_field) \
        .withColumn(result_month_field, f.date_format(datetime_field, "yyyy-MM")) \
        .select(author_email_field, result_month_field)

    # Deduplicate the pairs before counting: the input has one row per commit,
    # so counting rows directly would yield the number of commits per month
    # instead of the number of employees.
    return monthly_activity \
        .distinct() \
        .groupBy(result_month_field) \
        .agg(f.count(f.col(author_email_field)).alias(result_employee_field)) \
        .sort(result_month_field)