def get_amount_employees_monthly()

in osci/transformers/rankers/employees_ranking.py [0:0]


def get_amount_employees_monthly(df: DataFrame,
                                 author_email_field: str,
                                 datetime_field: str,
                                 result_employee_field: str = 'Employees',
                                 result_month_field: str = 'Month') -> DataFrame:
    """Get the monthly number of distinct employees that have any activity.

    Each author email is counted at most once per month, no matter how many
    rows (commits) it has in that month. Rows whose author email is null do
    not contribute to the count.

    :param df: PushEventsCommits
    :param author_email_field: Commit author email field
    :param datetime_field: Event created at datetime field
    :param result_employee_field: Field in output df which will contain the amount of employees
    :param result_month_field: Field in output df which will contain the month (formatted as ``yyyy-MM``)
    :return: DataFrame with one row per month, holding `result_month_field` and
        `result_employee_field`, sorted by month
    """
    # Reduce every event to an (author email, "yyyy-MM") pair.
    monthly_authors = df.select(author_email_field, datetime_field) \
        .withColumn(result_month_field, f.date_format(datetime_field, "yyyy-MM")) \
        .select(author_email_field, result_month_field)

    # De-duplicate the pairs before counting: without this step the aggregate
    # would count commit rows, not employees.
    return monthly_authors \
        .distinct() \
        .groupBy(result_month_field) \
        .agg(f.count(f.col(author_email_field)).alias(result_employee_field)) \
        .sort(result_month_field)