in osci/transformers/contributors.py [0:0]
def get_osci_contributors(df: DataFrame,
author_name_field: str,
author_email_field: str,
company_field: str,
commits_id_field: str,
result_field: str = 'Commits',
limit: int = 5) -> DataFrame:
"""Get top of contributors for each company
:param df: PushEventsCommits
:param author_name_field: Commit author name field
:param author_email_field: Commit author email field
:param commits_id_field: Commit identifier (such as SHA)
:param company_field: Company name field
:param limit: Limit of contributors for company (top size)
:param result_field: Field in output df which must contains amount of commits
:return:
"""
window = Window \
.partitionBy(f.col(company_field)) \
.orderBy(f.col(result_field).desc())
return df \
.select(f.col(company_field), f.col(author_name_field), f.col(author_email_field), f.col(commits_id_field)) \
.groupBy(company_field, author_name_field, author_email_field) \
.agg(f.count(commits_id_field).alias(result_field)) \
.select(f.col('*'), f.row_number().over(window).alias('row_number')) \
.where(f.col('row_number') <= limit).drop(f.col('row_number'))