in osci/transformers/company_contributors_repository_commits.py [0:0]
def get_company_contributors_repository_commits(df: DataFrame,
                                                author_name_field: str,
                                                author_email_field: str,
                                                repo_name_field: str,
                                                language_field: str,
                                                license_field: str,
                                                company_field: str,
                                                commits_id_field: str,
                                                datetime_field: str,
                                                day: date,
                                                result_field: str = 'Commits') -> DataFrame:
    """Count commits per contributor, repository and company for a single day

    The input is narrowed to the columns named by the arguments, restricted to
    rows whose `datetime_field` (cast to a date) equals `day`, and then grouped
    by author name, author email, repository, language, license and company.

    :param df: PushEventsCommits
    :param author_name_field: Commit author name field
    :param author_email_field: Commit author email field
    :param repo_name_field: Repository name field
    :param language_field: Language field
    :param license_field: License field
    :param company_field: Company name field
    :param commits_id_field: Commit identifier (such as SHA)
    :param datetime_field: Event created at datetime field
    :param day: Date of filtration
    :param result_field: Field in output df which contains the amount of commits
    :return: df with one row per group: the six grouping columns plus
        `result_field` holding the count of `commits_id_field` values
    """
    # The grouping keys are also the leading columns of the projection.
    group_keys = [author_name_field, author_email_field,
                  repo_name_field, language_field,
                  license_field, company_field]
    needed_columns = [f.col(name)
                      for name in (*group_keys, commits_id_field, datetime_field)]

    # Compare on the date part only, so the time of day of the event is ignored.
    is_requested_day = f.col(datetime_field).cast('date') == day
    commits_amount = f.count(commits_id_field).alias(result_field)

    daily_commits = df.select(*needed_columns).filter(is_requested_day)
    return daily_commits.groupBy(*group_keys).agg(commits_amount)