def get_company_contributors_repository_commits()

in osci/transformers/company_contributors_repository_commits.py [0:0]


def get_company_contributors_repository_commits(df: DataFrame,
                                                author_name_field: str,
                                                author_email_field: str,
                                                repo_name_field: str,
                                                language_field: str,
                                                license_field: str,
                                                company_field: str,
                                                commits_id_field: str,
                                                datetime_field: str,
                                                day: date,
                                                result_field: str = 'Commits') -> DataFrame:
    """Count one day's commits per author, repository and company.

    Rows whose ``datetime_field`` (cast to a date) equals ``day`` are kept,
    grouped by author name, author email, repository, language, license and
    company, and the ``commits_id_field`` values of each group are counted.

    :param df: PushEventsCommits

    :param author_name_field: Commit author name field
    :param author_email_field: Commit author email field
    :param repo_name_field: Repository name field
    :param language_field: Language field
    :param license_field: License field
    :param company_field: Company name field
    :param commits_id_field: Commit identifier (such as SHA)
    :param datetime_field: Event created at datetime field
    :param day: Date of filtration

    :param result_field: Name of the output column holding the commit count
    :return: DataFrame with the six grouping columns plus ``result_field``
    """
    # The same six columns are both projected and used as grouping keys.
    group_keys = [author_name_field, author_email_field,
                  repo_name_field, language_field,
                  license_field, company_field]

    # Project only the columns the aggregation needs.
    projected = df.select(*[f.col(name) for name in group_keys],
                          f.col(commits_id_field), f.col(datetime_field))

    # Compare on the date part only so any time of day within `day` matches.
    on_requested_day = projected.filter(f.col(datetime_field).cast('date') == day)

    commits_total = f.count(commits_id_field).alias(result_field)
    return on_requested_day.groupBy(*group_keys).agg(commits_total)