osci/jobs/company_contributors_repository_commits.py (25 lines of code) (raw):

"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from datetime import datetime, timedelta

from pyspark.sql import DataFrame

from osci.transformers.company_contributors_repository_commits import get_company_contributors_repository_commits
from osci.datalake.reports.general.company_contributors_repository_commits import (
    CompanyContributorsRepositoryCommitsFactory
)
from osci.datalake.schemas.public import CompaniesContributorsRepositoryCommits
from osci.datalake.schemas.staging import PushEventsCommitsSchema
from .base import PushCommitsRankingJob


class CompanyContributorsRepositoryCommitsJob(PushCommitsRankingJob):
    """Job that generates company contributors repository commits"""
    REPORT_FACTORY = CompanyContributorsRepositoryCommitsFactory

    def transform(self, df: DataFrame, date: datetime = None, **kwargs) -> DataFrame:
        """Build the company contributors repository commits report for one day.

        Delegates to ``get_company_contributors_repository_commits``, passing
        the column names taken from the report schema
        (``self.report_cls.schema``) and from ``PushEventsCommitsSchema``.

        :param df: input DataFrame handed to the transformer
            (presumably push-event commits -- confirm against the base job)
        :param date: moment whose calendar date the report is built for;
            when ``None`` the previous day relative to ``datetime.now()``
            is used
        :param kwargs: accepted for interface compatibility; not used here
        :return: the DataFrame returned by the transformer
        """
        schema: CompaniesContributorsRepositoryCommits = self.report_cls.schema
        commits_schema = PushEventsCommitsSchema()

        # Default to "yesterday" when no explicit date is supplied.
        # NOTE(review): datetime.now() is naive local time -- confirm this
        # matches the timezone of the event timestamps.
        moment = datetime.now() - timedelta(days=1) if date is None else date
        day = moment.date()

        return get_company_contributors_repository_commits(
            df=df,
            # Each *_field keyword must receive the matching schema column:
            # name -> author_name, email -> author_email.
            author_name_field=schema.author_name,
            author_email_field=schema.author_email,
            repo_name_field=schema.repository,
            language_field=schema.language,
            license_field=schema.license,
            company_field=schema.company,
            commits_id_field=commits_schema.sha,
            datetime_field=commits_schema.event_created_at,
            day=day,
            result_field=schema.commits,
        )