osci/load/company_commits.py (26 lines of code) (raw):
"""Copyright since 2019, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from datetime import datetime
from osci.crawlers.github.events.crawler import get_company_repositories_events_commits
from osci.crawlers.github.rest import GithubRest
from osci.config import Config
from osci.datalake import DataLake, DatePeriodType
import pandas as pd
import logging
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
def load_company_repositories_events_commits(date: datetime, company: str) -> None:
    """Crawl commits for a company's repositories on one date and stage them.

    The push-event commits already staged for ``company`` are read for the
    single-date window ``from_date == to_date == date`` using
    ``DatePeriodType.DTD``. The distinct repository names found there are
    passed to ``get_company_repositories_events_commits`` together with a
    ``GithubRest`` client authenticated by ``Config().github_token``. The
    result is wrapped in a ``pandas.DataFrame`` and handed to
    ``save_private_push_events_commits``.

    When no staged events exist for that date a warning is logged and nothing
    is crawled or saved.

    :param date: the date whose events are processed
    :param company: company name used both for the lookup and for the save
    :return: None
    """
    staged_events = DataLake().staging.get_push_events_commits(
        company=company,
        from_date=date,
        to_date=date,
        date_period_type=DatePeriodType.DTD,
    )
    commits_schema = DataLake().staging.schemas.push_commits

    # Guard clause: nothing staged for this company/date, so there is nothing to crawl.
    if staged_events.empty:
        log.warning(f'No {company} events at {date}')
        return None

    with GithubRest(token=Config().github_token) as rest:
        repo_names = staged_events[commits_schema.repo_name].unique()
        crawled_commits = get_company_repositories_events_commits(
            repositories_names=repo_names,
            date=date,
            company=company,
            rest=rest,
        )
        # NOTE(review): the result is materialized and saved while the REST
        # client is still open, in case the crawler produces items lazily -
        # confirm against get_company_repositories_events_commits.
        commits_frame = pd.DataFrame(crawled_commits)
        staging_area = DataLake().staging
        staging_area.save_private_push_events_commits(
            push_event_commits=commits_frame,
            company_name=company,
            date=date,
        )