osci/crawlers/github/gharchive.py (13 lines of code) (raw):

"""Copyright since 2019, EPAM Systems

   This file is part of OSCI.

   OSCI is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   OSCI is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with OSCI.  If not, see <http://www.gnu.org/licenses/>."""
import datetime
import logging

from .events.crawler import get_hour_push_events_commits
from .rest import GithubArchiveRest
from osci.datalake import DataLake

log = logging.getLogger(__name__)


def get_github_daily_push_events(day: datetime.datetime) -> None:
    """Crawl push-event commits for each of the 24 hours of ``day`` and save them.

    For every hour in ``range(24)`` the hour field of ``day`` is replaced
    (all other fields of ``day`` are kept as passed in), the commits for that
    timestamp are fetched with ``get_hour_push_events_commits`` and the result
    is handed to ``DataLake().landing.save_push_events_commits``.

    A single ``GithubArchiveRest`` context is opened once and shared by all
    24 hourly requests.

    :param day: timestamp identifying the day to crawl; only its hour field
        is overridden while iterating.
    """
    with GithubArchiveRest() as rest:
        for hour in range(24):
            # Build the hourly timestamp *before* logging so the message
            # names the hour actually being crawled (previously the log ran
            # first and reported the preceding iteration's timestamp).
            hour_start = day.replace(hour=hour)
            log.info('Crawl events for %s', hour_start)
            push_events_commits = get_hour_push_events_commits(day=hour_start, rest=rest)
            DataLake().landing.save_push_events_commits(
                push_event_commits=push_events_commits,
                date=hour_start,
            )