osci/crawlers/github/events/crawler.py (32 lines of code) (raw):
"""Copyright since 2019, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from datetime import datetime
from typing import Iterator, Iterable
from osci.crawlers.github.rest import GithubArchiveRest, GithubRest
from .unpack import decompress_json_lines
from .parser import (Event, PushEventCommit, parse_events, get_daily_events,
get_push_events, get_push_events_commits, get_company_commits_by_email_domain)
import logging
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
def get_hour_events(day: datetime, rest: GithubArchiveRest) -> Iterator[Event]:
    """Fetch the archive content for one hour and parse it into events.

    :param day: moment forwarded to ``rest.get_hourly_events`` as ``date``;
        presumably its hour selects the archive to download -- confirm
        against ``GithubArchiveRest``.
    :param rest: client used to download the hourly archive content.
    :return: iterator over the parsed events, or an empty iterator when the
        client returns ``None`` for the requested hour.
    """
    hour_content = rest.get_hourly_events(date=day)
    if hour_content is None:
        # Nothing was returned for this hour. Hand back a real (empty)
        # iterator so both code paths honour the declared ``Iterator`` return
        # type instead of leaking a list to callers.
        return iter(())
    hour_json_payloads = decompress_json_lines(content=hour_content)
    return parse_events(payloads=hour_json_payloads)
def get_hour_push_events_commits(day: datetime, rest: GithubArchiveRest) -> Iterator[PushEventCommit]:
    """Return the push-event commits found in one hour of archived events.

    The hour's events are obtained with ``get_hour_events``, passed through
    ``get_push_events`` and the result is handed to
    ``get_push_events_commits``.

    :param day: moment forwarded to ``get_hour_events``.
    :param rest: archive client forwarded to ``get_hour_events``.
    :return: whatever ``get_push_events_commits`` produces for those events.
    """
    return get_push_events_commits(
        push_events=get_push_events(events=get_hour_events(day=day, rest=rest)),
    )
def get_repository_events(repository_name: str, rest: GithubRest) -> Iterator[Event]:
    """Load the events of a single repository and parse them.

    :param repository_name: name passed to ``rest.get_repository_events`` as
        ``repo_name``.
    :param rest: client that supplies the repository's raw event payloads.
    :return: the result of ``parse_events`` applied to those payloads.
    """
    raw_payloads = rest.get_repository_events(repo_name=repository_name)
    return parse_events(payloads=raw_payloads)
def get_repositories_events(repositories_names: Iterable[str], rest: GithubRest) -> Iterator[Event]:
    """Lazily yield the events of several repositories, one repository at a time.

    Repositories are processed in the order given; the events of a repository
    are requested only once iteration reaches it.

    :param repositories_names: names of the repositories to walk through.
    :param rest: client forwarded to ``get_repository_events``.
    :return: generator over the events of every listed repository.
    """
    for name in repositories_names:
        single_repo_events = get_repository_events(repository_name=name, rest=rest)
        yield from single_repo_events
def get_company_repositories_events_commits(repositories_names: Iterable[str],
                                            company: str,
                                            date: datetime,
                                            rest: GithubRest) -> Iterator[PushEventCommit]:
    """Collect a company's push-event commits from the given repositories.

    Pipeline: events of all repositories -> ``get_daily_events`` for ``date``
    -> ``get_push_events`` -> ``get_push_events_commits`` ->
    ``get_company_commits_by_email_domain`` for ``company``.

    :param repositories_names: names of the repositories whose events are read.
    :param company: value forwarded to ``get_company_commits_by_email_domain``;
        presumably the company whose email domain is matched -- confirm in the
        parser module.
    :param date: value forwarded to ``get_daily_events``.
    :param rest: client forwarded to ``get_repositories_events``.
    :return: the result of ``get_company_commits_by_email_domain``.
    """
    all_events = get_repositories_events(repositories_names=repositories_names, rest=rest)
    commits = get_push_events_commits(
        push_events=get_push_events(events=get_daily_events(events=all_events, date=date)),
    )
    return get_company_commits_by_email_domain(commits=commits, company=company)