in osci/filter/filter_unlicensed.py [0:0]
def filter_out_unlicensed(date: datetime) -> None:
    """Filter the raw push events commits of one day and save the result per company.

    Reads the repositories table for ``date`` via ``Repositories(date=date).read()``,
    then, for every ``(company, df)`` pair yielded by
    ``staging.get_daily_raw_push_events_commits(date)``, passes the commits and the
    repositories table through ``filter_and_adjunct_push_event_commit`` and stores
    every non-empty result with ``staging.save_push_events_commits``.
    Companies whose filtered frame is empty are skipped (nothing is saved for them).

    :param date: push events on this day
    """
    log.debug(f'Filter out unlicensed push events commits for date {date:%Y-%m-%d}')
    log.debug(f'Read licensed repos for date {date:%Y-%m-%d}')
    licensed_repos_df = Repositories(date=date).read()

    # Resolve the staging area and its schemas once: they do not depend on the
    # company being processed, so there is no need to rebuild them per iteration.
    # NOTE(review): assumes every DataLake() call yields an equivalent object - confirm.
    staging = DataLake().staging
    repositories_schema = staging.schemas.repositories
    push_commits_schema = staging.schemas.push_commits

    for company, df in staging.get_daily_raw_push_events_commits(date):
        log.debug(f'Filter out unlicensed push events commits for date {date:%Y-%m-%d} for {company}')
        # Positional arguments are kept in the original order. Presumably the
        # first list holds the columns to filter on and the second the columns
        # to attach from the repositories table - verify against the helper's
        # signature.
        filtered_df = filter_and_adjunct_push_event_commit(
            df,
            licensed_repos_df,
            [repositories_schema.license],
            [
                repositories_schema.name,
                repositories_schema.language,
                repositories_schema.license,
            ],
            push_commits_schema.required,
            right_index=repositories_schema.name,
            left_index=push_commits_schema.repo_name,
        )
        if filtered_df.empty:
            # Nothing survived the filter for this company; skip the save.
            continue
        staging.save_push_events_commits(
            push_event_commits=filtered_df,
            company_name=company,
            date=date,
        )