in osci/filter/filter_unlicensed.py [0:0]
def filter_and_adjunct_push_event_commit(df: pd.DataFrame, licensed_repos_df: pd.DataFrame, filter_columns: List[str],
                                         adjunct_columns: List[str], default_columns: List[str], right_index: str = "",
                                         left_index: str = "") -> pd.DataFrame:
    """Adjunct license columns to push event commits and drop rows without them

    The `adjunct_columns` of `licensed_repos_df` are indexed by `right_index` and joined
    onto `df` using `left_index` as the key column. Rows that end up with a missing value
    in any of `filter_columns` are dropped and the index is reset.

    :param df: push event commit dataframe
    :param licensed_repos_df: licensed repository dataframe
    :param filter_columns: columns that must be non-null for a row to be kept
    :param adjunct_columns: columns of `licensed_repos_df` that are added to `df`
        (must include `right_index`, since it is selected before being set as index)
    :param default_columns: default required columns in schema, used for the empty fallback frame
    :param left_index: column name on df
    :param right_index: column name on licensed_repos_df
    :return: joined and filtered dataframe, or an empty dataframe with `default_columns`
        when a `KeyError` is raised while selecting, joining or filtering
    """
    try:
        licensed = licensed_repos_df[adjunct_columns].set_index(right_index)
        joined = df.join(licensed, on=left_index)
        return joined.dropna(subset=filter_columns).reset_index(drop=True)
    except KeyError as ex:
        # Local import: only needed on this diagnostic path.
        import io

        # `DataFrame.info()` prints to stdout and returns None, so interpolating its
        # return value would log "None , None". Capture the summary text instead.
        summaries = []
        for frame in (licensed_repos_df, df):
            buffer = io.StringIO()
            frame.info(buf=buffer)
            summaries.append(buffer.getvalue())
        log.warning("`licensed_repos_df` or `df` is empty \n"
                    f"{summaries[0]} , {summaries[1]}")
        log.exception(ex)
        return pd.DataFrame(columns=default_columns)