osci/datalake/schemas/bq.py (243 lines of code) (raw):
"""Copyright since 2020, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from google.cloud import bigquery
from .public import PublicSchemas
class BaseBigQueryOSCIRankingReport:
    """Shared BigQuery layout for the contributors ranking report tables.

    The base ``table_id`` is an empty string; concrete subclasses supply the
    real table identifier.
    """

    table_id = ''

    class Columns:
        """BigQuery column names of the ranking report."""

        total = 'Total_community'
        active = 'Active_contributors'
        company = 'Company'
        date = 'Date'
        position = 'Position'

    # Each pair is (column name, SQL type); field order follows the pairs.
    # The pair tuple is evaluated in class scope, so ``Columns`` resolves.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.position, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.company, bigquery.enums.SqlTypeNames.STRING),
            (Columns.active, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.date, bigquery.enums.SqlTypeNames.DATE),
        )
    ]

    # Translates ``PublicSchemas.company_contributors_ranking`` fields to the
    # BigQuery columns above. Position and date have no entry here.
    _public = PublicSchemas.company_contributors_ranking
    mapping = {
        _public.company: Columns.company,
        _public.active: Columns.active,
        _public.total: Columns.total,
    }
    del _public  # keep the temporary alias out of the class namespace
class BigQueryOSCIRankingReport(BaseBigQueryOSCIRankingReport):
    """Ranking report layout bound to the ``OSCI.OSCI_Ranking`` table."""

    table_id = 'OSCI.OSCI_Ranking'
class BigQueryOSCIRankingReportMTD(BaseBigQueryOSCIRankingReport):
    """Ranking report layout bound to the ``OSCI.OSCI_Ranking_MTD`` table."""

    table_id = 'OSCI.OSCI_Ranking_MTD'
class BaseBigQueryChangeOSCIRankingReport:
    """Shared BigQuery layout for the OSCI contributors and community change (DTD) report.

    The base ``table_id`` is an empty string; concrete subclasses supply the
    real table identifier.
    """

    table_id = ''

    class Columns:
        """BigQuery column names of the change ranking report."""

        position = 'Position'
        position_change = 'Position_change'
        company = 'Company'
        active = 'Active_contributors'
        active_change = 'Active_contributors_change'
        total = 'Total_community'
        total_change = 'Total_community_change'
        date = 'Date'

    # Each pair is (column name, SQL type); field order follows the pairs.
    # The pair tuple is evaluated in class scope, so ``Columns`` resolves.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.position, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.position_change, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.company, bigquery.enums.SqlTypeNames.STRING),
            (Columns.active, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_change, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_change, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.date, bigquery.enums.SqlTypeNames.DATE),
        )
    ]

    # Translates ``PublicSchemas.osci_ranking_schema`` fields to the BigQuery
    # columns above. The date column has no entry here.
    _public = PublicSchemas.osci_ranking_schema
    mapping = {
        _public.company: Columns.company,
        _public.active: Columns.active,
        _public.total: Columns.total,
        _public.position: Columns.position,
        _public.position_change: Columns.position_change,
        _public.active_change: Columns.active_change,
        _public.total_change: Columns.total_change,
    }
    del _public  # keep the temporary alias out of the class namespace
class BigQueryOSCIDailyChangeRankingReport(BaseBigQueryChangeOSCIRankingReport):
    """Change ranking layout bound to the ``OSCI.OSCI_Change_ranking_DTD`` table."""

    table_id = 'OSCI.OSCI_Change_ranking_DTD'
class BaseBigQueryOSCICommitsRankingReport:
    """Shared BigQuery layout for the commits ranking report tables.

    The base ``table_id`` is an empty string; concrete subclasses supply the
    real table identifier.
    """

    table_id = ''

    class Columns:
        """BigQuery column names of the commits ranking report."""

        commits = 'Commits'
        company = 'Company'
        date = 'Date'
        position = 'Position'

    # Each pair is (column name, SQL type); field order follows the pairs.
    # The pair tuple is evaluated in class scope, so ``Columns`` resolves.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.position, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.company, bigquery.enums.SqlTypeNames.STRING),
            (Columns.commits, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.date, bigquery.enums.SqlTypeNames.DATE),
        )
    ]

    # Translates ``PublicSchemas.company_commits_ranking`` fields to the
    # BigQuery columns above. Position and date have no entry here.
    _public = PublicSchemas.company_commits_ranking
    mapping = {
        _public.company: Columns.company,
        _public.commits: Columns.commits,
    }
    del _public  # keep the temporary alias out of the class namespace
class BigQueryOSCICommitsRankingReport(BaseBigQueryOSCICommitsRankingReport):
    """Commits ranking layout bound to the ``OSCI.OSCI_Commits_Ranking`` table."""

    table_id = 'OSCI.OSCI_Commits_Ranking'
class BigQueryOSCICommitsRankingReportMTD(BaseBigQueryOSCICommitsRankingReport):
    """Commits ranking layout bound to the ``OSCI.OSCI_Commits_Ranking_MTD`` table."""

    table_id = 'OSCI.OSCI_Commits_Ranking_MTD'
class BigQueryPushEventsCommitsColumns:
    """BigQuery layout of the ``OSCI.PushEventsCommits`` table."""

    table_id = 'OSCI.PushEventsCommits'

    class Columns:
        """BigQuery column names of the push-event commits table."""

        event_id = 'event_id'
        event_created_at = 'event_created_at'
        repo_name = 'repo_name'
        org_name = 'org_name'
        actor_login = 'actor_login'
        sha = 'sha'
        author_name = 'author_name'
        author_email = 'author_email'
        company = 'company'

    # Each pair is (column name, SQL type); field order follows the pairs.
    # Only the event timestamp is non-string.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.event_id, bigquery.enums.SqlTypeNames.STRING),
            (Columns.event_created_at, bigquery.enums.SqlTypeNames.TIMESTAMP),
            (Columns.repo_name, bigquery.enums.SqlTypeNames.STRING),
            (Columns.org_name, bigquery.enums.SqlTypeNames.STRING),
            (Columns.actor_login, bigquery.enums.SqlTypeNames.STRING),
            (Columns.sha, bigquery.enums.SqlTypeNames.STRING),
            (Columns.author_name, bigquery.enums.SqlTypeNames.STRING),
            (Columns.author_email, bigquery.enums.SqlTypeNames.STRING),
            (Columns.company, bigquery.enums.SqlTypeNames.STRING),
        )
    ]
class BigQueryLicensedRepository:
    """BigQuery layout of the OSCI licensed repositories table (``OSCI.LicensedRepositories``)."""

    table_id = 'OSCI.LicensedRepositories'

    class Columns:
        """BigQuery column names of the licensed repositories table."""

        name = 'name'
        language = 'language'
        license = 'license'
        downloaded_at = 'downloaded_at'

    # Each pair is (column name, SQL type); field order follows the pairs.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.name, bigquery.enums.SqlTypeNames.STRING),
            (Columns.language, bigquery.enums.SqlTypeNames.STRING),
            (Columns.license, bigquery.enums.SqlTypeNames.STRING),
            (Columns.downloaded_at, bigquery.enums.SqlTypeNames.DATE),
        )
    ]
class BigQueryCompaniesContributorsRepositoriesCommitsColumns:
    """BigQuery layout of the ``OSCI.CompaniesContributorsRepositoriesCommits`` table.

    This class used to be declared twice in a row with identical bodies; the
    second declaration silently rebound the name. It is now declared once.
    """

    table_id = 'OSCI.CompaniesContributorsRepositoriesCommits'

    class Columns:
        """BigQuery column names of the per-author, per-repository commits table."""

        author_name = 'Author'
        author_email = 'Email'
        company = 'Company'
        repository = 'Repository'
        language = 'Language'
        license = 'License'
        commits = 'Commits'
        date = 'Date'

    # Field order here is the order the fields are handed to BigQuery.
    schema = [
        bigquery.SchemaField(Columns.author_name, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.author_email, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.company, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.repository, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.language, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.license, bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField(Columns.commits, bigquery.enums.SqlTypeNames.INTEGER),
        bigquery.SchemaField(Columns.date, bigquery.enums.SqlTypeNames.DATE),
    ]

    # Translates ``PublicSchemas.company_contributors_repository_commits``
    # fields to the BigQuery columns above; every column has an entry.
    mapping = {
        PublicSchemas.company_contributors_repository_commits.author_name: Columns.author_name,
        PublicSchemas.company_contributors_repository_commits.author_email: Columns.author_email,
        PublicSchemas.company_contributors_repository_commits.company: Columns.company,
        PublicSchemas.company_contributors_repository_commits.repository: Columns.repository,
        PublicSchemas.company_contributors_repository_commits.language: Columns.language,
        PublicSchemas.company_contributors_repository_commits.license: Columns.license,
        PublicSchemas.company_contributors_repository_commits.commits: Columns.commits,
        PublicSchemas.company_contributors_repository_commits.date: Columns.date,
    }
class BigQueryOSCIGeneralRankingReport:
    """BigQuery layout of the general OSCI ranking report (``OSCI.OSCI_General_Ranking``)."""

    table_id = 'OSCI.OSCI_General_Ranking'

    class Columns:
        """BigQuery column names of the general ranking report."""

        position = 'Position'
        position_change_ytd = 'Position_change_YTD'
        position_change_dtd = 'Position_change_DTD'
        position_growth_speed = 'Position_growth_speed'
        commits_ytd = 'Commits_YTD'
        commits_mtd = 'Commits_MTD'
        company = 'Company'
        active_ytd = 'Active_contributors_YTD'
        active_mtd = 'Active_contributors_MTD'
        active_dtd = 'Active_contributors_DTD'
        active_change_ytd = 'Active_contributors_change_YTD'
        active_change_dtd = 'Active_contributors_change_DTD'
        active_contrib_growth_speed = 'Active_contributors_growth_speed'
        total_ytd = 'Total_community_YTD'
        total_mtd = 'Total_community_MTD'
        total_dtd = 'Total_community_DTD'
        total_change_ytd = 'Total_community_change_YTD'
        total_change_dtd = 'Total_community_change_DTD'
        total_com_growth_speed = 'Total_community_growth_speed'
        date = 'Date'

    # Each pair is (column name, SQL type); field order follows the pairs.
    # The pair tuple is evaluated in class scope, so ``Columns`` resolves.
    # NOTE(review): the three *_growth_speed columns are typed INTEGER --
    # confirm the upstream values are integral rather than fractional.
    schema = [
        bigquery.SchemaField(column_name, sql_type)
        for column_name, sql_type in (
            (Columns.position, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.position_change_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.position_change_dtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.position_growth_speed, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.commits_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.commits_mtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.company, bigquery.enums.SqlTypeNames.STRING),
            (Columns.active_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_mtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_dtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_change_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_change_dtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.active_contrib_growth_speed, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_mtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_dtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_change_ytd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_change_dtd, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.total_com_growth_speed, bigquery.enums.SqlTypeNames.INTEGER),
            (Columns.date, bigquery.enums.SqlTypeNames.DATE),
        )
    ]

    # Translates ``PublicSchemas.osci_general_report`` fields to the BigQuery
    # columns above. The date column has no entry here. Two attribute names
    # differ between the sides: ``active_growth_speed`` maps to
    # ``active_contrib_growth_speed`` and ``total_growth_speed`` maps to
    # ``total_com_growth_speed``.
    _public = PublicSchemas.osci_general_report
    mapping = {
        _public.position: Columns.position,
        _public.position_change_ytd: Columns.position_change_ytd,
        _public.position_change_dtd: Columns.position_change_dtd,
        _public.position_growth_speed: Columns.position_growth_speed,
        _public.commits_ytd: Columns.commits_ytd,
        _public.commits_mtd: Columns.commits_mtd,
        _public.company: Columns.company,
        _public.active_ytd: Columns.active_ytd,
        _public.active_mtd: Columns.active_mtd,
        _public.active_dtd: Columns.active_dtd,
        _public.active_change_ytd: Columns.active_change_ytd,
        _public.active_change_dtd: Columns.active_change_dtd,
        _public.active_growth_speed: Columns.active_contrib_growth_speed,
        _public.total_ytd: Columns.total_ytd,
        _public.total_mtd: Columns.total_mtd,
        _public.total_dtd: Columns.total_dtd,
        _public.total_change_ytd: Columns.total_change_ytd,
        _public.total_change_dtd: Columns.total_change_dtd,
        _public.total_growth_speed: Columns.total_com_growth_speed,
    }
    del _public  # keep the temporary alias out of the class namespace