# scripts/fetch_augur_metrics.py
# Copyright 2018 Twitter, Inc.
# SPDX-License-Identifier: Apache-2.0
"""
Fetch metrics provided by Augur.
https://github.com/osshealth/augur
Progress: https://github.com/twitter/metrics/issues/2
"""
import datetime
import itertools
import json
import os
import pandas as pd
import requests
API_ENDPOINT = "http://newtwitter.augurlabs.io/api/unstable"
PATH_TO_METRICS_DATA = "data"
DATESTAMP = datetime.datetime.now().date().isoformat()
print("LOG: Assuming the current path to be the root of the metrics repository.")
# tracked projects
with open(os.path.join(PATH_TO_METRICS_DATA, "projects_tracked.json")) as f:
PROJECTS_TRACKED = json.load(f)
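# A sketch of the structure this script assumes for projects_tracked.json,
# based on how it is indexed below (the repo names are illustrative, not from
# the real file):
#
#   {
#       "projects": {
#           "twitter": ["repo-one", "repo-two"]
#       }
#   }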
"""
Bus Factor
API: /:owner/:repo/bus_factor
The API endpoint is unstable and experimental as of now. There is no guarantee of a result
each time. That's why bus factor is cached as a one time result and frequently
updated. In future, this metric can be included in _data and saved weekly,
once the API becomes reliable.
Update _metadata/augur/bus_factor.json
"""
# BUS_FACTOR = {}
# bus_factor_json_file = f"{PATH_TO_METRICS_DATA}/augur/bus_factor.json"
# if os.path.exists(bus_factor_json_file):
# with open(bus_factor_json_file) as f:
# BUS_FACTOR = json.load(f)
#
# for org in PROJECTS_TRACKED['projects']:
# for repo in PROJECTS_TRACKED['projects'][org]:
# print(f"Sending request to {API_ENDPOINT}/{org}/{repo}/bus_factor")
# r = requests.get(f"{API_ENDPOINT}/{org}/{repo}/bus_factor")
# try:
# if r.ok:
# print("OK!")
# bus_factor = r.json()[0]
# BUS_FACTOR[f"{org}/{repo}"] = bus_factor
# else:
# print(f"Error! Response code {r.status_code}")
# print(r.content.decode("utf-8"))
# except Exception as e:
# print(f"Error: Something went wrong with /:owner/:repo/bus_factor - {e}")
#
# with open(bus_factor_json_file, "w+") as f:
# json.dump(BUS_FACTOR, f)
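# If re-enabled, the block above assumes the endpoint returns a JSON array
# whose first element is the metric object (hence r.json()[0]), and caches
# results keyed by "org/repo", e.g. {"twitter/repo-one": {...}}.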
"""
Code Changes (repo)
API: /repo-groups/:repo_group_id/repos/:repo_id/code-changes
Updates _metadata/augur/repo_commits.json
"""
# Aggregation helper: collapse each group to a single value. If every value
# in the group is identical (e.g. repo metadata repeated across rows), keep
# that value; otherwise sum the values (e.g. commit counts).
def dedupe_or_sum(group_series):
    if (group_series == group_series.iloc[0]).all():
        return group_series.iloc[0]
    return group_series.sum()
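# A quick illustration of the helper above (values are made up):
#
#   >>> dedupe_or_sum(pd.Series([7, 7, 7]))   # metadata repeated per row
#   7
#   >>> dedupe_or_sum(pd.Series([1, 2, 3]))   # genuine per-row counts
#   6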
# load the repo_ids we need to loop through
with open(os.path.join(PATH_TO_METRICS_DATA, "repo_ids.json")) as f:
REPO_IDS = json.load(f)
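# A sketch of the structure this script assumes for repo_ids.json, based on
# how it is indexed below (the repo_id values are illustrative):
#
#   {
#       "repo-one": [{"repo_id": 12345}],
#       "repo-two": []
#   }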
api_data_commits = []
repo_commits_json_file = f"{PATH_TO_METRICS_DATA}/augur/repo_commits.json"
# NOTE: repo_commits.json is rebuilt from scratch on every run; any previous
# contents are overwritten below rather than merged.
# look up the repo_id for each tracked project in the twitter org
for repo in PROJECTS_TRACKED['projects']['twitter']:
    if not REPO_IDS[repo]:
        print(f"LOG: No repo_id found for {repo}; skipping.")
        continue
    repo_id = REPO_IDS[repo][0]['repo_id']
    # hit the code-changes endpoint with this repo_id
    print(f"Sending request to {API_ENDPOINT}/repo-groups/twitter/repos/{repo_id}/code-changes")
    response = requests.get(f"{API_ENDPOINT}/repo-groups/twitter/repos/{repo_id}/code-changes")
    try:
        if response.ok:
            print("OK!")
            api_data_commits.append(response.json())
        else:
            print(f"Error! Response code {response.status_code}")
            print(response.content.decode("utf-8"))
    except Exception as e:
        print(f"Error: Something went wrong with repo_commits - {e}")
# unnest the per-repo responses and clean them into a single DataFrame
unnested_data = list(itertools.chain.from_iterable(api_data_commits))
df = pd.DataFrame(unnested_data)
df = df.drop("date", axis=1)
df = df.groupby('repo_name').agg(dedupe_or_sum).sort_values('commit_count', ascending=False)
out = df.to_json(orient='index')
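# With orient='index', `out` maps each repo_name to its aggregated row,
# e.g. (illustrative values only): {"repo-one": {"commit_count": 42, ...}}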
with open(repo_commits_json_file, "w") as f:
f.write(out)