# spotify_confidence/analysis/constants.py (144 lines of code) (raw):
# Copyright 2017-2020 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Tuple, Dict, Union
# Shared string identifiers, defined once so other modules can refer to them
# by constant instead of repeating literals.
# NOTE(review): several names (ORDINAL_GROUP_COLUMN, METHOD -> "method_column")
# suggest these are used as column / argument keys — confirm against callers.

# --- Identifiers whose names describe inputs and settings ---
NUMERATOR = "numerator"
NUMERATOR_SUM_OF_SQUARES = "numerator_sum_of_squares"
DENOMINATOR = "denominator"
BOOTSTRAPS = "bootstraps"
INTERVAL_SIZE = "interval_size"
ALPHA = "alpha"
FINAL_EXPECTED_SAMPLE_SIZE = "final_expected_sample_size"
ORDINAL_GROUP_COLUMN = "ordinal_group_column"
MDE = "mde"
METHOD = "method_column"
CORRECTION_METHOD = "correction_method"
ABSOLUTE = "absolute"
NUMBER_OF_COMPARISONS = "number_of_comparisons"
TREATMENT_WEIGHTS = "treatment_weights"
IS_BINARY = "is_binary"
FEATURE = "feature"
FEATURE_SUMSQ = "feature_sumsq"
FEATURE_CROSS = "feature_cross"

# --- Identifiers whose names describe estimates, intervals and p-values ---
POINT_ESTIMATE = "point_estimate"
VARIANCE = "variance"
CI_LOWER = "ci_lower"
CI_UPPER = "ci_upper"
# The "adjusted ..." values contain a space; keep them exactly as written.
ADJUSTED_LOWER = "adjusted ci_lower"
ADJUSTED_UPPER = "adjusted ci_upper"
CI_WIDTH = "ci_width"
DIFFERENCE = "difference"
P_VALUE = "p-value"
ADJUSTED_P = "adjusted p-value"
# Suffix pair; presumably appended to names to tell two sides of a
# comparison apart — verify against callers.
SFX1 = "_1"
SFX2 = "_2"
STD_ERR = "std_err"
Z_CRIT = "z_crit"
# ALPHA is defined once, in the first group above; it used to be assigned a
# second time here with the identical value.
ADJUSTED_ALPHA = "adjusted_alpha"
ADJUSTED_ALPHA_POWER_SAMPLE_SIZE = "adjusted_alpha_power_sample_size"

# --- Identifiers whose names describe power and sample size ---
POWER = "power"
POWERED_EFFECT = "powered_effect"
ADJUSTED_POWER = "adjusted_power"
IS_SIGNIFICANT = "is_significant"
REQUIRED_SAMPLE_SIZE = "required_sample_size"
REQUIRED_SAMPLE_SIZE_METRIC = "required_sample_size_for_metric"
OPTIMAL_KAPPA = "optimal_kappa"
# NOTE(review): the value is spelled "optimal_weigghts" (double "g"). It is a
# runtime string, so it is left untouched here; confirm nothing depends on the
# literal before correcting it.
OPTIMAL_WEIGHTS = "optimal_weigghts"

# --- Identifiers whose names describe original vs. variance-reduced values ---
ORIGINAL_POINT_ESTIMATE = "original_point_estimate"
ORIGINAL_VARIANCE = "original_variance"
VARIANCE_REDUCTION = "variance_reduction_rate"
# Names of the multiple-testing correction methods known to this module.
# NOTE(review): the ten base spellings below coincide with the `method`
# values documented for statsmodels' `multipletests`; presumably they are
# forwarded to it — verify against the code that consumes these constants.
BONFERRONI = "bonferroni"
HOLM = "holm"
HOMMEL = "hommel"
SIMES_HOCHBERG = "simes-hochberg"
SIDAK = "sidak"
HOLM_SIDAK = "holm-sidak"
FDR_BH = "fdr_bh"
FDR_BY = "fdr_by"
FDR_TSBH = "fdr_tsbh"
FDR_TSBKY = "fdr_tsbky"

# Two additional Bonferroni-named variants (their semantics are implemented
# elsewhere; only the identifiers are defined here).
BONFERRONI_ONLY_COUNT_TWOSIDED = "bonferroni-only-count-twosided"
BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY = "bonferroni-do-not-count-non-inferiority"

# "spot-1-" prefixed counterpart of each of the ten base methods above
# (SPOT_1 itself pairs with bonferroni).
SPOT_1 = "spot-1-bonferroni"
SPOT_1_HOLM = "spot-1-holm"
SPOT_1_HOMMEL = "spot-1-hommel"
SPOT_1_SIMES_HOCHBERG = "spot-1-simes-hochberg"
SPOT_1_SIDAK = "spot-1-sidak"
SPOT_1_HOLM_SIDAK = "spot-1-holm-sidak"
SPOT_1_FDR_BH = "spot-1-fdr_bh"
SPOT_1_FDR_BY = "spot-1-fdr_by"
SPOT_1_FDR_TSBH = "spot-1-fdr_tsbh"
SPOT_1_FDR_TSBKY = "spot-1-fdr_tsbky"

# Every correction method name defined above, in declaration order.
CORRECTION_METHODS = [
    BONFERRONI,
    HOLM,
    HOMMEL,
    SIMES_HOCHBERG,
    SIDAK,
    HOLM_SIDAK,
    FDR_BH,
    FDR_BY,
    FDR_TSBH,
    FDR_TSBKY,
    BONFERRONI_ONLY_COUNT_TWOSIDED,
    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
    SPOT_1,
    SPOT_1_HOLM,
    SPOT_1_HOMMEL,
    SPOT_1_SIMES_HOCHBERG,
    SPOT_1_SIDAK,
    SPOT_1_HOLM_SIDAK,
    SPOT_1_FDR_BH,
    SPOT_1_FDR_BY,
    SPOT_1_FDR_TSBH,
    SPOT_1_FDR_TSBKY,
]

# Subset of CORRECTION_METHODS listed as supporting confidence intervals.
# NOTE(review): plain SIDAK, HOLM_SIDAK and the FDR_* methods are absent here
# although their SPOT_1_* counterparts are present — confirm this asymmetry is
# intentional.
CORRECTION_METHODS_THAT_SUPPORT_CI = [
    BONFERRONI,
    HOLM,
    HOMMEL,
    SIMES_HOCHBERG,
    BONFERRONI_ONLY_COUNT_TWOSIDED,
    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
    SPOT_1,
    SPOT_1_HOLM,
    SPOT_1_HOMMEL,
    SPOT_1_SIMES_HOCHBERG,
    SPOT_1_SIDAK,
    SPOT_1_HOLM_SIDAK,
    SPOT_1_FDR_BH,
    SPOT_1_FDR_BY,
    SPOT_1_FDR_TSBH,
    SPOT_1_FDR_TSBKY,
]

# Subset of CORRECTION_METHODS listed as requiring metric information:
# the non-inferiority-aware Bonferroni variant plus all SPOT_1_* methods.
CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO = [
    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
    SPOT_1,
    SPOT_1_HOLM,
    SPOT_1_HOMMEL,
    SPOT_1_SIMES_HOCHBERG,
    SPOT_1_SIDAK,
    SPOT_1_HOLM_SIDAK,
    SPOT_1_FDR_BH,
    SPOT_1_FDR_BY,
    SPOT_1_FDR_TSBH,
    SPOT_1_FDR_TSBKY,
]

# Complement of the list above within CORRECTION_METHODS.
# Built with an order-preserving filter rather than a set difference: a set
# of strings iterates in an arbitrary order that can change between
# interpreter runs (string hash randomization), which made the resulting list
# order non-reproducible. The elements are the same as before; only the order
# is now fixed to that of CORRECTION_METHODS.
CORRECTION_METHODS_THAT_DONT_REQUIRE_METRIC_INFO = [
    method
    for method in CORRECTION_METHODS
    if method not in CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO
]
# Identifiers related to hypotheses and non-inferiority margins (NIM).
NULL_HYPOTHESIS = "null_hypothesis"
ALTERNATIVE_HYPOTHESIS = "alternative_hypothesis"
NIM = "non-inferiority margin"
NIM_COLUMN_DEFAULT = "non_inferiority_margin"
PREFERRED_DIRECTION_COLUMN_DEFAULT = "preferred_direction"

# Direction / sidedness labels.
# NOTE: the identifiers are spelled "PREFFERED"; the spelling is kept because
# these are module-level names that other code may import.
INCREASE_PREFFERED = "increase"
DECREASE_PREFFERED = "decrease"
TWO_SIDED = "two-sided"

PREFERENCE = "preference"
PREFERENCE_TEST = "preference_used_in_test"

# Maps "smaller" / "larger" / "two-sided" onto the direction labels above;
# "two-sided" maps to itself.
PREFERENCE_DICT = {
    "smaller": DECREASE_PREFFERED,
    "larger": INCREASE_PREFFERED,
    TWO_SIDED: TWO_SIDED,
}

# Type alias for a NIM specification: a (float, str) pair, a dict from str to
# such pairs, or a bool.
NIM_TYPE = Union[
    Tuple[float, str],
    Dict[str, Tuple[float, str]],
    bool,
]
METHOD_COLUMN_NAME = "method_column_name"

# Names of the statistical methods listed in METHODS below.
CHI2 = "chi-squared"
TTEST = "t-test"
ZTEST = "z-test"
ZTESTLINREG = "z-test-linreg"
BOOTSTRAP = "bootstrap"

# All method names; note that BOOTSTRAP precedes ZTESTLINREG in this list.
METHODS = [
    CHI2,
    TTEST,
    ZTEST,
    BOOTSTRAP,
    ZTESTLINREG,
]

REGRESSION_PARAM = "regression_parameters"