in spotify_confidence/samplesize/sample_size_calculator.py [0:0]
def _get_treatment_allocations(treatments, comparisons, treatment_costs, treatment_allocations):
    """Validate or generate treatment allocation array.

    If ``treatment_allocations`` is supplied it is validated and returned
    as a new array. Otherwise an allocation is derived in which each
    treatment's share is proportional to its weight in the comparison
    matrix divided by the square root of its relative cost.

    See the footnote on page 31 of "Duflo, E., Glennerster, R., & Kremer,
    M. (2007). Using randomization in development economics research: A
    toolkit. Handbook of Development Economics, 4, 3895–3962." for math.

    Args:
        treatments (int): Number of treatment variants in the a/b test,
            including control. Normalised by
            ``SampleSize._clean_treatments``.
        comparisons ({'control_vs_all', 'all_vs_all'}): Which treatments
            to compare. Turned into a comparison matrix by
            ``SampleSize._get_comparison_matrix``.
        treatment_costs (numpy.ndarray, optional): Array with same length
            as the number of treatments containing positive floats
            specifying the treatments' relative costs. Normalised by
            ``SampleSize._clean_treatment_costs``.
        treatment_allocations (numpy.ndarray/list/tuple, optional): Array
            with same length as the number of treatments containing
            proportion of sample allocated to each treatment. If None,
            the allocation is computed automatically.

    Returns:
        numpy.ndarray: Array with same length as the number of treatments
            containing proportion of sample allocated to each treatment.

    Raises:
        TypeError: If `treatment_allocations` is not None, a
            numpy.ndarray, a list or a tuple, or if its length is not the
            same as the number of treatments.
        ValueError: If the values of custom `treatment_allocations` are
            not all positive.
        ValueError: If the values of custom `treatment_allocations` do
            not sum to one.

    """
    treatments = SampleSize._clean_treatments(treatments)

    if treatment_allocations is not None:
        # Lists and tuples are accepted for convenience and converted up
        # front so the remaining checks can rely on ndarray semantics.
        if isinstance(treatment_allocations, (list, tuple)):
            treatment_allocations = np.array(treatment_allocations)
        if not isinstance(treatment_allocations, np.ndarray) or len(treatment_allocations) != treatments:
            raise TypeError(
                "treatment_allocations must be a numpy array "
                "or list of the same length as the number of "
                "treatments"
            )
        if not (treatment_allocations > 0).all():
            raise ValueError("treatment_allocations values must all be positive")
        # isclose rather than == so float rounding in the caller's
        # proportions does not cause a spurious rejection.
        if not math.isclose(treatment_allocations.sum(), 1.0):
            raise ValueError("treatment_allocations values must sum to one")
        # Return a copy so the caller's array is never aliased.
        return np.array(treatment_allocations)

    comparisons = SampleSize._get_comparison_matrix(treatments, comparisons)
    weighted_comparisons = comparisons / np.sum(comparisons)
    treatment_costs = np.asarray(
        SampleSize._clean_treatment_costs(treatments, treatment_costs),
        dtype=float,
    )

    # Importance of a treatment = total weight of every comparison it
    # takes part in, i.e. its column sum plus its row sum.
    importance = np.sum(weighted_comparisons, axis=0) + np.sum(weighted_comparisons, axis=1)

    # Only the ratio of each treatment against the first one is needed:
    # importance_i / importance_0 * sqrt(cost_0 / cost_i). Computing that
    # single column directly avoids building a treatments x treatments
    # matrix of which all other columns would be discarded.
    ratios_to_first = importance / importance[0] * np.sqrt(treatment_costs[0] / treatment_costs)

    # Normalise so the proportions sum to one.
    return ratios_to_first / np.sum(ratios_to_first)