Source code for dexpy.power

"""Functions to calculate statistical power for a design."""

import numpy as np
from scipy.stats import f
from scipy.stats import ncf
from patsy import dmatrix


[docs]def f_power(model, design, effect_size, alpha): """Calculates the power of an F test. This calculates the probability that the F-statistic is above its critical value (alpha) given an effect of some size. :param model: A patsy formula for which to calculate power. :type model: patsy.formula :param design: A pandas.DataFrame representing a design. :type design: pandas.DataFrame :param effect_size: The size of the effect that the test should be able to detect (also called a signal to noise ratio). :type effect_size: float :param alpha: The critical value that we want the test to be above. :type alpha: float between 0 and 1 :returns: A list of percentage probabilities that an F-test could detect an effect of the given size at the given alpha value for a particular column. Usage: >>> design = dexpy.factorial.build_factorial(4, 8) >>> print(dexpy.power.f_power("1 + A + B + C + D", design, 2.0, 0.05)) [ 95.016, 49.003, 49.003, 49.003, 49.003 ] """ X = dmatrix(model, design) residual_df = X.shape[0] - X.shape[1] XtXi = np.linalg.inv(np.dot(np.transpose(X), X)) non_centrality = 1 / np.diag(XtXi) # pre-calculate crit value for 1 df, most common case crit_value = f.ppf(1 - alpha, 1, residual_df) power = [] for t in range(0, X.shape[1]): nc = adjust_non_centrality(non_centrality[t], X[:,t]) nc *= effect_size * effect_size / 4.0 p = (1 - ncf.cdf(crit_value, 1, residual_df, nc)) power.append(p) return power
def adjust_non_centrality(nc, x_col): """Adjusts the non-centrality parameter for terms that aren't -1 to 1.""" if always_positive(x_col): return nc * 4 # term goes from 0 to 1 return nc def always_positive(x_col): """Checks that a column is always positive in the design space.""" return np.all(x_col >= 0)