Source code for dexpy.power

"""Functions to calculate statistical power for a design."""

import numpy as np
from scipy.stats import f
from scipy.stats import ncf
from patsy import dmatrix


[docs]def f_power(model, design, effect_size, alpha):
    """Calculates the power of an F test.

    This calculates the probability that the F-statistic is above its critical
    value (alpha) given an effect of some size.

    :param model: A patsy formula for which to calculate power.
    :type model: patsy.formula
    :param design: A pandas.DataFrame representing a design.
    :type design: pandas.DataFrame
    :param effect_size: The size of the effect that the test should be able to detect (also called a signal to noise
        ratio).
    :type effect_size: float
    :param alpha: The critical value that we want the test to be above.
    :type alpha: float between 0 and 1
    :returns: A list of percentage probabilities that an F-test could detect an effect of the given size at the given
        alpha value for a particular column.

    Usage:
      >>> design = dexpy.factorial.build_factorial(4, 8)
      >>> print(dexpy.power.f_power("1 + A + B + C + D", design, 2.0, 0.05))
      [ 95.016, 49.003, 49.003, 49.003, 49.003 ]
    """
    X = dmatrix(model, design)
    residual_df = X.shape[0] - X.shape[1]

    XtXi = np.linalg.inv(np.dot(np.transpose(X), X))
    non_centrality = 1 / np.diag(XtXi)

    # pre-calculate crit value for 1 df, most common case
    crit_value = f.ppf(1 - alpha, 1, residual_df)

    power = []
    for t in range(0, X.shape[1]):
        nc = adjust_non_centrality(non_centrality[t], X[:,t])
        nc *= effect_size * effect_size / 4.0
        p = (1 - ncf.cdf(crit_value, 1, residual_df, nc))
        power.append(p)

    return power

def adjust_non_centrality(nc, x_col):
    """Adjusts the non-centrality parameter for terms that aren't -1 to 1."""
    if always_positive(x_col):
        return nc * 4 # term goes from 0 to 1
    return nc

def always_positive(x_col):
    """Checks that a column is always positive in the design space."""
    return np.all(x_col >= 0)