Source code for silico.analysis

from math import log10, floor

import numpy as np
from scipy.stats import ttest_rel
import pandas as pd


[docs] def paired_t_test(df, col_left, col_right, common_col="seed"): """ Perform and summarize unilateral t-test to decide a difference between columns is significant. Considered calling .round(5) or alike on output for clearer reading. Args: df (pd.DataFrame): The results of the experiment. col_left (str): Name of the "left" column to compare col_right (str): Name of the "left" column to compare common_col (str): Identifier of the column indexing the repetitions of the experiments. Returns: pd.Dataframe: Dataframe with the mean values of the left and right column, as well as the p-values of unilateral tests. p-value-less corresponds to the test with alternative hypothesis col_left < col_right. """ # TODO: Single level not considered group_cols = [level.name for level in df.index.levels] if common_col not in group_cols: raise ValueError("Common column %s not found." % common_col) for c in [col_left, col_right]: if c not in df.columns: raise ValueError("Column %s not found" % c) group_cols.remove(common_col) df_eval = df.groupby(group_cols).agg(list) df_out = pd.concat( ( df.groupby(group_cols)[[col_left, col_right]].agg("mean"), pd.Series( df_eval.apply( lambda row: ttest_rel( row[col_left], row[col_right], alternative="less" ).pvalue, axis=1, ), name="p-value-less", ), pd.Series( df_eval.apply( lambda row: ttest_rel( row[col_left], row[col_right], alternative="greater" ).pvalue, axis=1, ), name="p-value-greater", ), ), axis=1, ) return df_out
[docs] def format_mag_err(mag, err, sep=" ± ", increase=0, increase_ones=True): """ Format a magnitude and its error as a string Args: mag (float): Value of the magnitude err (float): Value of the associated error sep (str): Characters to use to join the numbers. Include spaces if needed. increase (int): A number to increase (or decrease if negative) the number of significant digits. increase_ones (bool): Whether the number of significant digits increases by one when the leading digit is one. Returns: str: The representation of the magnitude with its error. """ if np.isnan(err): return "%s%s%s" % (mag, sep, err) if err == 0: # Zero error return "%s%s%s" % (mag, sep, 0) order = floor(log10(err)) if increase_ones and floor(err / 10 ** order) == 1.0: # If flag on and leading digit is 1 order -= 1 order -= increase if order < 0: mag = ("%%.%df" % -order) % mag err = ("%%.%df" % -order) % err else: mag = "%d" % round(mag, -order) err = "%d" % round(err, -order) return "%s%s%s" % (mag, sep, err)
def _format_err(row): out = {} for var in row.index.levels[0]: out[var] = format_mag_err(row[var]["mean"], row[var]["sem"]) return out
[docs] def df_agg_mean(df, group_cols, raw=False): """ Aggregate a dataframe to summarize it with the mean and its error Args: df (pd.DataFrame): The dataframe. group_cols (list of str): Columns used as index for the aggregation. raw (bool): If False, the result is a table of strings representing the number with its error. If True, the columns will an additional level providing both the mean and its error (sem). Returns: pd.DataFrame: The summarizing dataframe """ df_agg = df.groupby(group_cols).agg(['mean', 'sem']) if raw: return df_agg return df_agg.apply(_format_err, axis=1, result_type="expand")