diff --git a/delta_measure.py b/delta_measure.py
index 4d2025d17eab82638fca47974432cea877a1eb80..84bb84a2676df12806801bb9bf5ae3652514b710 100644
--- a/delta_measure.py
+++ b/delta_measure.py
@@ -5,6 +5,7 @@ import pandas as pd
 import statistics
 import re
 import dataframe_image as dfi
+import scipy.stats
 
 data_overview = pd.DataFrame(pd.read_csv("data_overview/data_overview.csv", index_col=0))
 
@@ -27,6 +28,9 @@ mean_std_dev_list = [[columnName, columnData.mean(), columnData.std()] for colum
 # Create a new DataFrame with the same column names and index labels as data_overview
 z_scores_all_data = pd.DataFrame(columns=data_overview.columns, index=data_overview.index)
 
+p_values_all_data = pd.DataFrame(columns=data_overview.columns, index=data_overview.index)  # same shape as z_scores_all_data; filled cell by cell in the loop below
+
+
 # Iterate over each cell in the data_overview DataFrame and write the corresponding z-score in the z_scores_all_data DataFrame
 for index, row in data_overview.iterrows():
     for column in data_overview.columns:
@@ -34,8 +38,12 @@ for index, row in data_overview.iterrows():
         cell_value = data_overview.loc[index, column]
         z_score = (cell_value - mean) / std_dev
         z_scores_all_data.loc[index, column] = z_score
+        p_value = scipy.stats.norm.sf(abs(z_score))  # one-sided upper-tail probability P(Z > |z|); double it for a two-sided test
+        p_values_all_data.loc[index, column] = p_value  # .loc writes a single cell; plain [] with a tuple key would add a whole new column instead
+
 
 
 dfi.export(z_scores_all_data, "data_overview/z_scores_all_data.png", table_conversion = "matplotlib")
+dfi.export(p_values_all_data, "data_overview/p_values_all_data.png", table_conversion = "matplotlib")
 
 print(z_scores_all_data)