Commit b79294eb authored by ismael.rodriguez's avatar ismael.rodriguez

Fixing minor issues.

parent 675a3194
This diff is collapsed.
......@@ -5,101 +5,135 @@ Created on Tue Api 22 16:05:31 2014
@author: Adrián
"""
import csv, re, hashlib
from collections import *
import networkx as nx
def singleton(myClass):
    """Singleton pattern: make the decorated class yield a single instance.

    The first call to the returned factory builds the instance with the
    arguments it receives; every later call returns that same object and
    ignores its arguments.

    Note that the decorated name is rebound to the factory function, not to
    the class itself.
    """
    created = {}

    def getInstance(*args, **kwds):
        try:
            return created[myClass]
        except KeyError:
            # First request: build the instance and remember it.
            instance = myClass(*args, **kwds)
            created[myClass] = instance
            return instance

    return getInstance
# Border colour applied to every node of the decision graph.
color = "#adadad"
# Fill colour used to highlight the nodes and edges selected by evaluate_test.
fillcolor = "#ebebeb"
@singleton
class LimitedSizeDict(OrderedDict):
    """Ordered dictionary with an optional maximum size.

    When an insertion makes the dictionary exceed ``size_limit``, the oldest
    entries are dropped first (FIFO). ``size_limit=None`` means unbounded.

    Because of ``@singleton`` the module-level name refers to a factory
    function, so the base class is called explicitly instead of via
    ``super(LimitedSizeDict, self)``.
    """

    def __init__(self, *args, **kwds):
        # "size_limit" is consumed here so it never ends up stored as a key.
        # It must be set before the base constructor runs, because the base
        # constructor may insert items and therefore trigger the size check.
        self.size_limit = kwds.pop("size_limit", None)
        OrderedDict.__init__(self, *args, **kwds)
        self._check_size_limit()

    def __setitem__(self, key, value):
        OrderedDict.__setitem__(self, key, value)
        self._check_size_limit()

    def _check_size_limit(self):
        """Evict the oldest entries until the size limit is respected."""
        if self.size_limit is None:
            return
        while len(self) > self.size_limit:
            self.popitem(last=False)
# Read the data matrix from a CSV file.
def leer_datos(archivo):
    """Parse a CSV file object into names and a numeric matrix.

    The first row is the header: its first cell is returned as ``palabra``
    and the remaining cells are the algorithm names. Every other row starts
    with a data set name followed by one number per algorithm.

    Returns a dict with the keys ``palabra``, ``nombres_conj_datos``,
    ``nombres_algoritmos`` and ``matriz_datos`` (one list of floats per
    data row).

    Raises ``Exception`` when a row has fewer than three cells, when a data
    row does not have one value per algorithm, when an algorithm or data set
    name is repeated, or when a value is not an unsigned decimal number
    (optionally with fraction and exponent).
    """
    patron_numeros = re.compile(r'^\d+(\.\d+)?([eE][+-]?\d+)?$')
    palabra = ""
    nombres_conj_datos = []
    nombres_algoritmos = []
    matriz_datos = []

    for numero_linea, fila in enumerate(csv.reader(archivo)):
        if len(fila) < 3:
            raise Exception("Data format error.")

        if numero_linea == 0:
            # Header row: corner cell plus the algorithm names.
            palabra = fila[0]
            for nombre in fila[1:]:
                if nombre in nombres_algoritmos:
                    raise Exception("Algorithm name repeated.")
                nombres_algoritmos.append(nombre)
            continue

        if len(fila) != len(nombres_algoritmos) + 1:
            raise Exception("Data format error")

        if fila[0] in nombres_conj_datos:
            # The original reported this as a repeated *algorithm* name.
            raise Exception("Data set name repeated.")
        nombres_conj_datos.append(fila[0])

        lista_datos = []
        for celda in fila[1:]:
            if not patron_numeros.match(celda):
                raise Exception("Number \"" + celda + "\" not valid in line " + str(numero_linea + 1) + ".")
            lista_datos.append(float(celda))
        matriz_datos.append(lista_datos)

    return {"palabra": palabra, "nombres_conj_datos": nombres_conj_datos,
            "nombres_algoritmos": nombres_algoritmos, "matriz_datos": matriz_datos}


# Decision graph used by evaluate_test to explain which test was chosen.
G = nx.DiGraph()

# Nodes
G.add_node("parametric_conditions", label="Normality AND\nHomocedasticity?", shape="diamond")

G.add_node("parametric", label="Parametric test")
G.add_node("groups_parametric", label="Number of\ngroups (k)?", shape="diamond")
G.add_node("paired_ttest", label="Paired\nsamples?", shape="diamond")
G.add_node("ttest_rel", label="t-test\npaired samples", shape="box", style="rounded")
G.add_node("ttest_ind", label="t-test\nunpaired samples", shape="box", style="rounded")
G.add_node("paired_anova", label="Paired\nsamples?", shape="diamond")
G.add_node("anova", label="ANOVA\nbetween cases", shape="box", style="rounded")
G.add_node("anova_within", label="ANOVA\nwithin cases", shape="box", style="rounded")

G.add_node("nonparametric", label="Non Parametric test")
G.add_node("groups_nonparametric", label="Number of\ngroups (k)?", shape="diamond")
G.add_node("paired_wilcoxon", label="Paired\nsamples?", shape="diamond")
G.add_node("wilcoxon_test", label="Wilcoxon", shape="box", style="rounded")
G.add_node("mannwhitneyu_test", label="Mann Whitney U", shape="box", style="rounded")
G.add_node("sample_ranking", label="Sample\nsize (n)?", shape="diamond")
G.add_node("friedman_test", label="Friedman", shape="box", style="rounded")
G.add_node("aligned_ranks_test", label="Aligned Ranks", shape="box", style="rounded")

# Default look of every node: grey border, white fill.
for node in G.nodes():
    G.node[node].update({
        'color': color,
        'style': G.node[node].get('style', "") + ",filled",
        'fillcolor': 'white',
    })

# Edges
G.add_edge("parametric_conditions", "parametric", label="yes")
G.add_edge("parametric_conditions", "nonparametric", label="no")

G.add_edge("parametric", "groups_parametric")
G.add_edge("groups_parametric", "paired_ttest", label="k = 2")
G.add_edge("paired_ttest", "ttest_rel", label="paired")
G.add_edge("paired_ttest", "ttest_ind", label="unpaired")
G.add_edge("groups_parametric", "paired_anova", label="k > 2")
G.add_edge("paired_anova", "anova_within", label="paired")
G.add_edge("paired_anova", "anova", label="unpaired")

G.add_edge("nonparametric", "groups_nonparametric")
G.add_edge("groups_nonparametric", "paired_wilcoxon", label="k = 2")
G.add_edge("paired_wilcoxon", "wilcoxon_test", label="paired")
G.add_edge("paired_wilcoxon", "mannwhitneyu_test", label="unpaired")
G.add_edge("groups_nonparametric", "sample_ranking", label="k > 2")
G.add_edge("sample_ranking", "friedman_test", label="k > 4\n and n >= 2k")
G.add_edge("sample_ranking", "aligned_ranks_test", label="k < 5\nor n < 2k")

# Pad every edge label with spaces and give edges the same default style.
for edge in G.edges():
    G.edge[edge[0]][edge[1]].update({
        'label': " " + G.edge[edge[0]][edge[1]].get('label', "") + " ",
        'style': G.edge[edge[0]][edge[1]].get('style', "") + ",filled",
        'fillcolor': 'white',
    })


def evaluate_test(data):
    """Walk the decision tree and return the chosen test and its graph.

    ``data`` is read through the keys ``normality``, ``homocedasticity``,
    ``k``, ``paired`` and, only for the non parametric k > 2 branch, ``n``.

    Returns ``{'test': <identifier>, 'graph': <graph as string>}`` where the
    nodes and edges on the path taken are filled with ``fillcolor``.
    """
    # Highlight on a private copy: updating the module-level graph would make
    # the highlights of earlier calls show up in every later result.
    graph = G.copy()

    # Tree logic: `path` lists the visited nodes from the root to the leaf.
    if data['normality'] and data['homocedasticity']:
        path = ["parametric_conditions", "parametric", "groups_parametric"]
        if data['k'] > 2:
            # The graph has no direct groups_parametric -> anova edge; the
            # path has to go through the "paired samples?" decision node.
            path.append("paired_anova")
            if data['paired']:
                path.append("anova_within")
                # NOTE(review): identifier chosen for the paired case;
                # confirm it matches what the caller expects.
                test = 'anova_within'
            else:
                path.append("anova")
                test = 'anova'
        else:
            path.append("paired_ttest")
            if data['paired']:
                path.append("ttest_rel")
                test = 'ttest'
            else:
                path.append("ttest_ind")
                test = 'ttest_ind'
    else:
        path = ["parametric_conditions", "nonparametric", "groups_nonparametric"]
        if data['k'] > 2:
            path.append("sample_ranking")
            if data['k'] < 5 or data['n'] < 2*data['k']:
                path.append("aligned_ranks_test")
                test = 'aligned_ranks'
            else:
                path.append("friedman_test")
                test = 'friedman'
        else:
            path.append("paired_wilcoxon")
            if data['paired']:
                path.append("wilcoxon_test")
                test = 'wilcoxon'
            else:
                path.append("mannwhitneyu_test")
                test = 'mannwhitneyu'

    # Highlight every visited node and every edge between consecutive nodes.
    for name in path:
        graph.node[name].update({"fillcolor": fillcolor})
    for source, target in zip(path, path[1:]):
        graph.edge[source][target].update({"fillcolor": fillcolor})

    return {'test': test, 'graph': str(nx.to_agraph(graph))}
# Compute the MD5 digest of a file.
def generar_md5(archivo):
    """Return the hexadecimal MD5 digest of a file object.

    The file is read in 64 KiB chunks from its current position until it is
    exhausted, and is rewound to the beginning before returning.
    """
    tam_bloque = 65536
    resumen = hashlib.md5()
    while True:
        trozo = archivo.read(tam_bloque)
        if len(trozo) == 0:
            break
        resumen.update(trozo)
    # Leave the file ready to be read again by the caller.
    archivo.seek(0, 0)
    return resumen.hexdigest()
def clean_missing_values(values, delete_row=True):
n = len(values.values()[0])
......
......@@ -205,7 +205,7 @@ def nemenyi_multitest(ranks):
p_values = [2*(1-st.norm.cdf(abs(z))) for z in z_values]
# Sort values by p_value so that p_0 < p_1
p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
m = k*(k-1)/2.
m = int(k*(k-1)/2.)
adj_p_values = [min(m*p_value,1) for p_value in p_values]
return comparisons, z_values, p_values, adj_p_values
......@@ -222,7 +222,7 @@ def holm_multitest(ranks):
p_values = [2*(1-st.norm.cdf(abs(z))) for z in z_values]
# Sort values by p_value so that p_0 < p_1
p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
m = k*(k-1)/2.
m = int(k*(k-1)/2.)
adj_p_values = [min(max((m-j)*p_values[j] for j in range(i+1)), 1) for i in range(m)]
return comparisons, z_values, p_values, adj_p_values
......@@ -239,8 +239,8 @@ def hochberg_multitest(ranks):
p_values = [2*(1-st.norm.cdf(abs(z))) for z in z_values]
# Sort values by p_value so that p_0 < p_1
p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
m = k*(k-1)/2.
adj_p_values = [min(max((m+1-j)*p_values[j-1] for j in range(m-1, i, -1)), 1) for i in range(m)]
m = int(k*(k-1)/2.)
adj_p_values = [max((m+1-j)*p_values[j-1] for j in range(m, i, -1))for i in range(m)]
return comparisons, z_values, p_values, adj_p_values
......@@ -256,7 +256,7 @@ def finner_multitest(ranks):
p_values = [2*(1-st.norm.cdf(abs(z))) for z in z_values]
# Sort values by p_value so that p_0 < p_1
p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))
m = k*(k-1)/2.
m = int(k*(k-1)/2.)
adj_p_values = [min(max(1-(1-p_values[j])**(m/float(j+1)) for j in range(i+1)), 1) for i in range(m)]
return comparisons, z_values, p_values, adj_p_values
......
......@@ -6,22 +6,61 @@ import scipy.stats as st
def anova_test(*args):
    """One-way ANOVA between cases.

    Each positional argument is one group of samples; all groups must have
    the same number of samples.

    Returns ``(F, p_value, pivots)`` where ``pivots`` holds, for each group,
    its mean divided by ``sqrt(2 * MS_within / n)``.

    Raises ``ValueError`` when fewer than two groups are given or when the
    groups have different sizes.
    """
    # Local import: the top-level scipy aliases of these NumPy functions
    # (sp.sum, sp.mean, sp.sqrt) are no longer available in current SciPy.
    import numpy as np

    k = len(args)
    if k < 2: raise ValueError('Less than 2 groups')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1: raise ValueError('Unequal number of samples')

    # Precalcs
    x_j = [np.sum(group) for group in args]
    x_t = np.sum(x_j)
    correction = x_t**2/float(k*n)

    # Variances
    # The outer loop (groups) must come first in the comprehension; with the
    # clauses reversed `group` is not defined when the values are iterated.
    ss_t = np.sum([v**2 for group in args for v in group]) - correction
    ss_bg = np.sum([x_j[j]**2/float(n) for j in range(k)]) - correction
    ss_wg = ss_t - ss_bg

    # Degrees of freedom
    df_bg = k - 1
    df_wg = n*k - k

    F = (ss_bg/df_bg)/(ss_wg/df_wg)
    p_value = 1 - st.f.cdf(F, df_bg, df_wg)

    # Pivots
    pivots = [np.mean(group)/np.sqrt(2*(ss_wg/df_wg)/float(n)) for group in args]

    return F, p_value, pivots
def anova_within_test(*args):
    """One-way ANOVA within cases (repeated measures).

    Each positional argument is one group of samples; all groups must have
    the same number of samples, and the i-th sample of every group is
    treated as belonging to the same subject.

    Returns ``(F, p_value, pivots)`` where ``F`` compares the between-groups
    mean square against the residual mean square, and ``pivots`` holds, for
    each group, its mean divided by ``sqrt(2 * MS_within / n)``.

    Raises ``ValueError`` when fewer than two groups are given or when the
    groups have different sizes.
    """
    # Local import: the top-level scipy aliases of these NumPy functions
    # (sp.sum, sp.mean, sp.sqrt) are no longer available in current SciPy.
    import numpy as np

    k = len(args)
    if k < 2: raise ValueError('Less than 2 groups')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1: raise ValueError('Unequal number of samples')

    # Precalcs
    x_j = [np.sum(group) for group in args]
    x_t = np.sum(x_j)
    # Sum per subject, i.e. across groups for each sample position.
    s_i = [np.sum([group[i] for group in args]) for i in range(n)]
    correction = x_t**2/float(k*n)

    # Variances
    # The outer loop (groups) must come first in the comprehension; with the
    # clauses reversed `group` is not defined when the values are iterated.
    ss_t = np.sum([v**2 for group in args for v in group]) - correction
    ss_bg = np.sum([x_j[j]**2/float(n) for j in range(k)]) - correction
    ss_bs = np.sum([s_i[i]**2/float(k) for i in range(n)]) - correction
    ss_wg = ss_t - ss_bg
    ss_res = ss_t - ss_bg - ss_bs

    # Degrees of freedom
    df_bg = k - 1
    df_wg = n*k - k
    df_res = (n-1)*(k-1)

    F = (ss_bg/df_bg)/(ss_res/df_res)
    p_value = 1 - st.f.cdf(F, df_bg, df_res)

    # Pivots
    pivots = [np.mean(group)/np.sqrt(2*(ss_wg/df_wg)/float(n)) for group in args]

    return F, p_value, pivots
def bonferroni_test(pivots, n):
k = len(pivots)
......
Header set Access-Control-Allow-Origin "*"
Header set Access-Control-Allow-Methods "POST, GET, OPTIONS , PUT"
......@@ -24,7 +24,8 @@
<label class="col-lg-5 control-label">Select test<a href="#helpModal" data-toggle="modal" more-info="normality"> [?] </a>:</label>
<div class="col-lg-7">
<div class="btn-group-vertical" data-toggle="buttons">
<label name="test_cond" class="btn btn-default active"><input type="radio" name="test" id="option1" value="anova" checked> ANOVA</label>
<label name="test_cond" class="btn btn-default active"><input type="radio" name="test" id="option1" value="anova" checked> ANOVA between cases</label>
<label name="test_cond" class="btn btn-default"><input type="radio" name="test" id="option1" value="anova-within"> ANOVA within cases</label>
</div>
</div>
</div>
......@@ -51,7 +52,7 @@
</div>
<div class="form-group">
<div class="col-lg-offset-7 col-lg-5">
<a data-toggle="tab" href="#result"><button type="button" class="btn btn-primary" id="apply" test="anova"><label class="glyphicon glyphicon-play"></label> Apply </button></a>
<a data-toggle="tab" href="#result"><button type="button" class="btn btn-primary apply" id="apply" test="anova"><label class="glyphicon glyphicon-play"></label> Apply </button></a>
</div>
</div>
</form>
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script src="js/loader.js"></script>
<script src="js/assistant.js"></script>
<script src="js/viz.js"></script>
</head>
<body>
<div id="topbar"></div>
<div class="container">
<div id="modals"></div>
<div class="row">
<div class="col-sm-12 col-md-12 main">
<h1 class="page-header"> Assistant</h1>
<p align="justify">In this section, an assistant process can be executed in order to automatically estimate the best fitted statistical test for the data provided by the user.
The decision process takes into account the following data:
<ul>
<li>The number of groups available (k)</li>
<li>The number of samples per group (n)</li>
<li>Pairing between groups</li>
<li>The normality of each group (tested using a Shapiro-Wilk test with alpha 0.1)</li>
<li>The homoscedasticity between groups (tested using Levene's test with alpha 0.1)</li>
</ul>
</p>
<br><div align="center">
<a href="#" id="apply" class="btn btn-primary btn-xl" test="assistant">Assist Me!</a>
</div>
<br/>
<br/>
<div id="decision_process" class="panel panel-indigo" style="display: none">
<div class="panel-heading">
Decision process
</div>
<div id="graph" class="panel-body" align="center">
</div>
</div>
</div>
</div>
</div>
</body>
</html>
......@@ -132,7 +132,7 @@ a:hover {
width: 100%;
}
#apply {
.apply {
float:right;
}
......
......@@ -45,7 +45,7 @@
</div>
<div class="form-group">
<div class="col-lg-offset-7 col-lg-5">
<a data-toggle="tab" href="#result"><button type="button" class="btn btn-primary" id="apply" test="homocedasticity"><label class="glyphicon glyphicon-play"></label> Apply </button></a>
<a data-toggle="tab" href="#result"><button type="button" class="btn btn-primary apply" id="apply" test="homocedasticity"><label class="glyphicon glyphicon-play"></label> Apply </button></a>
</div>
</div>
</form>
......
......@@ -27,13 +27,50 @@ $(document).ready(function(){
$("#warning").html("There is no file uploaded with the data needed to do the test. Please upload one before applying any test.");
$("#warning").show();
$('button').prop('disabled', true);
}
} else {
names = JSON.parse(sessionStorage.data).names
if ($("#group1").length) {
names.forEach(function(name) {
$("#group1").append("<option value=\""+name+"\">"+name+"</option>");
$("#group2").append("<option value=\""+name+"\">"+name+"</option>");
});
if ($("#group2 option").length > 1) {
$($("#group2 option")[1]).prop('selected', true);
}
}
if ($("#control").length) {
names.forEach(function(name) {
$("#control").append("<option value=\""+name+"\">"+name+"</option>");
});
}
}
post_hoc_labels = $(document).find("input[type=radio][name=post_hoc]").parent()
post_hoc_labels.on('click', function() {
post_hoc_labels.removeClass('active');
$(this).button('toggle');
});
// Refresh the export text whenever the export modal is about to be shown.
// The selector is passed as the second argument (delegated form); placed in
// the first argument it is parsed as an event name, which left the handler
// bound to "show.bs.modal" for every modal on the page.
$(document).on('show.bs.modal', '#modal_export', function (e) {
var format = $("#export_format").val();
if (format == "latex") {
$("#export_text").val(exportTableToLaTeX($('table')));
} else {
$("#export_text").val(exportTableToCSV($('table')));
}
});
$(document).on('change', '#export_format', function (e) {
var format = $("#export_format").val();
if (format == "latex") {
$("#export_text").val(exportTableToLaTeX($('table')));
} else {
$("#export_text").val(exportTableToCSV($('table')));
}
});
});
......
This diff is collapsed.
//Función para exportar archivos .tex
function exportTableToLaTeX($table, filename) {
/*Función para repetir un string un determinado número de veces.*/
String.prototype.repeat = function (n, d) {
return --n ? this + (d || "") + this.repeat(n, d) : "" + this;
};
var $firstrow = $table.find('tr:has(th)'),
// Temporary delimiter characters unlikely to be typed by keyboard
// This is to avoid accidentally splitting the actual contents
tmpColDelim = String.fromCharCode(11), // vertical tab character
tmpRowDelim = String.fromCharCode(0), // null character
// Actual delimiter characters for LaTeX format
colDelim = "&",
rowDelim = "\\\\\n",
/*Primera línea de la tabla LaTeX.*/
latex = "\\begin{tabular}{"+"c".repeat($firstrow.find('th').length,"|")+"}\n",
latex = latex + $firstrow.map(function (i, row) {
var $row = $(row),
$cols = $row.find('th');
return $cols.map(function (j, col) {
var $col = $(col),
text = $col.text();
return text;
}).get().join(tmpColDelim);
}).get().join(tmpRowDelim)
.split(tmpRowDelim).join(rowDelim)
.split(tmpColDelim).join(colDelim);
latex = latex + "\\\\\n\\hline\n",
/*Resto de líneas de la tabla LaTeX.*/
$rows = $table.find('tr:has(td)'),
latex = latex + $rows.map(function (i, row) {
var $row = $(row),
$cols = $row.find('td');
return $cols.map(function (j, col) {
var $col = $(col),
text = $col.text();
return text;
}).get().join(tmpColDelim);
}).get().join(tmpRowDelim)
.split(tmpRowDelim).join(rowDelim)
.split(tmpColDelim).join(colDelim);
/*Última línea de la tabla LaTeX.*/
latex = latex + "\n\\end{tabular}",
// Data URI
latexData = 'data:application/octet-stream;charset=utf-8,' + encodeURIComponent(latex);
if(navigator.appName == 'Microsoft Internet Explorer'){
var generator = window.open(filename, 'latex', 'height=400,width=600');
generator.document.write('<html><head><title>LaTeX</title>');
generator.document.write('</head><body >');
generator.document.write('<textArea cols=70 rows=15 wrap="off" >');
generator.document.write(latex);
generator.document.write('</textArea>');
generator.document.write('</body></html>');
generator.document.close();
}
else{
$(this).attr({
'download': filename,
'href': latexData,
'target': '_blank'
});
/**
 * Convert one or more HTML tables into LaTeX `tabular` source.
 *
 * @param {jQuery} $table jQuery collection of tables.
 * @returns {string} Empty string for an empty collection, one `tabular`
 *     environment for a single table, or one environment per table separated
 *     by a blank line when several tables are given.
 *
 * Cell text is copied verbatim; LaTeX special characters are not escaped.
 */
function exportTableToLaTeX($table) {
    if ($table.length === 0) {
        return '';
    }
    if ($table.length !== 1) {
        // Several tables: export each one on its own.
        return $.map($table, function (v) {
            return exportTableToLaTeX($(v));
        }).join('\n\n');
    }

    // Delimiter characters for LaTeX format
    var colDelim = "&",
        rowDelim = "\\\\\n";

    // Text of the matching cells of each row, joined into LaTeX table rows.
    function rowsToLaTeX($rows, cellSelector) {
        return $rows.map(function (i, row) {
            return $(row).find(cellSelector).map(function (j, col) {
                return $(col).text();
            }).get().join(colDelim);
        }).get().join(rowDelim);
    }

    var $firstrow = $table.find('tr:has(th)'),
        $rows = $table.find('tr:has(td)'),
        columnCount = $firstrow.find('th').length,
        columnSpec = [],
        latex,
        c;

    // Build "c|c|...|c" locally instead of overriding String.prototype.repeat,
    // which replaced the native method and recursed forever for a count of 0.
    for (c = 0; c < columnCount; c++) {
        columnSpec.push("c");
    }

    // First line of the LaTeX table.
    latex = "\\begin{tabular}{" + columnSpec.join("|") + "}\n";
    latex = latex + rowsToLaTeX($firstrow, 'th');
    latex = latex + "\\\\\n\\hline\n";
    // Remaining lines of the LaTeX table.
    latex = latex + rowsToLaTeX($rows, 'td');
    // Last line of the LaTeX table.
    latex = latex + "\n\\end{tabular}";

    return latex;
}
//Función para exportar archivos .csv
function exportTableToCSV($table, filename) {
var $rows = $table.find('tr:has(th,td)'),
// Temporary delimiter characters unlikely to be typed by keyboard
// This is to avoid accidentally splitting the actual contents
tmpColDelim = String.fromCharCode(11), // vertical tab character
tmpRowDelim = String.fromCharCode(0), // null character
// actual delimiter characters for CSV format
colDelim = '","',
rowDelim = '"\r\n"',
// Grab text from table into CSV formatted string
csv = '"' + $rows.map(function (i, row) {
var $row = $(row),
$cols = $row.find('th,td');
return $cols.map(function (j, col) {
var $col = $(col),
text = $col.text();
return text.replace('"', '""'); // escape double quotes
}).get().join(tmpColDelim);
}).get().join(tmpRowDelim)
.split(tmpRowDelim).join(rowDelim)
.split(tmpColDelim).join(colDelim) + '"',