Commit f866a3ed authored by Bruno López Trigo

Mellorado o rendemento do demostrador para datasets de gran tamaño

parent f9f5cfd9
......@@ -66,17 +66,25 @@
<scope>system</scope>
<systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/cycles.jar</systemPath>
</dependency>
<!-- Dependencia local WEKA -->
<dependency>
<groupId>expliclas</groupId>
<artifactId>weka</artifactId>
<version>3.8.3</version>
<scope>system</scope>
<systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/weka.jar</systemPath>
</dependency>
<dependency>
<groupId>com.github.citiususc</groupId>
<artifactId>SimpleNLG-GL</artifactId>
<version>-SNAPSHOT</version>
</dependency>
<!-- Weka -->
<!-- Weka
<dependency>
<groupId>nz.ac.waikato.cms.weka</groupId>
<artifactId>weka-stable</artifactId>
<version>3.8.0</version>
</dependency>
</dependency> -->
<!-- JJWT -->
<dependency>
<groupId>io.jsonwebtoken</groupId>
......
......@@ -8,5 +8,6 @@ import brunolopez.expliclas.models.DatasetConfig;
/**
 * Contract for the component that trains the tree classifiers and produces
 * the files derived from them (training log and prediction listings).
 */
public interface BuilderManager {

    /**
     * Trains the classifier identified by {@code model} for the given dataset.
     *
     * @param token       identifier used, together with {@code datasetName}, to locate the dataset files
     * @param datasetName name of the dataset to train on
     * @param model       classifier name
     * @param options     options handed to the classifier
     * @return the configuration of the dataset
     * @throws ConflictEx on a conflicting state (presumably an already-built classifier; confirm against the implementation)
     * @throws FormatEx   when the input cannot be processed
     * @throws NotFoundEx when a required file is missing
     */
    DatasetConfig buildModels(String token, String datasetName, String model, String[] options)
            throws ConflictEx, FormatEx, NotFoundEx;

    /**
     * Produces the predictions for the test split of the given dataset.
     *
     * @param token       identifier used, together with {@code datasetName}, to locate the dataset files
     * @param datasetName name of the dataset whose test split is predicted
     * @param model       classifier name
     * @throws ConflictEx declared by the contract
     * @throws FormatEx   when the input cannot be processed
     * @throws NotFoundEx when a required file is missing
     */
    void buildTestPredictions(String token, String datasetName, String model)
            throws ConflictEx, FormatEx, NotFoundEx;
}
......@@ -12,12 +12,18 @@ import brunolopez.expliclas.models.DatasetConfig;
import brunolopez.expliclas.models.Interval;
import brunolopez.expliclas.models.NumericProperty;
import brunolopez.expliclas.utils.FileManager;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.Prediction;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.REPTree;
import weka.classifiers.trees.RandomTree;
......@@ -43,7 +49,8 @@ public class BuilderManagerImpl implements BuilderManager {
File dataset = this.fmanager.getDataset(token, datasetName);
String logLocation = this.fmanager.getLogLocation(token, datasetName, model);
String predictionsLocation = this.fmanager.getPredictionsLocation(token, datasetName, model);
if(new File(logLocation).exists()){
throw new ConflictEx("Classifier already exists");
}
......@@ -58,7 +65,8 @@ public class BuilderManagerImpl implements BuilderManager {
REPTree reptree = null;
PrintStream log;
PrintStream predictions;
if (model.equals("J48")) {
c45model = new J48();
c45model.setOptions(options);
......@@ -76,14 +84,22 @@ public class BuilderManagerImpl implements BuilderManager {
new File(this.fmanager.getLogDirectory(token, datasetName, model)).mkdir();
log = new PrintStream(new FileOutputStream(logLocation, false));
predictions = new PrintStream(new FileOutputStream(predictionsLocation, false));
log.println("=== Run information ===\n");
String cname = "";
if (model.equals("J48")) {
cname = c45model.getClass().getName();
} else if (model.equals("RandomTree")) {
cname = rtree.getClass().getName();
} else if (model.equals("REPTree")) {
cname = reptree.getClass().getName();
switch (model) {
case "J48":
cname = c45model.getClass().getName();
break;
case "RandomTree":
cname = rtree.getClass().getName();
break;
case "REPTree":
cname = reptree.getClass().getName();
break;
default:
break;
}
if (cname.startsWith("weka.classifiers.")) {
......@@ -146,30 +162,39 @@ public class BuilderManagerImpl implements BuilderManager {
log.println("=== Classifier model (full training set) ===");
log.println();
if (model.equals("J48")) {
log.println(c45model.toString());
} else if (model.equals("RandomTree")) {
log.println(rtree.toString());
} else if (model.equals("REPTree")) {
log.println(reptree.toString());
switch (model) {
case "J48":
log.println(c45model.toString());
break;
case "RandomTree":
log.println(rtree.toString());
break;
case "REPTree":
log.println(reptree.toString());
break;
default:
break;
}
Evaluation evalLRN = new Evaluation(instancesLRN, null);
if (model.equals("J48")) {
evalLRN.crossValidateModel(c45model, instancesLRN, 10, new Random(1));
} else if (model.equals("RandomTree")) {
evalLRN.crossValidateModel(rtree, instancesLRN, 10, new Random(1));
} else if (model.equals("REPTree")) {
evalLRN.crossValidateModel(reptree, instancesLRN, 10, new Random(1));
switch (model) {
case "J48":
evalLRN.crossValidateModel(c45model, instancesLRN, 10, new Random(1));
break;
case "RandomTree":
evalLRN.crossValidateModel(rtree, instancesLRN, 10, new Random(1));
break;
case "REPTree":
evalLRN.crossValidateModel(reptree, instancesLRN, 10, new Random(1));
break;
default:
break;
}
log.println(evalLRN.toSummaryString(true));
log.println(evalLRN.toMatrixString());
log.flush();
log.close();
readerLRN.close();
File configLocation;
try{
......@@ -180,12 +205,122 @@ public class BuilderManagerImpl implements BuilderManager {
this.mapper.writeJSON(config, new File(this.fmanager.getConfigLocation(token, datasetName, "es")));
this.mapper.writeJSON(config, new File(this.fmanager.getConfigLocation(token, datasetName, "gl")));
}
evalLRN = new Evaluation(instancesLRN);
switch (model) {
case "J48":
evalLRN.evaluateModel(c45model, instancesLRN, new String[1]);
break;
case "RandomTree":
evalLRN.evaluateModel(rtree, instancesLRN, new String[1]);
break;
case "REPTree":
evalLRN.evaluateModel(reptree, instancesLRN, new String[1]);
break;
default:
break;
}
for(Prediction p: evalLRN.predictions()){
predictions.println(p.actual() + " " + p.predicted());
}
log.flush();
log.close();
readerLRN.close();
predictions.flush();
predictions.close();
} catch (Exception ex) {
throw new FormatEx("Error building log, check format");
}
return config;
}
/**
 * Writes the predictions file for the test split of a dataset.
 *
 * <p>The classifier options are recovered from the "Scheme" line of the
 * classifier log, a classifier of the requested type is built and evaluated
 * on the test instances, and one {@code "<actual> <predicted>"} line per
 * prediction is written to the test-predictions file.</p>
 *
 * <p>Every stream opened here is closed before the method returns, including
 * on failure, so no file handle outlives the call.</p>
 *
 * <p>NOTE(review): the classifier is built on the test instances themselves
 * (the same {@code Instances} object is used for building and evaluating),
 * not on the training data. Confirm this is intended.</p>
 *
 * @param token       identifier passed to the file manager to locate the files
 * @param datasetName name of the dataset whose test split is predicted
 * @param model       classifier name; "J48", "RandomTree" and "REPTree" are handled
 * @throws ConflictEx declared by the interface; not thrown by the code visible here
 * @throws FormatEx   if reading fails with an I/O error or any other error occurs
 *                    while building or evaluating the classifier
 * @throws NotFoundEx if one of the files cannot be opened
 */
@Override
public void buildTestPredictions(String token, String datasetName, String model) throws ConflictEx, FormatEx, NotFoundEx {
    File dataset = this.fmanager.getTest(token, datasetName);
    String[] options = new String[1];

    try {
        // Recover the options the classifier was built with from the "Scheme" line of its log.
        try (BufferedReader reader = new BufferedReader(new FileReader(this.fmanager.getLogLocation(token, datasetName, model)))) {
            String line = reader.readLine();
            while (line != null) {
                if (line.startsWith("Scheme")) {
                    String[] schemeParts = line.split("trees.REPTree |trees.J48 | trees.RandomTree ");
                    // A scheme line without anything after the classifier name means "no options".
                    options = schemeParts.length > 1 ? schemeParts[1].split(" ") : new String[0];
                    break;
                }
                line = reader.readLine();
            }
        }

        // The instances are fully loaded by the constructor, so the reader can be closed right away.
        Instances instancesLRN;
        try (FileReader readerLRN = new FileReader(dataset.getAbsolutePath())) {
            instancesLRN = new Instances(readerLRN);
        }
        instancesLRN.setClassIndex(instancesLRN.numAttributes() - 1);

        J48 c45model = null;
        RandomTree rtree = null;
        REPTree reptree = null;

        try (PrintStream predictionsTest = new PrintStream(new FileOutputStream(this.fmanager.getPredictionsTestLocation(token, datasetName, model), false))) {
            switch (model) {
                case "J48":
                    c45model = new J48();
                    c45model.setOptions(options);
                    c45model.buildClassifier(instancesLRN);
                    break;
                case "RandomTree":
                    rtree = new RandomTree();
                    rtree.setOptions(options);
                    rtree.buildClassifier(instancesLRN);
                    break;
                case "REPTree":
                    reptree = new REPTree();
                    reptree.setOptions(options);
                    reptree.buildClassifier(instancesLRN);
                    break;
                default:
                    break;
            }

            Evaluation evalLRN = new Evaluation(instancesLRN);
            switch (model) {
                case "J48":
                    evalLRN.evaluateModel(c45model, instancesLRN, new String[1]);
                    break;
                case "RandomTree":
                    evalLRN.evaluateModel(rtree, instancesLRN, new String[1]);
                    break;
                case "REPTree":
                    evalLRN.evaluateModel(reptree, instancesLRN, new String[1]);
                    break;
                default:
                    break;
            }

            for (Prediction p : evalLRN.predictions()) {
                predictionsTest.println(p.actual() + " " + p.predicted());
            }
        }
    } catch (FileNotFoundException ex) {
        throw new NotFoundEx(datasetName + " log not found");
    } catch (IOException ex) {
        throw new FormatEx(datasetName + " log has wrong format");
    } catch (Exception ex) {
        throw new FormatEx("Error predicting test instances");
    }
}
}
......@@ -32,7 +32,7 @@ public interface ClassifierManager {
public NumericAttribute updateAttributeConfig(String token, String dataset, String attribute, NumericProperty property, String lang) throws NotFoundEx, IOException, FormatEx;
public VisualNode buildTree(String token, String dataset, String algorithm) throws NotFoundEx, FormatEx, ConflictEx, IOException;
public VisualNode getTree(String token, String dataset, String algorithm, String lang) throws NotFoundEx, IOException;
public Classification classify(String token, String dataset, String algorithm, String lang, Instance instance) throws NotFoundEx, FormatEx, IOException;
public Classification classify(String token, String dataset, String algorithm, String lang, Instance instance, boolean visual) throws NotFoundEx, FormatEx, IOException;
public ArrayList<Classification> classify(String token, String dataset, String algorithm, String lang, Instance instance, double percentage) throws NotFoundEx, FormatEx, IOException;
public Matrix getMatrix(String token, String dataset, String algorithm, String type) throws NotFoundEx, IOException, FormatEx;
}
......@@ -109,9 +109,9 @@ public class ClassifierManagerImpl implements ClassifierManager {
new VisualNode(), root, config, lang);
}
@Override
public Classification classify(String token, String dataset, String algorithm, String lang, Instance instance) throws NotFoundEx, FormatEx, IOException {
public Classification classify(String token, String dataset, String algorithm, String lang, Instance instance, boolean visual) throws NotFoundEx, FormatEx, IOException {
File configFile;
......@@ -138,7 +138,7 @@ public class ClassifierManagerImpl implements ClassifierManager {
this.treeInterpreter = new TreeInterpreter(root);
return this.treeInterpreter.classify(instance, config, lang);
return this.treeInterpreter.classify(instance, config, lang, visual);
}
......
package brunolopez.expliclas.classifiers;
import brunolopez.expliclas.builder.BuilderManager;
import brunolopez.expliclas.builder.BuilderManagerImpl;
import brunolopez.expliclas.datasets.DatasetManager;
import brunolopez.expliclas.datasets.DatasetManagerImpl;
import brunolopez.expliclas.exceptions.FormatEx;
......@@ -16,43 +17,43 @@ import brunolopez.expliclas.models.NumericAttribute;
import brunolopez.expliclas.models.Position;
import brunolopez.expliclas.utils.FileManager;
import brunolopez.expliclas.utils.MapperJSON;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;
public class MatrixBuilder {
private final Scanner sc;
private BufferedReader reader;
private final FileManager fmanager;
private final DatasetManager datasetManager;
private final ClassifierManager classifierManager;
private final BuilderManager builder;
public MatrixBuilder(File input) throws FileNotFoundException {
this.sc = new Scanner(input);
this.reader = new BufferedReader(new FileReader(input));
this.fmanager = new FileManager();
this.datasetManager = new DatasetManagerImpl();
this.classifierManager = new ClassifierManagerImpl();
this.builder = new BuilderManagerImpl();
}
public Matrix readMatrix() {
public Matrix readMatrix() throws IOException {
String line = "";
ArrayList<Integer> row = new ArrayList<>();
Matrix m = new Matrix();
while (!line.startsWith("=== Confusion")) {
line = this.sc.nextLine();
line = this.reader.readLine();
}
for (int i = 0; i < 2; i++) {
this.sc.nextLine();
this.reader.readLine();
}
line = this.sc.nextLine().split(" \\| ")[0].trim();
line = this.reader.readLine().split(" \\| ")[0].trim();
while (line != null && !line.isEmpty()) {
for (String s : line.split("\\s+")) {
......@@ -62,70 +63,61 @@ public class MatrixBuilder {
m.addRow(row);
row.clear();
if (this.sc.hasNextLine()) {
line = this.sc.nextLine().split(" \\| ")[0].trim();
} else {
line = null;
line = this.reader.readLine();
if (line != null) {
line = line.split(" \\| ")[0].trim();
}
}
this.sc.close();
this.reader.close();
m.setPrecision();
return m;
}
public Matrix buildMatrixInstances(String token, String name, String algorithm, String type) throws NotFoundEx, IOException, FormatEx{
public Matrix buildMatrixInstances(String token, String name, String algorithm, String type) throws NotFoundEx, IOException, FormatEx {
File predictions;
File configFile;
MapperJSON mapper = new MapperJSON();
try{
configFile = this.fmanager.getConfig(name, "en");
} catch (NotFoundEx ex){
Position p;
Matrix m;
try {
configFile = this.fmanager.getConfig(token, name, "en");
}
DatasetConfig config = mapper.readConfigJSON(configFile);
Dataset dataset;
if(type.equals("train"))
dataset = this.datasetManager.getDataset(token, name);
else
dataset = this.datasetManager.getTestDataset(token, name);
Instance instance;
Classification classification;
Matrix m = new Matrix(config.getConsequents().size());
int line = 0;
for(Line l: dataset.getLines()){
line++;
instance = new Instance();
for(int i=1; i<=l.getAttributes().size(); i++){
if(l.getAttributes().get(i) instanceof NumericAttribute)
instance.putValue(l.getAttributes().get(i).getId(), ((NumericAttribute) l.getAttributes().get(i)).getValue());
else
instance.putValue(l.getAttributes().get(i).getId(), ((CategoricAttribute) l.getAttributes().get(i)).getValue());
instance.setSolution(l.getSolution());
DatasetConfig config = mapper.readConfigJSON(configFile);
if (type.equals("train")) {
predictions = this.fmanager.getPredictions(token, name, algorithm);
} else {
this.builder.buildTestPredictions(token, name, algorithm);
predictions = this.fmanager.getPredictionsTest(token, name, algorithm);
}
classification = this.classifierManager.classify(token, name, algorithm, "en", instance);
if(classification.getState().equals("incorrect")){
Position p = new Position(config.getConsequentById(instance.getSolution()).getMatrixPosition() - 1, classification.getConsequent().getMatrixPosition() -1);
m.addConfused(p, line);
m = new Matrix(config.getConsequents().size());
this.reader = new BufferedReader(new FileReader(predictions));
String line = reader.readLine();
int instance = 1;
while (line != null) {
p = new Position((int) Float.parseFloat(line.split(" ")[0]), (int) Float.parseFloat(line.split(" ")[1]));
if (p.getRow() != p.getColumn()) {
m.addConfused(p, instance);
}
m.increment(p.getRow(), p.getColumn());
} else {
m.increment(classification.getConsequent().getMatrixPosition() - 1, classification.getConsequent().getMatrixPosition() - 1);
line = reader.readLine();
instance++;
}
}
} catch (Exception ex) {
throw new FormatEx("Something went wrong building " + type + " matrix");
}
m.setPrecision();
return m;
}
}
......@@ -56,9 +56,9 @@ public class TreeBuilder {
Pattern conseqNode = Pattern.compile("^([\\w-_]*)\\s(<=|>)\\s([+-]?[0-9]*.?[0-9]+)\\s?:\\s([+-]?[0-9]*.?[0-9]+|.*)\\s\\((.*)\\).*$");
Pattern conseqNodeAux = Pattern.compile("^([\\w-_]*)\\s(<|>=)\\s([+-]?[0-9]*.?[0-9]+)\\s?:\\s([+-]?[0-9]*.?[0-9]+|.*)\\s\\((.*)\\).*$");
// Grupo 1: Atributo --- Grupo 2: Condicion --- Grupo 3: Valor
Pattern categoricNode = Pattern.compile("^([\\w-_]*)\\s(=|!=)\\s([\\w-_]*)$");
Pattern categoricNode = Pattern.compile("^([\\w-_]*)\\s(=|!=)\\s([\\w-_.]*)$");
// Grupo 1: Atributo --- Grupo 2: Condicion --- Grupo 3: Valor -- Grupo 4: Clasificacion -- Grupo 5: Instancias
Pattern categoricNodeAux = Pattern.compile("^([\\w-_]*)\\s(=|!=)\\s([\\w-_]*)\\s?:\\s([+-]?[0-9]*.?[0-9]+|.*)\\s\\((.*)\\).*$");
Pattern categoricNodeAux = Pattern.compile("^([\\w-_]*)\\s(=|!=)\\s([\\w-_.]*)\\s?:\\s([+-]?[0-9]*.?[0-9]+|.*)\\s\\((.*)\\).*$");
// Grupo 1: Clasificacion --- Grupo 2: Instancias
Pattern oneNode = Pattern.compile("^\\s?:\\s([+-]?[0-9]*.?[0-9]+|.*)\\s\\((.*)\\).*$");
......
......@@ -40,7 +40,7 @@ public class TreeInterpreter {
this.tree = tree;
}
public Classification classify(Instance instance, DatasetConfig config, String lang) {
public Classification classify(Instance instance, DatasetConfig config, String lang, boolean visual) {
ArrayList<Node> path = new ArrayList();
path.add(tree);
Attribute att;
......@@ -147,10 +147,12 @@ public class TreeInterpreter {
classification.setInstance(instance);
}
VisualNode treePath = this.treebuilder.buildVisualNode(new VisualNode(), this.tree, config, path, lang);
classification.setTree(treePath);
if (visual) {
VisualNode treePath = this.treebuilder.buildVisualNode(new VisualNode(), this.tree, config, path, lang);
classification.setTree(treePath);
}
return classification;
}
......
......@@ -6,19 +6,16 @@ import brunolopez.expliclas.models.Position;
import de.normalisiert.utils.graphs.ElementaryCyclesSearch;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
public class ConfusionAnalyzer {
private Scanner sc;
private Matrix matrix;
public ConfusionAnalyzer(File input) throws FileNotFoundException {
public ConfusionAnalyzer(File input) throws FileNotFoundException, IOException {
this.matrix = (new MatrixBuilder(input)).readMatrix();
this.sc = new Scanner(input);
}
public ConfusionAnalyzer(Matrix matrix) throws FileNotFoundException {
......
......@@ -449,10 +449,10 @@ public class ExplainerManagerImpl implements ExplainerManager {
this.generator = new ClauseGeneratorEn();
if (confusion != 0) {
phrase = this.generator.generateClause(
consequents.get(0).getName(), "be", false, "confused with " + consequents.get(1).getName() + " by " + df.format(confusion) + "%");
this.extractor.getConsequentName(consequents.get(0)), "be", false, "confused with " + this.extractor.getConsequentName(consequents.get(1)) + " by " + df.format(confusion) + "%");
} else {
phrase = this.generator.generateClause(
consequents.get(0).getName(), "be", false, "never confused with " + consequents.get(1).getName());
this.extractor.getConsequentName(consequents.get(0)), "be", false, "never confused with " + this.extractor.getConsequentName(consequents.get(1)));
}
explanation.addClause(((ClauseGeneratorEn) this.generator).getRealisation(phrase));
break;
......@@ -460,10 +460,10 @@ public class ExplainerManagerImpl implements ExplainerManager {
this.generator = new ClauseGeneratorEs();
if (confusion != 0) {
phrase = this.generator.generateClause(
consequents.get(0).getName(), "ser", false, "confundido con " + consequents