Commit 7d65731c authored by Bruno López Trigo's avatar Bruno López Trigo

Engadida a detección de erros na clasificación de instancias coñecidas e matriz de entrenamento

parent 514f4f91
......@@ -34,5 +34,5 @@ public interface ClassifierManager {
public VisualNode getTree(String token, String dataset, String algorithm, String lang) throws NotFoundEx, IOException;
public Classification classify(String token, String dataset, String algorithm, String lang, Instance instance) throws NotFoundEx, FormatEx, IOException;
public ArrayList<Classification> classify(String token, String dataset, String algorithm, String lang, Instance instance, double percentage) throws NotFoundEx, FormatEx, IOException;
public Matrix getMatrix(String token, String dataset, String algorithm) throws NotFoundEx, IOException;
public Matrix getMatrix(String token, String dataset, String algorithm, String type) throws NotFoundEx, IOException, FormatEx;
}
......@@ -55,7 +55,7 @@ public class ClassifierManagerImpl implements ClassifierManager {
return config;
}
@Override
public File downloadConfig(String token, String dataset, String lang) throws NotFoundEx, IOException {
......@@ -174,21 +174,46 @@ public class ClassifierManagerImpl implements ClassifierManager {
}
@Override
public Matrix getMatrix(String token, String dataset, String algorithm) throws NotFoundEx, IOException {
File input;
public Matrix getMatrix(String token, String dataset, String algorithm, String type) throws NotFoundEx, IOException, FormatEx {
File input = null;
File matrix = null;
String matrixLocation = "";
try {
input = this.fmanager.getLog(dataset, algorithm);
matrix = this.fmanager.getMatrix(dataset, algorithm, type);
} catch (NotFoundEx ex) {
input = this.fmanager.getLog(token, dataset, algorithm);
try {
matrix = this.fmanager.getMatrix(token, dataset, algorithm, type);
} catch (NotFoundEx ex2) {
try {
matrixLocation = this.fmanager.getMatrixLocation(dataset, algorithm, type);
input = this.fmanager.getLog(dataset, algorithm);
} catch (NotFoundEx ex3) {
input = this.fmanager.getLog(token, dataset, algorithm);
matrixLocation = this.fmanager.getMatrixLocation(token, dataset, algorithm, type);
}
}
}
this.matrixBuilder = new MatrixBuilder(input);
return this.matrixBuilder.readMatrix();
Matrix m = null;
if(matrix != null)
return this.mapper.readMatrixJSON(matrix);
else if(type.equals("cv")){
this.matrixBuilder = new MatrixBuilder(input);
m = this.matrixBuilder.readMatrix();
this.mapper.writeJSON(m, new File(matrixLocation));
return m;
} else if(type.equals("train")){
this.matrixBuilder = new MatrixBuilder(input);
m = this.matrixBuilder.buildMatrixInstances(token, dataset, algorithm);
this.mapper.writeJSON(m, new File(matrixLocation));
return m;
} else {
return m;
}
}
@Override
public Attribute getAttributeConfig(String token, String dataset, String attribute, String lang) throws NotFoundEx, IOException {
File configFile;
......@@ -391,7 +416,7 @@ public class ClassifierManagerImpl implements ClassifierManager {
return attributeConfig;
}
@Override
public CategoricAttribute updateCategoricAttributeConfig(String token, String dataset, String attribute, CategoricAttribute attributeConfig, String lang) throws NotFoundEx, IOException, FormatEx {
File configFileEn, configFileEs, configFileGl,
......@@ -425,17 +450,18 @@ public class ClassifierManagerImpl implements ClassifierManager {
switch (lang) {
case "en":
if(attEn.getProperties().size() != attributeConfig.getProperties().size())
if (attEn.getProperties().size() != attributeConfig.getProperties().size()) {
throw new FormatEx("Properties size doesn't match");
}
boolean found = false;
for(Property p: attEn.getProperties()){
for(Property prop: attributeConfig.getProperties()){
if(p.getName().equals(prop.getName())){
for (Property p : attEn.getProperties()) {
for (Property prop : attributeConfig.getProperties()) {
if (p.getName().equals(prop.getName())) {
found = true;
((CategoricProperty) p).setValue(((CategoricProperty) prop).getValue());
}
}
if(!found){
if (!found) {
throw new FormatEx("Property " + p.getName() + " not found");
}
found = false;
......@@ -443,17 +469,18 @@ public class ClassifierManagerImpl implements ClassifierManager {
this.mapper.writeJSON(configEn, configFileEn);
break;
case "es":
if(attEs.getProperties().size() != attributeConfig.getProperties().size())
if (attEs.getProperties().size() != attributeConfig.getProperties().size()) {
throw new FormatEx("Properties size doesn't match");
}
found = false;
for(Property p: attEs.getProperties()){
for(Property prop: attributeConfig.getProperties()){
if(p.getName().equals(prop.getName())){
for (Property p : attEs.getProperties()) {
for (Property prop : attributeConfig.getProperties()) {
if (p.getName().equals(prop.getName())) {
found = true;
((CategoricProperty) p).setValue(((CategoricProperty) prop).getValue());
}
}
if(!found){
if (!found) {
throw new FormatEx("Property " + p.getName() + " not found");
}
found = false;
......@@ -461,17 +488,18 @@ public class ClassifierManagerImpl implements ClassifierManager {
this.mapper.writeJSON(configEs, configFileEs);
break;
case "gl":
if(attGl.getProperties().size() != attributeConfig.getProperties().size())
if (attGl.getProperties().size() != attributeConfig.getProperties().size()) {
throw new FormatEx("Properties size doesn't match");
}
found = false;
for(Property p: attGl.getProperties()){
for(Property prop: attributeConfig.getProperties()){
if(p.getName().equals(prop.getName())){
for (Property p : attGl.getProperties()) {
for (Property prop : attributeConfig.getProperties()) {
if (p.getName().equals(prop.getName())) {
found = true;
((CategoricProperty) p).setValue(((CategoricProperty) prop).getValue());
}
}
if(!found){
if (!found) {
throw new FormatEx("Property " + p.getName() + " not found");
}
found = false;
......@@ -603,7 +631,7 @@ public class ClassifierManagerImpl implements ClassifierManager {
} catch (NotFoundEx ex) {
}
configFileEn = this.fmanager.getConfig(token, dataset, "en");
configFileEs = this.fmanager.getConfig(token, dataset, "es");
configFileGl = this.fmanager.getConfig(token, dataset, "gl");
......
package brunolopez.expliclas.classifiers;
import brunolopez.expliclas.datasets.DatasetManager;
import brunolopez.expliclas.datasets.DatasetManagerImpl;
import brunolopez.expliclas.exceptions.FormatEx;
import brunolopez.expliclas.exceptions.NotFoundEx;
import brunolopez.expliclas.models.CategoricAttribute;
import brunolopez.expliclas.models.Classification;
import brunolopez.expliclas.models.Dataset;
import brunolopez.expliclas.models.DatasetConfig;
import brunolopez.expliclas.models.Instance;
import brunolopez.expliclas.models.Line;
import brunolopez.expliclas.models.Matrix;
import brunolopez.expliclas.models.NumericAttribute;
import brunolopez.expliclas.models.Position;
import brunolopez.expliclas.utils.FileManager;
import brunolopez.expliclas.utils.MapperJSON;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;
public class MatrixBuilder {
private final Scanner sc;
private final FileManager fmanager;
private final DatasetManager datasetManager;
private final ClassifierManager classifierManager;
/**
 * Creates a builder that reads from the given log file.
 *
 * <p>A {@link Scanner} is opened on {@code input} immediately; the file, dataset and
 * classifier collaborators are created with their default implementations.
 * NOTE(review): nothing visible in this class closes the scanner - confirm it is
 * released once the matrix has been read.
 *
 * @param input log file the scanner is opened on
 * @throws FileNotFoundException if {@code input} cannot be opened for reading
 */
public MatrixBuilder(File input) throws FileNotFoundException {
    sc = new Scanner(input);
    fmanager = new FileManager();
    datasetManager = new DatasetManagerImpl();
    classifierManager = new ClassifierManagerImpl();
}
public Matrix readMatrix() {
......@@ -54,5 +75,47 @@ public class MatrixBuilder {
}
/**
 * Builds a confusion matrix by re-classifying every line of a dataset with the given
 * algorithm and comparing each prediction with the solution recorded on the line.
 *
 * <p>Rows are indexed by the expected consequent and columns by the predicted one
 * (both taken from {@code getMatrixPosition() - 1}). For every classification whose state is
 * {@code "incorrect"} the 1-based line number is also recorded through
 * {@link Matrix#addConfused}. Any other state (including {@code "unknown"}) is counted on
 * the diagonal of the predicted consequent.
 *
 * @param token     user token, used as a fallback location for the configuration and
 *                  passed on to the dataset and classifier managers
 * @param name      dataset name
 * @param algorithm algorithm name passed to the classifier
 * @return the populated matrix, sized by the number of consequents in the English config
 * @throws NotFoundEx if the configuration, dataset or classifier resources are missing
 * @throws IOException on I/O failure while reading them
 * @throws FormatEx if the classifier rejects an instance, or returns a result without a
 *                  state or consequent (previously this surfaced as a NullPointerException)
 */
public Matrix buildMatrixInstances(String token, String name, String algorithm) throws NotFoundEx, IOException, FormatEx {
    File configFile;
    MapperJSON mapper = new MapperJSON();
    try {
        configFile = this.fmanager.getConfig(name, "en");
    } catch (NotFoundEx ex) {
        // No configuration without a token: fall back to the token-scoped one.
        configFile = this.fmanager.getConfig(token, name, "en");
    }
    DatasetConfig config = mapper.readConfigJSON(configFile);
    Dataset dataset = this.datasetManager.getDataset(token, name);

    Matrix m = new Matrix(config.getConsequents().size());
    int line = 0;
    for (Line l : dataset.getLines()) {
        line++;
        Instance instance = toInstance(l);
        Classification classification = this.classifierManager.classify(token, name, algorithm, "en", instance);
        if (classification.getState() == null || classification.getConsequent() == null) {
            throw new FormatEx("Instance at line " + line + " could not be classified");
        }
        int predicted = classification.getConsequent().getMatrixPosition() - 1;
        if ("incorrect".equals(classification.getState())) {
            int expected = config.getConsequentById(instance.getSolution()).getMatrixPosition() - 1;
            m.addConfused(new Position(expected, predicted), line);
            m.increment(expected, predicted);
        } else {
            m.increment(predicted, predicted);
        }
    }
    return m;
}

/**
 * Copies the attribute values and the solution of a dataset line into a new instance.
 * Attributes are looked up under the keys 1..size, as in the original loop.
 */
private Instance toInstance(Line l) {
    Instance instance = new Instance();
    // Set once per line, so lines without attributes keep their solution too.
    instance.setSolution(l.getSolution());
    for (int i = 1; i <= l.getAttributes().size(); i++) {
        // Held as Object so no extra import is needed; it is narrowed right below.
        Object attribute = l.getAttributes().get(i);
        if (attribute instanceof NumericAttribute) {
            NumericAttribute numeric = (NumericAttribute) attribute;
            instance.putValue(numeric.getId(), numeric.getValue());
        } else {
            CategoricAttribute categoric = (CategoricAttribute) attribute;
            instance.putValue(categoric.getId(), categoric.getValue());
        }
    }
    return instance;
}
}
......@@ -4,7 +4,6 @@ import brunolopez.expliclas.models.Antecedent;
import brunolopez.expliclas.models.Attribute;
import brunolopez.expliclas.models.CategoricNode;
import brunolopez.expliclas.models.Classification;
import brunolopez.expliclas.models.Consequent;
import brunolopez.expliclas.models.ConsequentNode;
import brunolopez.expliclas.models.DatasetConfig;
import brunolopez.expliclas.models.Instance;
......@@ -113,6 +112,15 @@ public class TreeInterpreter {
classification.addAntecedent(new Antecedent(att.getId() + " is " + instance.getCategoricValue(att.getId())));
}
if (path.get(path.size() - 1) instanceof ConsequentNode) {
if (instance.getSolution() != null) {
if (((ConsequentNode) path.get(path.size() - 1)).getConsequent().getId().equals(instance.getSolution())) {
classification.setState("correct");
} else {
classification.setState("incorrect");
}
} else {
classification.setState("unknown");
}
classification.setConsequent(((ConsequentNode) path.get(path.size() - 1)).getConsequent());
classification.setInstance(instance);
}
......@@ -137,13 +145,13 @@ public class TreeInterpreter {
HashMap<String, Boolean> modified = new HashMap();
ArrayList<HashMap<String, Boolean>> modifiedList = new ArrayList();
ArrayList<String> solutions = new ArrayList();
for (Map.Entry<String, Object> entry : instance.getValues().entrySet()) {
modified.put(entry.getKey(), false);
}
modifiedList.add(modified);
ArrayList<Instance> alternatives = new ArrayList();
alternatives.add(instance);
Instance auxInstance;
......@@ -176,7 +184,7 @@ public class TreeInterpreter {
modified.put(att.getId(), true);
modifiedList.add(modified);
alternatives.add(auxInstance);
} else if(modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) > splitValue) {
} else if (modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) > splitValue) {
classification.addSplitValue(att.getId(), splitValue);
}
} else {
......@@ -199,7 +207,7 @@ public class TreeInterpreter {
modified.put(att.getId(), true);
modifiedList.add(modified);
alternatives.add(auxInstance);
} else if(modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) <= splitValue) {
} else if (modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) <= splitValue) {
classification.addSplitValue(att.getId(), splitValue);
}
}
......@@ -224,7 +232,7 @@ public class TreeInterpreter {
modified.put(att.getId(), true);
modifiedList.add(modified);
alternatives.add(auxInstance);
} else if(modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) >= splitValue) {
} else if (modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) >= splitValue) {
classification.addSplitValue(att.getId(), splitValue);
}
} else {
......@@ -247,7 +255,7 @@ public class TreeInterpreter {
modified.put(att.getId(), true);
modifiedList.add(modified);
alternatives.add(auxInstance);
} else if(modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) < splitValue) {
} else if (modifiedList.get(0).get(att.getId()) && instance.getNumericValue(att.getId()) < splitValue) {
classification.addSplitValue(att.getId(), splitValue);
}
}
......@@ -257,7 +265,16 @@ public class TreeInterpreter {
path.add(((CategoricNode) path.get(path.size() - 1)).getChild(alternatives.get(0).getCategoricValue(att.getId())));
classification.addAntecedent(new Antecedent(att.getId() + " is " + alternatives.get(0).getCategoricValue(att.getId())));
}
if (path.get(path.size() - 1) instanceof ConsequentNode && !solutions.contains(((ConsequentNode) path.get(path.size()-1)).getConsequent().getId())) {
if (path.get(path.size() - 1) instanceof ConsequentNode && !solutions.contains(((ConsequentNode) path.get(path.size() - 1)).getConsequent().getId())) {
if (alternatives.get(0).getSolution() != null) {
if (((ConsequentNode) path.get(path.size() - 1)).getConsequent().getId().equals(alternatives.get(0).getSolution())) {
classification.setState("correct");
} else {
classification.setState("incorrect");
}
} else {
classification.setState("unknown");
}
classification.setConsequent(((ConsequentNode) path.get(path.size() - 1)).getConsequent());
solutions.add(((ConsequentNode) path.get(path.size() - 1)).getConsequent().getId());
VisualNode treePath = this.treebuilder.buildVisualNode(new VisualNode(), this.tree, config, path);
......@@ -267,7 +284,7 @@ public class TreeInterpreter {
classification = new Classification();
alternatives.remove(0);
modifiedList.remove(0);
} else if(path.get(path.size() -1) instanceof ConsequentNode) {
} else if (path.get(path.size() - 1) instanceof ConsequentNode) {
classification = new Classification();
alternatives.remove(0);
modifiedList.remove(0);
......@@ -285,7 +302,7 @@ public class TreeInterpreter {
int label = 0;
double best = 0, actual;
Property p;
for (Property prop : att.getProperties()) {
actual = ((NumericProperty) prop).getInterval().coincidencePercentage(interval);
if (actual > best) {
......
......@@ -120,6 +120,7 @@ public class DatasetManagerImpl implements DatasetManager {
l.addAttribute(i+1, new CategoricAttribute(atts.get(i), atts.get(i), split[i]));
}
}
l.setSolution(split[split.length-1].replaceAll("'", ""));
dataset.addLine(l);
n++;
}
......
......@@ -10,7 +10,8 @@ public class Classification {
private Instance instance;
private Consequent consequent;
private VisualNode tree;
private String state;
public Classification() {
this.antecedents = new ArrayList();
this.splitValues = new HashMap();
......@@ -85,5 +86,13 @@ public class Classification {
/**
 * Sets the instance this classification refers to.
 *
 * @param instance the classified instance
 */
public void setInstance(Instance instance) {
this.instance = instance;
}
/**
 * Returns the state of this classification.
 * TreeInterpreter sets it to "correct", "incorrect" or "unknown";
 * presumably it is null until one of those is assigned.
 *
 * @return the stored state
 */
public String getState() {
return state;
}
/**
 * Stores the state of this classification.
 *
 * @param state the state to store; TreeInterpreter uses "correct", "incorrect" and "unknown"
 */
public void setState(String state) {
this.state = state;
}
}
......@@ -5,7 +5,7 @@ import java.util.ArrayList;
public class Dataset {
private final String name;
private final ArrayList<Line> lines;
public Dataset(String name) {
this.name = name;
this.lines = new ArrayList<>();
......
......@@ -5,11 +5,20 @@ import java.util.Map;
public class Instance {
HashMap<String, Object> values;
private HashMap<String, Object> values;
private String solution;
/**
 * Creates an instance with an empty value map and no solution.
 */
public Instance() {
    values = new HashMap<>();
}
/**
 * Sets the solution recorded for this instance.
 *
 * @param solution the solution identifier; may be null when it is not known
 */
public void setSolution(String solution) {
this.solution = solution;
}
/**
 * Returns the solution recorded for this instance.
 *
 * @return the stored solution, or null if none was set
 */
public String getSolution() {
return solution;
}
public HashMap<String, Object> getValues() {
return values;
......@@ -20,6 +29,7 @@ public class Instance {
for(Map.Entry<String, Object> entry: instance.getValues().entrySet()){
this.values.put(entry.getKey(), entry.getValue());
}
this.solution = instance.getSolution();
}
public Double getNumericValue(String attribute){
......
......@@ -5,7 +5,8 @@ import java.util.HashMap;
public class Line {
private HashMap<Integer, Attribute> attributes;
private String solution;
/**
 * Creates a line with no attributes and no solution.
 */
public Line() {
    attributes = new HashMap<>();
}
......@@ -17,5 +18,13 @@ public class Line {
/**
 * Registers an attribute under the given id, replacing any attribute already stored there.
 *
 * @param id        key the attribute is stored under
 * @param attribute the attribute to store
 */
public void addAttribute(int id, Attribute attribute){
this.attributes.put(id, attribute);
}
/**
 * Sets the solution recorded for this line.
 *
 * @param solution the solution value to store
 */
public void setSolution(String solution) {
this.solution = solution;
}
/**
 * Returns the solution recorded for this line.
 *
 * @return the stored solution, or null if none was set
 */
public String getSolution() {
return solution;
}
}
package brunolopez.expliclas.models;
import java.util.ArrayList;
import java.util.HashMap;
public class Matrix {
private ArrayList<ArrayList<Integer>> data;
private boolean adjMatrix[][];
private HashMap<Position, ArrayList<Integer>> confused;
/**
 * Creates an empty matrix: no rows and no confused-instance entries.
 */
public Matrix() {
    data = new ArrayList<>();
    confused = new HashMap<>();
}
/**
 * Creates a square matrix of {@code initialCapacity} rows and columns with every cell
 * set to zero, and an empty confused-instance map. The adjacency matrix is not
 * initialised by this constructor.
 *
 * @param initialCapacity number of rows and columns
 */
public Matrix(int initialCapacity) {
    this.data = new ArrayList<>(initialCapacity);
    for (int r = 0; r < initialCapacity; r++) {
        ArrayList<Integer> zeros = new ArrayList<>();
        this.data.add(zeros);
        for (int c = 0; c < initialCapacity; c++) {
            zeros.add(0);
        }
    }
    this.confused = new HashMap<>();
}
public void addRow(ArrayList<Integer> row) {
if(this.adjMatrix == null)
this.adjMatrix = new boolean[row.size()][row.size()];
......@@ -25,10 +40,33 @@ public class Matrix {
this.data.add(new ArrayList(row));
}
/**
 * Adds one to the cell at the given position.
 *
 * @param row    zero-based row index
 * @param column zero-based column index
 */
public void increment(int row, int column) {
    ArrayList<Integer> cells = this.data.get(row);
    int current = cells.get(column);
    cells.set(column, current + 1);
}
/**
 * Records an instance number under the given matrix position, creating the list for
 * that position on first use.
 *
 * @param position matrix cell the instance belongs to
 * @param instance number of the instance to record
 */
public void addConfused(Position position, Integer instance) {
    ArrayList<Integer> recorded = this.confused.get(position);
    if (recorded == null) {
        recorded = new ArrayList<>();
        this.confused.put(position, recorded);
    }
    recorded.add(instance);
}
/**
 * Returns the matrix cells as a list of rows. The internal list itself is returned,
 * not a copy.
 *
 * @return the rows of the matrix
 */
public ArrayList<ArrayList<Integer>> getData() {
return data;
}
/**
 * Returns the instance numbers recorded per matrix position through addConfused.
 * The internal map itself is returned, not a copy.
 *
 * @return map from position to recorded instance numbers
 */
public HashMap<Position, ArrayList<Integer>> getConfused() {
return confused;
}
/**
 * Returns the adjacency matrix.
 * NOTE(review): neither constructor initialises it; it is only created in addRow,
 * so this may return null - confirm callers handle that.
 *
 * @return the adjacency matrix array
 */
public boolean[][] getAdjMatrix() {
return adjMatrix;
}
......
package brunolopez.expliclas.models;
import com.fasterxml.jackson.annotation.JsonValue;
public class Position {
private final int row;
private final int column;
/**
 * Creates the position at row 0, column 0.
 */
public Position() {
    this(0, 0);
}
/**
 * Parses a position from its {@code "row:column"} text form, the format produced by
 * {@link #toString()}.
 *
 * @param position text of the form {@code "row:column"}
 * @throws NumberFormatException if either part is not an integer
 * @throws ArrayIndexOutOfBoundsException if the text contains no {@code ':'} separator
 */
public Position(String position) {
    String[] parts = position.split(":");
    this.row = Integer.parseInt(parts[0]);
    this.column = Integer.parseInt(parts[1]);
}
/**
 * Creates a position from explicit coordinates.
 *
 * @param row    row index
 * @param column column index
 */
public Position(int row, int column) {
this.row = row;
this.column = column;
}
/**
 * Returns the row index of this position.
 *
 * @return the row index
 */
public int getRow() {
return row;
}
/**
 * Returns the column index of this position.
 *
 * @return the column index
 */
public int getColumn() {
return column;
}
/**
 * Renders this position as {@code "row:column"}. Because of {@code @JsonValue} this text
 * is also what Jackson writes when serialising a position.
 *
 * @return the {@code "row:column"} text form
 */
@Override
@JsonValue
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(this.row).append(":").append(this.column);
    return text.toString();
}
/**
 * Two positions are equal when both their column and their row match.
 *
 * @param obj object to compare with
 * @return true if {@code obj} is a Position with the same coordinates
 */
@Override
public boolean equals(Object obj) {
    if (!(obj instanceof Position)) {
        return false;
    }
    Position other = (Position) obj;
    return other.getColumn() == this.column && other.getRow() == this.row;
}
@Override
public int hashCode() {