Commit 9fabdfec authored by Bruno López Trigo's avatar Bruno López Trigo

Engadidas melloras sobre a explicación global do clasificador, actualizado...

Engadidas melloras sobre a explicación global do clasificador, actualizado documento de requisitos de linguaxe e engadido documento de especificación do servizo
parent 4bead0f3
#Sat, 31 Mar 2018 17:52:16 +0200
#Thu, 05 Apr 2018 19:37:12 +0200
/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2=
......@@ -4,15 +4,13 @@
<open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/2">
<group>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/resources/log.xml</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/IOInterface.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/Problem.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/languagerealiser/LanguageRealiser.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/languagerealiser/ClauseGenerator.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/ConfusionMatrix.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/wekaparser/v2/Controller.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/XMLParser.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/IOImpl.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/iosystem/ReaderWeka.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/resources/config.xml</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/src/rulesystem/RuleBuilder.java</file>
<file>file:/home/brilemau/Documents/repos/ExpliClas-TFG/codigo/WekaParser-v2/resources/beer.xml</file>
</group>
</open-files>
</project-private>
......@@ -27,7 +27,7 @@
<property>
<valueTxt>an exception</valueTxt>
<interval>
<leftValue>0</leftValue>
<leftValue>-1</leftValue>
<rightValue>10</rightValue>
</interval>
</property>
......
#!/bin/bash
# Builds the XML problem description for one sample from the command-line
# arguments, runs WekaParser-v2 on it, and opens the resulting HTML file in
# Firefox.
#
# Usage: <this script> NAME VALUE [NAME VALUE ...]
#   Arguments are consumed in pairs: an attribute name followed by its value.
#   Each pair becomes one <attribute name="NAME"><value>VALUE</value></attribute>
#   element of the generated input file.

# File names handed to the jar, in the positional order it is invoked with.
properties_xsd="properties.xsd"
properties_xml="flavia.xml"
entry_xsd="entry.xsd"
config_xsd="config.xsd"
config="config.xml"
log="log.xml"
input="flaviainput.xml"
weka="flavia.txt"
html="explanationFlavia.html"
type=flavia

# An odd number of arguments would open an <attribute> element that is never
# closed, producing malformed XML; reject it up front instead.
if [ $(( $# % 2 )) -ne 0 ]
then
    echo "usage: $0 NAME VALUE [NAME VALUE ...]" >&2
    exit 1
fi

# index toggles between 1 (next argument is an attribute name) and
# 0 (next argument is that attribute's value).
index=1

# Write the whole document through a single redirect so the file is
# truncated and opened exactly once.
{
    echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    echo "<problem>"
    echo " <type>$type</type>"
    echo " <attributes>"
    # "$@" (quoted) keeps each argument intact even when it contains
    # whitespace or glob characters.
    for var in "$@"
    do
        if [ "$index" -eq 1 ]
        then
            echo " <attribute name=\"$var\">"
            index=0
        else
            echo " <value>$var</value>"
            echo " </attribute>"
            index=1
        fi
    done
    echo " </attributes>"
    echo "</problem>"
} > "$input"

java -jar ../dist/WekaParser-v2.jar "$properties_xsd" "$properties_xml" "$entry_xsd" "$config_xsd" "$config" "$log" "$input" "$weka" "$html"
firefox "$html"
......@@ -7,7 +7,7 @@ config_xsd="config.xsd"
config="config.xml"
log="logGlass.xml"
input="glassinput.xml"
weka="glass.txt"
weka="glassMod.txt"
html="explanationGlass.html"
type=glass
index=1
......
<h1>AutoExplainable Classifier</h1><h2>Global information</h2><p>There are 8 types of beer: Blanche, Lager, Pilsner, IPA, Stout, Barleywine, Porter and Belgian Strong Ale.</p><p>Types Lager, Porter, Stout, Belgian Strong Ale, Barleywine, IPA and Pilsner can lead to confusion. But among all of them pairs Barleywine, IPA and Pilsner, Lager are the most confused.</p><h2>Main Solution</h2><p>Beer is type Blanche because its strength is standard, its color is straw and its bitterness is low.</p><h2></h2>
\ No newline at end of file
<h1>AutoExplainable Classifier</h1><h2>Global information</h2><p>There are 8 types of beer: Blanche, Lager, Pilsner, IPA, Stout, Barleywine, Porter and Belgian Strong Ale.</p><p>There may be some confusion among samples related to some types of beer. But among all of them the pair [IPA; Barleywine] is the most confused.</p><h2>Main Solution</h2><p>Beer is type Pilsner because its strength is standard, its color is straw and its bitterness is high.</p><h2></h2>
\ No newline at end of file
<h1>AutoExplainable Classifier</h1><h2>Global information</h2><p>There are 5 types of flavia: Aesculus chinensis, Berberis anhweiensis, Cercis chinensis, Phoebe zhennan and Lagerstroemia indica.</p><p>There may be some confusion among samples related to some types of beer. But among all of them the pair [Cercis chinensis; Phoebe zhennan] is the most confused.</p><h2>Main Solution</h2><p>Flavia is type Phoebe zhennan because its area is not very small and its perimeter is small.</p><h2>Alternatives</h2><p>However, this flavia may be also Lagerstroemia indica because area is quite close to the usual Lagerstroemia indica value.</p><p>For these specific values it is just as likely to be Lagerstroemia indica.</p><p>But Lagerstroemia indica will be an exception if classifier's global information is taken into account.</p>
\ No newline at end of file
<h1>AutoExplainable Classifier</h1><h2>Global information</h2><p>There are 7 types of glass: build wind float, build wind non-float, vehic wind float, vehic wind non-float, containers, tableware and headlamps.</p><p>Types build wind float, vehic wind float, build wind non-float, containers, headlamps and tableware can lead to confusion. But among all of them pairs tableware, build wind float and build wind float, vehic wind float are the most confused.</p><h2>Main Solution</h2><p>Beer is type build wind non-float because its silicon is medium and its barium and refractive index are high.</p><h2>Alternatives</h2><p>However, this glass may be also build wind float because aluminum is quite close to the usual build wind float value. It may be also vehic wind float because silicon and calcium are quite close to the usual vehic wind float values.</p><p>For these specific values it is just as likely to be build wind float and vehic wind float.</p><p>In addition build wind float would be likely if classifier's global information is taken into account. But vehic wind float would be an exception.</p>
\ No newline at end of file
<h1>AutoExplainable Classifier</h1><h2>Global information</h2><p>There are 7 types of glass: build wind float, build wind non-float, vehic wind float, vehic wind non-float, containers, tableware and headlamps.</p><p>There may be some confusion among samples related to most types of beer. Only in exceptional cases, confusion involve vehic wind non-float and headlamps. But among all of them the pairs [build wind float; containers] and [build wind non-float; build wind float] are the most confused.</p><p>On the other hand, following pairs are eventually misleaded.</p><ul><li>Types vehic wind non-float and vehic wind float </li><li>Types vehic wind non-float and tableware </li><li>Types headlamps and containers </li></ul><h2>Main Solution</h2><p>Glass is type tableware because its sodium is medium and its potassium, barium and magnesium are high.</p><h2></h2>
\ No newline at end of file
=== Run information ===
Scheme: weka.classifiers.trees.J48 -U -M 2
Relation: FLAVIA5
Instances: 310
Attributes: 4
Area
Perimeter
Diameter
Class
Test mode: 10-fold cross-validation
=== Classifier model (full training set) ===
J48 unpruned tree
------------------
Area <= 325474
| Perimeter <= 2395.398195: 2.0 (47.0)
| Perimeter > 2395.398195
| | Area <= 276186: 5.0 (56.0/1.0)
| | Area > 276186
| | | Perimeter <= 3437.313201: 4.0 (7.0)
| | | Perimeter > 3437.313201: 3.0 (3.0/1.0)
Area > 325474
| Area <= 694774
| | Perimeter <= 6054.299274
| | | Perimeter <= 3042.186217: 3.0 (27.0/2.0)
| | | Perimeter > 3042.186217
| | | | Perimeter <= 3399.552883
| | | | | Area <= 548851: 4.0 (15.0/1.0)
| | | | | Area > 548851
| | | | | | Perimeter <= 3222.009594: 1.0 (2.0)
| | | | | | Perimeter > 3222.009594: 3.0 (3.0/1.0)
| | | | Perimeter > 3399.552883: 4.0 (36.0/1.0)
| | Perimeter > 6054.299274: 3.0 (34.0/1.0)
| Area > 694774
| | Area <= 918774: 1.0 (62.0/2.0)
| | Area > 918774
| | | Area <= 1006143: 2.0 (11.0)
| | | Area > 1006143: 3.0 (7.0)
Number of Leaves : 13
Size of the tree : 25
Time taken to build model: 0.02 seconds
=== Stratified cross-validation ===
=== Summary ===
Correctly Classified Instances 282 90.9677 %
Incorrectly Classified Instances 28 9.0323 %
Kappa statistic 0.8868
Mean absolute error 0.044
Root mean squared error 0.1789
Relative absolute error 13.7778 %
Root relative squared error 44.7576 %
Total Number of Instances 310
=== Detailed Accuracy By Class ===
TP Rate FP Rate Precision Recall F-Measure MCC ROC Area PRC Area Class
0,952 0,016 0,938 0,952 0,945 0,931 0,975 0,897 1.0
0,948 0,004 0,982 0,948 0,965 0,957 0,972 0,941 2.0
0,875 0,042 0,863 0,875 0,869 0,829 0,956 0,865 3.0
0,867 0,044 0,825 0,867 0,846 0,808 0,937 0,815 4.0
0,912 0,008 0,963 0,912 0,937 0,924 0,974 0,916 5.0
Weighted Avg. 0,910 0,024 0,912 0,910 0,910 0,887 0,962 0,886
=== Confusion Matrix ===
a b c d e <-- classified as
60 1 2 0 0 | a = 1.0
0 55 2 1 0 | b = 2.0
3 0 63 6 0 | c = 3.0
1 0 5 52 2 | d = 4.0
0 0 1 4 52 | e = 5.0
<?xml version="1.0" encoding="UTF-8"?>
<problem>
<attributes>
<attribute name="Area">
<valuetxt>Area</valuetxt>
<maxValue>1123619</maxValue>
<minValue>112823</minValue>
<properties>
<property>
<valueTxt>Very small</valueTxt>
<interval>
<leftValue>112823</leftValue>
<rightValue>244359.304</rightValue>
</interval>
</property>
<property>
<valueTxt>Not very small</valueTxt>
<interval>
<leftValue>244359.304</leftValue>
<rightValue>1123619</rightValue>
</interval>
</property>
</properties>
</attribute>
<attribute name="Perimeter">
<valuetxt>Perimeter</valuetxt>
<maxValue>9660.198</maxValue>
<minValue>1882.344</minValue>
<properties>
<property>
<valueTxt>Very small</valueTxt>
<interval>
<leftValue>1882.344</leftValue>
<rightValue>2401.673</rightValue>
</interval>
</property>
<property>
<valueTxt>Small</valueTxt>
<interval>
<leftValue>2401.673</leftValue>
<rightValue>3609.257</rightValue>
</interval>
</property>
<property>
<valueTxt>Medium</valueTxt>
<interval>
<leftValue>3609.257</leftValue>
<rightValue>5294.988</rightValue>
</interval>
</property>
<property>
<valueTxt>Large</valueTxt>
<interval>
<leftValue>5294.988</leftValue>
<rightValue>7123.296</rightValue>
</interval>
</property>
<property>
<valueTxt>Very large</valueTxt>
<interval>
<leftValue>7123.296</leftValue>
<rightValue>9660.198</rightValue>
</interval>
</property>
</properties>
</attribute>
<attribute name="Diameter">
<valuetxt>Diameter</valuetxt>
<maxValue>1196.092</maxValue>
<minValue>379.013</minValue>
<properties>
<property>
<valueTxt>Very small</valueTxt>
<interval>
<leftValue>379.013</leftValue>
<rightValue>552.443</rightValue>
</interval>
</property>
<property>
<valueTxt>Small</valueTxt>
<interval>
<leftValue>552.443</leftValue>
<rightValue>686.462</rightValue>
</interval>
</property>
<property>
<valueTxt>Medium</valueTxt>
<interval>
<leftValue>686.462</leftValue>
<rightValue>827.546</rightValue>
</interval>
</property>
<property>
<valueTxt>Large</valueTxt>
<interval>
<leftValue>827.546</leftValue>
<rightValue>1025.387</rightValue>
</interval>
</property>
<property>
<valueTxt>Very large</valueTxt>
<interval>
<leftValue>1025.387</leftValue>
<rightValue>1196.092</rightValue>
</interval>
</property>
</properties>
</attribute>
</attributes>
<consequents>
<consequent>
<code>1</code>
<name>1.0</name>
<text>Aesculus chinensis</text>
</consequent>
<consequent>
<code>2</code>
<name>2.0</name>
<text>Berberis anhweiensis</text>
</consequent>
<consequent>
<code>3</code>
<name>3.0</name>
<text>Cercis chinensis</text>
</consequent>
<consequent>
<code>4</code>
<name>4.0</name>
<text>Phoebe zhennan</text>
</consequent>
<consequent>
<code>5</code>
<name>5.0</name>
<text>Lagerstroemia indica</text>
</consequent>
</consequents>
</problem>
<?xml version="1.0" encoding="UTF-8"?>
<problem>
<type>flavia</type>
<attributes>
<attribute name="Area">
<value>276766</value>
</attribute>
<attribute name="Perimeter">
<value>2844.792206</value>
</attribute>
<attribute name="Diameter">
<value>593.6239684</value>
</attribute>
</attributes>
</problem>
=== Run information ===
Scheme: weka.classifiers.trees.J48 -U -M 2
Relation: Glass
Instances: 214
Attributes: 10
RI
Na
Mg
Al
Si
K
Ca
Ba
Fe
Type
Test mode: 10-fold cross-validation
=== Classifier model (full training set) ===
J48 unpruned tree
------------------
Ba <= 0.27
| Mg <= 2.41
| | K <= 0.03
| | | Na <= 13.75: build wind non-float (3.0)
| | | Na > 13.75: tableware (9.0)
| | K > 0.03
| | | Na <= 13.49
| | | | RI <= 1.5241: containers (13.0/1.0)
| | | | RI > 1.5241: build wind non-float (3.0)
| | | Na > 13.49: build wind non-float (7.0/1.0)
| Mg > 2.41
| | Al <= 1.41
| | | RI <= 1.51707
| | | | RI <= 1.51596: build wind float (3.0)
| | | | RI > 1.51596
| | | | | Fe <= 0.12
| | | | | | Mg <= 3.54: vehic wind float (5.0)
| | | | | | Mg > 3.54
| | | | | | | RI <= 1.51667: build wind non-float (2.0)
| | | | | | | RI > 1.51667: vehic wind float (2.0)
| | | | | Fe > 0.12: build wind non-float (2.0)
| | | RI > 1.51707
| | | | K <= 0.23
| | | | | Mg <= 3.34: build wind non-float (2.0)
| | | | | Mg > 3.34
| | | | | | Si <= 72.64
| | | | | | | Na <= 14.01: build wind float (14.0)
| | | | | | | Na > 14.01
| | | | | | | | RI <= 1.52211
| | | | | | | | | Na <= 14.32: vehic wind float (3.0)
| | | | | | | | | Na > 14.32: build wind float (2.0)
| | | | | | | | RI > 1.52211: build wind float (3.0)
| | | | | | Si > 72.64: vehic wind float (3.0)
| | | | K > 0.23
| | | | | Mg <= 3.75
| | | | | | Fe <= 0.14
| | | | | | | RI <= 1.52043: build wind float (36.0)
| | | | | | | RI > 1.52043: build wind non-float (2.0/1.0)
| | | | | | Fe > 0.14
| | | | | | | Al <= 1.17: build wind non-float (5.0)
| | | | | | | Al > 1.17: build wind float (6.0/1.0)
| | | | | Mg > 3.75: build wind non-float (10.0)
| | Al > 1.41
| | | Si <= 72.49
| | | | Ca <= 8.28: build wind non-float (6.0)
| | | | Ca > 8.28: vehic wind float (5.0/1.0)
| | | Si > 72.49
| | | | RI <= 1.51732
| | | | | Fe <= 0.22: build wind non-float (30.0/1.0)
| | | | | Fe > 0.22
| | | | | | RI <= 1.51629: build wind float (2.0)
| | | | | | RI > 1.51629: build wind non-float (2.0)
| | | | RI > 1.51732
| | | | | RI <= 1.51789: build wind float (3.0)
| | | | | RI > 1.51789: build wind non-float (2.0)
Ba > 0.27
| Si <= 70.16: build wind non-float (2.0/1.0)
| Si > 70.16: headlamps (27.0/1.0)
Number of Leaves : 30
Size of the tree : 59
Time taken to build model: 0.03 seconds
=== Stratified cross-validation ===
=== Summary ===
Correctly Classified Instances 144 67.2897 %
Incorrectly Classified Instances 70 32.7103 %
Kappa statistic 0.5571
Mean absolute error 0.1001
Root mean squared error 0.2854
Relative absolute error 47.2665 %
Root relative squared error 87.9232 %
Total Number of Instances 214
=== Detailed Accuracy By Class ===
TP Rate FP Rate Precision Recall F-Measure MCC ROC Area PRC Area Class
0,714 0,174 0,667 0,714 0,690 0,532 0,809 0,674 build wind float
0,618 0,174 0,662 0,618 0,639 0,452 0,774 0,611 build wind non-float
0,412 0,046 0,438 0,412 0,424 0,376 0,775 0,270 vehic wind float
? 0,000 ? ? ? ? ? ? vehic wind non-float
0,769 0,010 0,833 0,769 0,800 0,788 0,872 0,575 containers
0,778 0,029 0,538 0,778 0,636 0,629 0,930 0,527 tableware
0,793 0,022 0,852 0,793 0,821 0,795 0,869 0,738 headlamps
Weighted Avg. 0,673 0,127 0,677 0,673 0,673 0,546 0,811 0,616
=== Confusion Matrix ===
a b c d e f g <-- classified as
50 15 3 0 12 1 1 | a = build wind float
16 47 6 0 2 3 2 | b = build wind non-float
5 14 7 0 0 21 0 | c = vehic wind float
0 0 20 30 0 15 0 | d = vehic wind non-float
0 32 20 0 10 0 1 | e = containers
1 11 0 15 0 7 0 | f = tableware
3 2 0 0 12 1 23 | g = headlamps
......@@ -3,25 +3,25 @@
<type>glass</type>
<attributes>
<attribute name="RI">
<value>1.5159</value>
<value>1.516</value>
</attribute>
<attribute name="Na">
<value>13.24</value>
<value>16</value>
</attribute>
<attribute name="Mg">
<value>3.34</value>
<value>2</value>
</attribute>
<attribute name="Al">
<value>1.47</value>
<value>1.3</value>
</attribute>
<attribute name="Si">
<value>73.1</value>
<value>70</value>
</attribute>
<attribute name="K">
<value>0.39</value>
<value>0</value>
</attribute>
<attribute name="Ca">
<value>8.22</value>
<value>4</value>
</attribute>
<attribute name="Ba">
<value>0</value>
......
......@@ -6,10 +6,10 @@
<value>2</value>
</attribute>
<attribute name="Bitterness">
<value>17</value>
<value>39</value>
</attribute>
<attribute name="Strength">
<value>0.046</value>
<value>0.049</value>
</attribute>
</attributes>
</problem>
......@@ -6,39 +6,39 @@
<antecedents>
<antecedent>
<attribute name="Strength">
<value>0.046</value>
<value>0.049</value>
<valuetxt>Strength</valuetxt>
<maxValue>0.136</maxValue>
<minValue>0.035</minValue>
<modified>false</modified>
<properties>
<property>
<valueTxt>Session</valueTxt>
<interval>
<leftValue>0.035</leftValue>
<rightValue>0.0525</rightValue>
</interval>
<valueTxt>Session</valueTxt>
<interval>
<leftValue>0.035</leftValue>
<rightValue>0.0525</rightValue>
</interval>
</property>
<property>
<valueTxt>Standard</valueTxt>
<interval>
<leftValue>0.0525</leftValue>
<rightValue>0.0675</rightValue>
</interval>
<valueTxt>Standard</valueTxt>
<interval>
<leftValue>0.0525</leftValue>
<rightValue>0.0675</rightValue>
</interval>
</property>
<property>
<valueTxt>High</valueTxt>
<interval>
<leftValue>0.0675</leftValue>
<rightValue>0.09</rightValue>
</interval>
<valueTxt>High</valueTxt>
<interval>
<leftValue>0.0675</leftValue>
<rightValue>0.09</rightValue>
</interval>
</property>
<property>
<valueTxt>Very high</valueTxt>
<interval>
<leftValue>0.09</leftValue>
<rightValue>0.136</rightValue>
</interval>
<valueTxt>Very high</valueTxt>
<interval>
<leftValue>0.09</leftValue>
<rightValue>0.136</rightValue>
</interval>
</property>
</properties>
</attribute>
......@@ -66,39 +66,39 @@
<modified>false</modified>
<properties>
<property>
<valueTxt>Pale</valueTxt>
<interval>
<leftValue>0.0</leftValue>
<rightValue>3.0</rightValue>
</interval>
<valueTxt>Pale</valueTxt>
<interval>
<leftValue>0.0</leftValue>
<rightValue>3.0</rightValue>
</interval>
</property>
<property>
<valueTxt>Straw</valueTxt>
<interval>
<leftValue>3.0</leftValue>
<rightValue>7.5</rightValue>
</interval>
<valueTxt>Straw</valueTxt>
<interval>
<leftValue>3.0</leftValue>
<rightValue>7.5</rightValue>
</interval>
</property>
<property>
<valueTxt>Amber</valueTxt>
<interval>
<leftValue>7.5</leftValue>
<rightValue>19.0</rightValue>
</interval>
<valueTxt>Amber</valueTxt>
<interval>
<leftValue>7.5</leftValue>
<rightValue>19.0</rightValue>
</interval>
</property>
<property>
<valueTxt>Brown</valueTxt>
<interval>
<leftValue>19.0</leftValue>
<rightValue>29.0</rightValue>
</interval>
<valueTxt>Brown</valueTxt>
<interval>
<leftValue>19.0</leftValue>
<rightValue>29.0</rightValue>
</interval>
</property>
<property>
<valueTxt>Black</valueTxt>
<interval>
<leftValue>29.0</leftValue>
<rightValue>45.0</rightValue>
</interval>
<valueTxt>Black</valueTxt>
<interval>
<leftValue>29.0</leftValue>
<rightValue>45.0</rightValue>
</interval>
</property>
</properties>
</attribute>
......@@ -119,61 +119,82 @@
</antecedent>
<antecedent>
<attribute name="Bitterness">
<value>17.0</value>
<value>39.0</value>
<valuetxt>Bitterness</valuetxt>
<maxValue>250.0</maxValue>
<minValue>7.0</minValue>
<modified>false</modified>
<properties>
<property>
<valueTxt>Low</valueTxt>
<interval>
<leftValue>7.0</leftValue>
<rightValue>21.0</rightValue>
</interval>
<valueTxt>Low</valueTxt>
<interval>
<leftValue>7.0</leftValue>
<rightValue>21.0</rightValue>
</interval>
</property>
<property>
<valueTxt>Low-Medium</valueTxt>
<interval>
<leftValue>21.0</leftValue>
<rightValue>32.5</rightValue>
</interval>
<valueTxt>Low-Medium</valueTxt>
<interval>
<leftValue>21.0</leftValue>
<rightValue>32.5</rightValue>
</interval>
</property>
<property>
<valueTxt>Medium-High</valueTxt>
<interval>
<leftValue>32.5</leftValue>
<rightValue>47.5</rightValue>
</interval>
<valueTxt>Medium-High</valueTxt>