Coverage Report - weka.attributeSelection.ReliefFAttributeEval

Classes in this File      Line Coverage    Branch Coverage    Complexity
ReliefFAttributeEval      0% (0/413)       0% (0/231)         4.571
 
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ReliefFAttributeEval.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;

/** 
 <!-- globalinfo-start -->
 * ReliefFAttributeEval :<br/>
 * <br/>
 * Evaluates the worth of an attribute by repeatedly sampling an instance and considering the value of the given attribute for the nearest instance of the same and different class. Can operate on both discrete and continuous class data.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Kenji Kira, Larry A. Rendell: A Practical Approach to Feature Selection. In: Ninth International Workshop on Machine Learning, 249-256, 1992.<br/>
 * <br/>
 * Igor Kononenko: Estimating Attributes: Analysis and Extensions of RELIEF. In: European Conference on Machine Learning, 171-182, 1994.<br/>
 * <br/>
 * Marko Robnik-Sikonja, Igor Kononenko: An adaptation of Relief for attribute estimation in regression. In: Fourteenth International Conference on Machine Learning, 296-304, 1997.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;inproceedings{Kira1992,
 *    author = {Kenji Kira and Larry A. Rendell},
 *    booktitle = {Ninth International Workshop on Machine Learning},
 *    editor = {Derek H. Sleeman and Peter Edwards},
 *    pages = {249-256},
 *    publisher = {Morgan Kaufmann},
 *    title = {A Practical Approach to Feature Selection},
 *    year = {1992}
 * }
 * 
 * &#64;inproceedings{Kononenko1994,
 *    author = {Igor Kononenko},
 *    booktitle = {European Conference on Machine Learning},
 *    editor = {Francesco Bergadano and Luc De Raedt},
 *    pages = {171-182},
 *    publisher = {Springer},
 *    title = {Estimating Attributes: Analysis and Extensions of RELIEF},
 *    year = {1994}
 * }
 * 
 * &#64;inproceedings{Robnik-Sikonja1997,
 *    author = {Marko Robnik-Sikonja and Igor Kononenko},
 *    booktitle = {Fourteenth International Conference on Machine Learning},
 *    editor = {Douglas H. Fisher},
 *    pages = {296-304},
 *    publisher = {Morgan Kaufmann},
 *    title = {An adaptation of Relief for attribute estimation in regression},
 *    year = {1997}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -M &lt;num instances&gt;
 *  Specify the number of instances to
 *  sample when estimating attributes.
 *  If not specified, then all instances
 *  will be used.</pre>
 * 
 * <pre> -D &lt;seed&gt;
 *  Seed for randomly sampling instances.
 *  (Default = 1)</pre>
 * 
 * <pre> -K &lt;number of neighbours&gt;
 *  Number of nearest neighbours (k) used
 *  to estimate attribute relevances
 *  (Default = 10).</pre>
 * 
 * <pre> -W
 *  Weight nearest neighbours by distance</pre>
 * 
 * <pre> -A &lt;num&gt;
 *  Specify sigma value (used in an exp
 *  function to control how quickly
 *  weights for more distant instances
 *  decrease. Use in conjunction with -W.
 *  Sensible value=1/5 to 1/10 of the
 *  number of nearest neighbours.
 *  (Default = 2)</pre>
 * 
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 8034 $
 */
public class ReliefFAttributeEval
  extends ASEvaluation
  implements AttributeEvaluator,
             OptionHandler, 
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -8422186665795839379L;

  /** The training instances */
  private Instances m_trainInstances;

  /** The class index */
  private int m_classIndex;

  /** The number of attributes */
  private int m_numAttribs;

  /** The number of instances */
  private int m_numInstances;

  /** Numeric class */
  private boolean m_numericClass;

  /** The number of classes if class is nominal */
  private int m_numClasses;

  /** 
   * Used to hold the probability of a different class val given nearest
   * instances (numeric class)
   */
  private double m_ndc;

  /** 
   * Used to hold the prob of different value of an attribute given
   * nearest instances (numeric class case)
   */
  private double[] m_nda;

  /**
   * Used to hold the prob of a different class val and different att
   * val given nearest instances (numeric class case)
   */
  private double[] m_ndcda;

  /** Holds the weights that relief assigns to attributes */
  private double[] m_weights;

  /** Prior class probabilities (discrete class case) */
  private double[] m_classProbs;

  /** 
   * The number of instances to sample when estimating attributes
   * default == -1, use all instances
   */
  private int m_sampleM;

  /** The number of nearest hits/misses */
  private int m_Knn;

  /** k nearest scores + instance indexes for n classes */
  private double[][][] m_karray;

  /** Upper bound for numeric attributes */
  private double[] m_maxArray;

  /** Lower bound for numeric attributes */
  private double[] m_minArray;

  /** Keep track of the farthest instance for each class */
  private double[] m_worst;

  /** Index in the m_karray of the farthest instance for each class */
  private int[] m_index;

  /** Number of nearest neighbours stored of each class */
  private int[] m_stored;

  /** Random number seed used for sampling instances */
  private int m_seed;

  /**
   *  used to (optionally) weight nearest neighbours by their distance
   *  from the instance in question. Each entry holds 
   *  exp(-((rank(r_i, i_j)/sigma)^2)) where rank(r_i,i_j) is the rank of
   *  instance i_j in a sequence of instances ordered by the distance
   *  from r_i. sigma is a user defined parameter, default=2
   **/
  private double[] m_weightsByRank;
  private int m_sigma;

  /** Weight by distance rather than equal weights */
  private boolean m_weightByDistance;

  /**
   * Constructor
   */
  public ReliefFAttributeEval () {
    resetOptions();
  }

  /**
   * Returns a string describing this attribute evaluator
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "ReliefFAttributeEval :\n\nEvaluates the worth of an attribute by "
      +"repeatedly sampling an instance and considering the value of the "
      +"given attribute for the nearest instance of the same and different "
      +"class. Can operate on both discrete and continuous class data.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing 
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation        result;
    TechnicalInformation        additional;
    
    result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR, "Kenji Kira and Larry A. Rendell");
    result.setValue(Field.TITLE, "A Practical Approach to Feature Selection");
    result.setValue(Field.BOOKTITLE, "Ninth International Workshop on Machine Learning");
    result.setValue(Field.EDITOR, "Derek H. Sleeman and Peter Edwards");
    result.setValue(Field.YEAR, "1992");
    result.setValue(Field.PAGES, "249-256");
    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
    
    additional = result.add(Type.INPROCEEDINGS);
    additional.setValue(Field.AUTHOR, "Igor Kononenko");
    additional.setValue(Field.TITLE, "Estimating Attributes: Analysis and Extensions of RELIEF");
    additional.setValue(Field.BOOKTITLE, "European Conference on Machine Learning");
    additional.setValue(Field.EDITOR, "Francesco Bergadano and Luc De Raedt");
    additional.setValue(Field.YEAR, "1994");
    additional.setValue(Field.PAGES, "171-182");
    additional.setValue(Field.PUBLISHER, "Springer");
    
    additional = result.add(Type.INPROCEEDINGS);
    additional.setValue(Field.AUTHOR, "Marko Robnik-Sikonja and Igor Kononenko");
    additional.setValue(Field.TITLE, "An adaptation of Relief for attribute estimation in regression");
    additional.setValue(Field.BOOKTITLE, "Fourteenth International Conference on Machine Learning");
    additional.setValue(Field.EDITOR, "Douglas H. Fisher");
    additional.setValue(Field.YEAR, "1997");
    additional.setValue(Field.PAGES, "296-304");
    additional.setValue(Field.PUBLISHER, "Morgan Kaufmann");
    
    return result;
  }

  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(5);
    newVector
      .addElement(new Option("\tSpecify the number of instances to\n" 
                             + "\tsample when estimating attributes.\n" 
                             + "\tIf not specified, then all instances\n" 
                             + "\twill be used.", "M", 1
                             , "-M <num instances>"));
    newVector.
      addElement(new Option("\tSeed for randomly sampling instances.\n" 
                            + "\t(Default = 1)", "D", 1
                            , "-D <seed>"));
    newVector.
      addElement(new Option("\tNumber of nearest neighbours (k) used\n" 
                            + "\tto estimate attribute relevances\n" 
                            + "\t(Default = 10).", "K", 1
                            , "-K <number of neighbours>"));
    newVector.
      addElement(new Option("\tWeight nearest neighbours by distance", "W"
                            , 0, "-W"));
    newVector.
      addElement(new Option("\tSpecify sigma value (used in an exp\n" 
                            + "\tfunction to control how quickly\n" 
                            + "\tweights for more distant instances\n" 
                            + "\tdecrease. Use in conjunction with -W.\n" 
                            + "\tSensible value=1/5 to 1/10 of the\n" 
                            + "\tnumber of nearest neighbours.\n" 
                            + "\t(Default = 2)", "A", 1, "-A <num>"));
    return  newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -M &lt;num instances&gt;
   *  Specify the number of instances to
   *  sample when estimating attributes.
   *  If not specified, then all instances
   *  will be used.</pre>
   * 
   * <pre> -D &lt;seed&gt;
   *  Seed for randomly sampling instances.
   *  (Default = 1)</pre>
   * 
   * <pre> -K &lt;number of neighbours&gt;
   *  Number of nearest neighbours (k) used
   *  to estimate attribute relevances
   *  (Default = 10).</pre>
   * 
   * <pre> -W
   *  Weight nearest neighbours by distance</pre>
   * 
   * <pre> -A &lt;num&gt;
   *  Specify sigma value (used in an exp
   *  function to control how quickly
   *  weights for more distant instances
   *  decrease. Use in conjunction with -W.
   *  Sensible value=1/5 to 1/10 of the
   *  number of nearest neighbours.
   *  (Default = 2)</pre>
   * 
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions (String[] options)
    throws Exception {
    String optionString;
    resetOptions();
    setWeightByDistance(Utils.getFlag('W', options));
    optionString = Utils.getOption('M', options);

    if (optionString.length() != 0) {
      setSampleSize(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('D', options);

    if (optionString.length() != 0) {
      setSeed(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('K', options);

    if (optionString.length() != 0) {
      setNumNeighbours(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('A', options);

    if (optionString.length() != 0) {
      setWeightByDistance(true); // turn on weighting by distance
      setSigma(Integer.parseInt(optionString));
    }
  }
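
  // A sketch of the equivalent programmatic configuration, using the
  // option flags documented above:
  //   setOptions(new String[]{"-M", "100", "-K", "10", "-W", "-A", "2"});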

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String sigmaTipText() {
    return "Set influence of nearest neighbours. Used in an exp function to "
      +"control how quickly weights decrease for more distant instances. "
      +"Use in conjunction with weightByDistance. Sensible values = 1/5 to "
      +"1/10 the number of nearest neighbours.";
  }

  /**
   * Sets the sigma value.
   *
   * @param s the value of sigma (> 0)
   * @throws Exception if s is not positive
   */
  public void setSigma (int s)
    throws Exception {
    if (s <= 0) {
      throw  new Exception("value of sigma must be > 0!");
    }

    m_sigma = s;
  }


  /**
   * Get the value of sigma.
   *
   * @return the sigma value.
   */
  public int getSigma () {
    return  m_sigma;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numNeighboursTipText() {
    return "Number of nearest neighbours for attribute estimation.";
  }

  /**
   * Set the number of nearest neighbours
   *
   * @param n the number of nearest neighbours.
   */
  public void setNumNeighbours (int n) {
    m_Knn = n;
  }


  /**
   * Get the number of nearest neighbours
   *
   * @return the number of nearest neighbours
   */
  public int getNumNeighbours () {
    return  m_Knn;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "Random seed for sampling instances.";
  }

  /**
   * Set the random number seed for randomly sampling instances.
   *
   * @param s the random number seed.
   */
  public void setSeed (int s) {
    m_seed = s;
  }


  /**
   * Get the seed used for randomly sampling instances.
   *
   * @return the random number seed.
   */
  public int getSeed () {
    return  m_seed;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String sampleSizeTipText() {
    return "Number of instances to sample. Default (-1) indicates that all "
      +"instances will be used for attribute estimation.";
  }

  /**
   * Set the number of instances to sample for attribute estimation
   *
   * @param s the number of instances to sample.
   */
  public void setSampleSize (int s) {
    m_sampleM = s;
  }


  /**
   * Get the number of instances used for estimating attributes
   *
   * @return the number of instances.
   */
  public int getSampleSize () {
    return  m_sampleM;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String weightByDistanceTipText() {
    return "Weight nearest neighbours by their distance.";
  }

  /**
   * Set the nearest neighbour weighting method
   *
   * @param b true if nearest neighbours are to be weighted by distance.
   */
  public void setWeightByDistance (boolean b) {
    m_weightByDistance = b;
  }


  /**
   * Get whether nearest neighbours are being weighted by distance
   *
   * @return true if nearest neighbours are weighted by distance
   */
  public boolean getWeightByDistance () {
    return  m_weightByDistance;
  }


  /**
   * Gets the current settings of ReliefFAttributeEval.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] options = new String[9];
    int current = 0;

    if (getWeightByDistance()) {
      options[current++] = "-W";
    }

    options[current++] = "-M";
    options[current++] = "" + getSampleSize();
    options[current++] = "-D";
    options[current++] = "" + getSeed();
    options[current++] = "-K";
    options[current++] = "" + getNumNeighbours();
    
    if (getWeightByDistance()) {
      options[current++] = "-A";
      options[current++] = "" + getSigma();
    }

    while (current < options.length) {
      options[current++] = "";
    }

    return  options;
  }


  /**
   * Return a description of the ReliefF attribute evaluator.
   *
   * @return a description of the evaluator as a String.
   */
  public String toString () {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("ReliefF feature evaluator has not been built yet\n");
    }
    else {
      text.append("\tReliefF Ranking Filter");
      text.append("\n\tInstances sampled: ");

      if (m_sampleM == -1) {
        text.append("all\n");
      }
      else {
        text.append(m_sampleM + "\n");
      }

      text.append("\tNumber of nearest neighbours (k): " + m_Knn + "\n");

      if (m_weightByDistance) {
        text.append("\tExponentially decreasing (with distance) " 
                    + "influence for\n" 
                    + "\tnearest neighbours. Sigma: " 
                    + m_sigma + "\n");
      }
      else {
        text.append("\tEqual influence nearest neighbours\n");
      }
    }

    return  text.toString();
  }

  /**
   * Returns the capabilities of this evaluator.
   *
   * @return            the capabilities of this evaluator
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();
    
    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);
    
    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    
    return result;
  }

  /**
   * Initializes a ReliefF attribute evaluator. 
   *
   * @param data set of instances serving as training data 
   * @throws Exception if the evaluator has not been 
   * generated successfully
   */
  public void buildEvaluator (Instances data)
    throws Exception {
    
    int z, totalInstances;
    Random r = new Random(m_seed);

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = data;
    m_classIndex = m_trainInstances.classIndex();
    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();

    if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
      m_numericClass = true;
    }
    else {
      m_numericClass = false;
    }

    if (!m_numericClass) {
      m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();
    }
    else {
      m_ndc = 0;
      m_numClasses = 1;
      m_nda = new double[m_numAttribs];
      m_ndcda = new double[m_numAttribs];
    }

    if (m_weightByDistance) { // set up the rank based weights
      m_weightsByRank = new double[m_Knn];

      for (int i = 0; i < m_Knn; i++) {
        m_weightsByRank[i] = 
          Math.exp(-((i/(double)m_sigma)*(i/(double)m_sigma)));
      }
    }

    // the final attribute weights
    m_weights = new double[m_numAttribs];
    // num classes (1 for numeric class) knn neighbours, 
    // and 0 = distance, 1 = instance index
    m_karray = new double[m_numClasses][m_Knn][2];

    if (!m_numericClass) {
      m_classProbs = new double[m_numClasses];

      for (int i = 0; i < m_numInstances; i++) {
        m_classProbs[(int)m_trainInstances.instance(i).value(m_classIndex)]++;
      }

      for (int i = 0; i < m_numClasses; i++) {
        m_classProbs[i] /= m_numInstances;
      }
    }

    m_worst = new double[m_numClasses];
    m_index = new int[m_numClasses];
    m_stored = new int[m_numClasses];
    m_minArray = new double[m_numAttribs];
    m_maxArray = new double[m_numAttribs];

    for (int i = 0; i < m_numAttribs; i++) {
      m_minArray[i] = m_maxArray[i] = Double.NaN;
    }

    for (int i = 0; i < m_numInstances; i++) {
      updateMinMax(m_trainInstances.instance(i));
    }
    
    if ((m_sampleM > m_numInstances) || (m_sampleM < 0)) {
      totalInstances = m_numInstances;
    }
    else {
      totalInstances = m_sampleM;
    }

     // process each instance, updating attribute weights
    for (int i = 0; i < totalInstances; i++) {
      if (totalInstances == m_numInstances) {
        z = i;
      }
      else {
        // nextInt(bound) samples uniformly in [0, m_numInstances) and
        // avoids both the modulo bias and the Integer.MIN_VALUE corner
        // case of negating r.nextInt() % n
        z = r.nextInt(m_numInstances);
      }
 
      if (!(m_trainInstances.instance(z).isMissing(m_classIndex))) {
        // first clear the knn and worst index stuff for the classes
        for (int j = 0; j < m_numClasses; j++) {
          m_index[j] = m_stored[j] = 0;

          for (int k = 0; k < m_Knn; k++) {
            m_karray[j][k][0] = m_karray[j][k][1] = 0;
          }
        }

        findKHitMiss(z);

        if (m_numericClass) {
          updateWeightsNumericClass(z);
        }
        else {
          updateWeightsDiscreteClass(z);
        }
      }
    }

    // now scale weights by 1/m_numInstances (nominal class) or
    // calculate weights numeric class
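    //
    // For a numeric class this is the RReliefF estimate
    // (Robnik-Sikonja & Kononenko, 1997):
    //   W[A] = P(diff A | nearest, diff C) - P(diff A | nearest, same C)
    //        = ndcda[A]/ndc - (nda[A] - ndcda[A])/(m - ndc)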
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        if (m_numericClass) {
          m_weights[i] = m_ndcda[i]/m_ndc - 
            ((m_nda[i] - m_ndcda[i])/((double)totalInstances - m_ndc));
        }
        else {
          m_weights[i] *= (1.0/(double)totalInstances);
        }
      }
    }
  }


  /**
   * Evaluates an individual attribute using ReliefF's instance based approach.
   * The actual work is done by buildEvaluator which evaluates all features.
   *
   * @param attribute the index of the attribute to be evaluated
   * @return the computed ReliefF weight for the attribute
   * @throws Exception if the attribute could not be evaluated
   */
  public double evaluateAttribute (int attribute)
    throws Exception {
    return  m_weights[attribute];
  }


  /**
   * Reset options to their default values
   */
  protected void resetOptions () {
    m_trainInstances = null;
    m_sampleM = -1;
    m_Knn = 10;
    m_sigma = 2;
    m_weightByDistance = false;
    m_seed = 1;
  }


  /**
   * Normalizes a given value of a numeric attribute.
   *
   * @param x the value to be normalized
   * @param i the attribute's index
   * @return the normalized value
   */
  private double norm (double x, int i) {
    if (Double.isNaN(m_minArray[i]) || 
        Utils.eq(m_maxArray[i], m_minArray[i])) {
      return  0;
    }
    else {
      return  (x - m_minArray[i])/(m_maxArray[i] - m_minArray[i]);
    }
  }


  /**
   * Updates the minimum and maximum values for all the attributes
   * based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax (Instance instance) {
    try {
      for (int j = 0; j < instance.numValues(); j++) {
        if ((instance.attributeSparse(j).isNumeric()) && 
            (!instance.isMissingSparse(j))) {
          if (Double.isNaN(m_minArray[instance.index(j)])) {
            m_minArray[instance.index(j)] = instance.valueSparse(j);
            m_maxArray[instance.index(j)] = instance.valueSparse(j);
          }
          else {
            if (instance.valueSparse(j) < m_minArray[instance.index(j)]) {
              m_minArray[instance.index(j)] = instance.valueSparse(j);
            }
            else {
              if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) {
                m_maxArray[instance.index(j)] = instance.valueSparse(j);
              }
            }
          }
        }
      }
    } catch (Exception ex) {
      System.err.println(ex);
      ex.printStackTrace();
    }
  }

  /**
   * Computes the difference between two given attribute
   * values.
   *
   * @param index the index of the attribute
   * @param val1 the first value
   * @param val2 the second value
   * @return the difference between the two values, in [0, 1]
   */
  private double difference(int index, double val1, double val2) {

    switch (m_trainInstances.attribute(index).type()) {
    case Attribute.NOMINAL:
      
      // If attribute is nominal
      if (Utils.isMissingValue(val1) || 
          Utils.isMissingValue(val2)) {
        return (1.0 - (1.0/((double)m_trainInstances.
                            attribute(index).numValues())));
      } else if ((int)val1 != (int)val2) {
        return 1;
      } else {
        return 0;
      }
    case Attribute.NUMERIC:

      // If attribute is numeric
      if (Utils.isMissingValue(val1) || 
          Utils.isMissingValue(val2)) {
        if (Utils.isMissingValue(val1) && 
            Utils.isMissingValue(val2)) {
          return 1;
        } else {
          double diff;
          if (Utils.isMissingValue(val2)) {
            diff = norm(val1, index);
          } else {
            diff = norm(val2, index);
          }
          if (diff < 0.5) {
            diff = 1.0 - diff;
          }
          return diff;
        }
      } else {
        return Math.abs(norm(val1, index) - norm(val2, index));
      }
    default:
      return 0;
    }
  }

  /**
   * Calculates the distance between two instances
   *
   * @param first the first instance
   * @param second the second instance
   * @return the distance between the two given instances (an unnormalized
   * sum of the per-attribute differences)
   */          
  private double distance(Instance first, Instance second) {  

    double distance = 0;
    int firstI, secondI;

    for (int p1 = 0, p2 = 0; 
         p1 < first.numValues() || p2 < second.numValues();) {
      if (p1 >= first.numValues()) {
        firstI = m_trainInstances.numAttributes();
      } else {
        firstI = first.index(p1); 
      }
      if (p2 >= second.numValues()) {
        secondI = m_trainInstances.numAttributes();
      } else {
        secondI = second.index(p2);
      }
      if (firstI == m_trainInstances.classIndex()) {
        p1++; continue;
      } 
      if (secondI == m_trainInstances.classIndex()) {
        p2++; continue;
      } 
      double diff;
      if (firstI == secondI) {
        diff = difference(firstI, 
                          first.valueSparse(p1),
                          second.valueSparse(p2));
        p1++; p2++;
      } else if (firstI > secondI) {
        diff = difference(secondI, 
                          0, second.valueSparse(p2));
        p2++;
      } else {
        diff = difference(firstI, 
                          first.valueSparse(p1), 0);
        p1++;
      }
      distance += diff;
    }
    
    return distance;
  }
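
  // Note: distance() (and the weight updates below) merge the two sparse
  // value lists with the p1/p2 pointers: an attribute present in only one
  // instance is compared against an implicit 0, the class attribute is
  // skipped, and the per-attribute differences are simply summed, giving
  // a city-block distance over range-normalized attributes.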


  /**
   * update attribute weights given an instance when the class is numeric
   *
   * @param instNum the index of the instance to use when updating weights
   */
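  // Accumulates m_ndc, m_nda[] and m_ndcda[] -- the probabilities described
  // on those fields -- from the k nearest neighbours of instance instNum;
  // buildEvaluator() later combines them into the final RReliefF weights.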
  private void updateWeightsNumericClass (int instNum) {
    int i, j;
    double temp, temp2;
    int[] tempSorted = null;
    double[] tempDist = null;
    double distNorm = 1.0;
    int firstI, secondI;

    Instance inst = m_trainInstances.instance(instNum);

    // sort nearest neighbours and set up normalization variable
    if (m_weightByDistance) {
      tempDist = new double[m_stored[0]];

      for (j = 0, distNorm = 0; j < m_stored[0]; j++) {
        // copy the distances
        tempDist[j] = m_karray[0][j][0];
        // sum normalizer
        distNorm += m_weightsByRank[j];
      }

      tempSorted = Utils.sort(tempDist);
    }

    for (i = 0; i < m_stored[0]; i++) {
      // P diff prediction (class) given nearest instances
      if (m_weightByDistance) {
        temp = difference(m_classIndex, 
                          inst.value(m_classIndex),
                          m_trainInstances.
                          instance((int)m_karray[0][tempSorted[i]][1]).
                          value(m_classIndex));
        temp *= (m_weightsByRank[i]/distNorm);
      }
      else {
        temp = difference(m_classIndex, 
                          inst.value(m_classIndex), 
                          m_trainInstances.
                          instance((int)m_karray[0][i][1]).
                          value(m_classIndex));
        temp *= (1.0/(double)m_stored[0]); // equal influence
      }

      m_ndc += temp;

      Instance cmp;
      cmp = (m_weightByDistance) 
        ? m_trainInstances.instance((int)m_karray[0][tempSorted[i]][1])
        : m_trainInstances.instance((int)m_karray[0][i][1]);

      double temp_diffP_diffA_givNearest = 
        difference(m_classIndex, inst.value(m_classIndex),
                   cmp.value(m_classIndex));
      // now the attributes
      for (int p1 = 0, p2 = 0; 
           p1 < inst.numValues() || p2 < cmp.numValues();) {
        if (p1 >= inst.numValues()) {
          firstI = m_trainInstances.numAttributes();
        } else {
          firstI = inst.index(p1); 
        }
        if (p2 >= cmp.numValues()) {
          secondI = m_trainInstances.numAttributes();
        } else {
          secondI = cmp.index(p2);
        }
        if (firstI == m_trainInstances.classIndex()) {
          p1++; continue;
        } 
        if (secondI == m_trainInstances.classIndex()) {
          p2++; continue;
        } 
        temp = 0.0;
        temp2 = 0.0;
      
        if (firstI == secondI) {
          j = firstI;
          temp = difference(j, inst.valueSparse(p1), cmp.valueSparse(p2)); 
          p1++; p2++;
        } else if (firstI > secondI) {
          j = secondI;
          temp = difference(j, 0, cmp.valueSparse(p2));
          p2++;
        } else {
          j = firstI;
          temp = difference(j, inst.valueSparse(p1), 0);
          p1++;
        } 
       
        temp2 = temp_diffP_diffA_givNearest * temp; 
        // P of different prediction and different att value given
        // nearest instances
        if (m_weightByDistance) {
          temp2 *= (m_weightsByRank[i]/distNorm);
        }
        else {
          temp2 *= (1.0/(double)m_stored[0]); // equal influence
        }

        m_ndcda[j] += temp2;
       
        // P of different attribute val given nearest instances
        if (m_weightByDistance) {
          temp *= (m_weightsByRank[i]/distNorm);
        }
        else {
          temp *= (1.0/(double)m_stored[0]); // equal influence
        }

        m_nda[j] += temp;
      }
    }
  }


  /**
   * update attribute weights given an instance when the class is discrete
   *
   * @param instNum the index of the instance to use when updating weights
   */
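  // ReliefF weight update (Kononenko, 1994) for the sampled instance R:
  //   W[A] := W[A] - avg diff(A, R, H) over the k nearest hits H
  //                + sum over classes C != class(R) of
  //                  P(C)/(1 - P(class(R))) * avg diff(A, R, M)
  //                  over the k nearest misses M of class C
  // (averages become rank-based weights when weightByDistance is set;
  // the final 1/m scaling happens in buildEvaluator()).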
  private void updateWeightsDiscreteClass (int instNum) {
    int i, j, k;
    int cl;
    double temp_diff, w_norm = 1.0;
    double[] tempDistClass;
    int[] tempSortedClass = null;
    double distNormClass = 1.0;
    double[] tempDistAtt;
    int[][] tempSortedAtt = null;
    double[] distNormAtt = null;
    int firstI, secondI;

    // store the indexes (sparse instances) of non-zero elements
    Instance inst = m_trainInstances.instance(instNum);

    // get the class of this instance
    cl = (int)m_trainInstances.instance(instNum).value(m_classIndex);

    // sort nearest neighbours and set up normalization variables
    if (m_weightByDistance) {
      // do class (hits) first
      // sort the distances
      tempDistClass = new double[m_stored[cl]];

      for (j = 0, distNormClass = 0; j < m_stored[cl]; j++) {
        // copy the distances
        tempDistClass[j] = m_karray[cl][j][0];
        // sum normalizer
        distNormClass += m_weightsByRank[j];
      }

      tempSortedClass = Utils.sort(tempDistClass);
      // do misses (other classes)
      tempSortedAtt = new int[m_numClasses][1];
      distNormAtt = new double[m_numClasses];

      for (k = 0; k < m_numClasses; k++) {
        if (k != cl) { // already done cl
          // sort the distances
          tempDistAtt = new double[m_stored[k]];

          for (j = 0, distNormAtt[k] = 0; j < m_stored[k]; j++) {
            // copy the distances
            tempDistAtt[j] = m_karray[k][j][0];
            // sum normalizer
            distNormAtt[k] += m_weightsByRank[j];
          }

          tempSortedAtt[k] = Utils.sort(tempDistAtt);
        }
      }
    }

    if (m_numClasses > 2) {
      // the amount of probability space left after removing the
      // probability of this instance's class value
      w_norm = (1.0 - m_classProbs[cl]);
    }
    
    // do the k nearest hits of the same class
    for (j = 0, temp_diff = 0.0; j < m_stored[cl]; j++) {
      Instance cmp;
      cmp = (m_weightByDistance) 
        ? m_trainInstances.
        instance((int)m_karray[cl][tempSortedClass[j]][1])
        : m_trainInstances.instance((int)m_karray[cl][j][1]);

      for (int p1 = 0, p2 = 0; 
           p1 < inst.numValues() || p2 < cmp.numValues();) {
        if (p1 >= inst.numValues()) {
          firstI = m_trainInstances.numAttributes();
        } else {
          firstI = inst.index(p1); 
        }
        if (p2 >= cmp.numValues()) {
          secondI = m_trainInstances.numAttributes();
        } else {
          secondI = cmp.index(p2);
        }
        if (firstI == m_trainInstances.classIndex()) {
          p1++; continue;
        } 
        if (secondI == m_trainInstances.classIndex()) {
          p2++; continue;
        } 
        if (firstI == secondI) {
          i = firstI;
          temp_diff = difference(i, inst.valueSparse(p1), 
                                 cmp.valueSparse(p2)); 
          p1++; p2++;
        } else if (firstI > secondI) {
          i = secondI;
          temp_diff = difference(i, 0, cmp.valueSparse(p2));
          p2++;
        } else {
          i = firstI;
          temp_diff = difference(i, inst.valueSparse(p1), 0);
          p1++;
        } 
        
        if (m_weightByDistance) {
          temp_diff *=
            (m_weightsByRank[j]/distNormClass);
        } else {
          if (m_stored[cl] > 0) {
            temp_diff /= (double)m_stored[cl];
          }
        }
        m_weights[i] -= temp_diff;
      }
    }

    // now do k nearest misses from each of the other classes
    temp_diff = 0.0;

    for (k = 0; k < m_numClasses; k++) {
      if (k != cl) { // already done cl
        for (j = 0; j < m_stored[k]; j++) {
          Instance cmp;
          cmp = (m_weightByDistance) 
            ? m_trainInstances.
            instance((int)m_karray[k][tempSortedAtt[k][j]][1])
            : m_trainInstances.instance((int)m_karray[k][j][1]);
      
          for (int p1 = 0, p2 = 0; 
               p1 < inst.numValues() || p2 < cmp.numValues();) {
            if (p1 >= inst.numValues()) {
              firstI = m_trainInstances.numAttributes();
            } else {
              firstI = inst.index(p1); 
            }
            if (p2 >= cmp.numValues()) {
              secondI = m_trainInstances.numAttributes();
            } else {
              secondI = cmp.index(p2);
            }
            if (firstI == m_trainInstances.classIndex()) {
              p1++; continue;
            } 
            if (secondI == m_trainInstances.classIndex()) {
              p2++; continue;
            } 
            if (firstI == secondI) {
              i = firstI;
              temp_diff = difference(i, inst.valueSparse(p1), 
                                     cmp.valueSparse(p2)); 
              p1++; p2++;
            } else if (firstI > secondI) {
              i = secondI;
              temp_diff = difference(i, 0, cmp.valueSparse(p2));
              p2++;
            } else {
              i = firstI;
              temp_diff = difference(i, inst.valueSparse(p1), 0);
              p1++;
            } 

            if (m_weightByDistance) {
              temp_diff *=
                (m_weightsByRank[j]/distNormAtt[k]);
            }
            else {
              if (m_stored[k] > 0) {
                temp_diff /= (double)m_stored[k];
              }
            }
            if (m_numClasses > 2) {
              m_weights[i] += ((m_classProbs[k]/w_norm)*temp_diff);
            } else {
              m_weights[i] += temp_diff;
            }
          }
        }
      }
    }
  }


  /**
   * Find the K nearest instances to supplied instance if the class is numeric,
   * or the K nearest Hits (same class) and Misses (K from each of the other
   * classes) if the class is discrete.
   *
   * @param instNum the index of the instance to find nearest neighbours of
   */
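  // m_karray[cl] acts as a fixed-size pool of the k best candidates per
  // class: new neighbours fill it until m_stored[cl] == m_Knn, after which
  // a closer instance simply overwrites the current worst entry (tracked
  // via m_worst[cl] and m_index[cl]).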
  private void findKHitMiss (int instNum) {
    int i, j;
    int cl;
    double ww;
    double temp_diff = 0.0;
    Instance thisInst = m_trainInstances.instance(instNum);

    for (i = 0; i < m_numInstances; i++) {
      if (i != instNum) {
        Instance cmpInst = m_trainInstances.instance(i);
        temp_diff = distance(cmpInst, thisInst);

        // class of this training instance or 0 if numeric
        if (m_numericClass) {
          cl = 0;
        }
        else {
          cl = (int)m_trainInstances.instance(i).value(m_classIndex);
        }

        // add this diff to the list for the class of this instance
        if (m_stored[cl] < m_Knn) {
          m_karray[cl][m_stored[cl]][0] = temp_diff;
          m_karray[cl][m_stored[cl]][1] = i;
          m_stored[cl]++;

          // note the worst diff for this class
          for (j = 0, ww = -1.0; j < m_stored[cl]; j++) {
            if (m_karray[cl][j][0] > ww) {
              ww = m_karray[cl][j][0];
              m_index[cl] = j;
            }
          }

          m_worst[cl] = ww;
        }
        else {
          /* if we already have stored knn for this class then check to
             see if this instance is better than the worst */
          if (temp_diff < m_karray[cl][m_index[cl]][0]) {
            m_karray[cl][m_index[cl]][0] = temp_diff;
            m_karray[cl][m_index[cl]][1] = i;

            for (j = 0, ww = -1.0; j < m_stored[cl]; j++) {
              if (m_karray[cl][j][0] > ww) {
                ww = m_karray[cl][j][0];
                m_index[cl] = j;
              }
            }

            m_worst[cl] = ww;
          }
        }
      }
    }
  }
  
  /**
   * Returns the revision string.
   * 
   * @return                the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }

  // ============
  // Test method.
  // ============
  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    runEvaluator(new ReliefFAttributeEval(), args);
  }
}
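
A minimal usage sketch (not part of the class above). It assumes the standard
Weka AttributeSelection/Ranker API; the dataset path "iris.arff" is a
hypothetical placeholder:

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.Ranker;
import weka.attributeSelection.ReliefFAttributeEval;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ReliefFExample {
  public static void main(String[] args) throws Exception {
    // load a dataset and mark the last attribute as the class
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // configure the evaluator: 10 neighbours, all instances, seed 1
    ReliefFAttributeEval eval = new ReliefFAttributeEval();
    eval.setNumNeighbours(10);
    eval.setSampleSize(-1);
    eval.setSeed(1);

    // rank all attributes by their ReliefF weight
    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(eval);
    selector.setSearch(new Ranker());
    selector.SelectAttributes(data);
    System.out.println(selector.toResultsString());
  }
}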