cvrDocuClassifiers.h

00001 /*
00002  * Copyright (C) 1998-2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  *
00006  * This file is part of the Computer Vision and Robotics Library (CVR-Lib)
00007  *
00008  * The CVR-Lib is free software; you can redistribute it and/or
00009  * modify it under the terms of the BSD License.
00010  *
00011  * All rights reserved.
00012  *
00013  * Redistribution and use in source and binary forms, with or without
00014  * modification, are permitted provided that the following conditions are met:
00015  *
00016  * 1. Redistributions of source code must retain the above copyright notice,
00017  *    this list of conditions and the following disclaimer.
00018  *
00019  * 2. Redistributions in binary form must reproduce the above copyright notice,
00020  *    this list of conditions and the following disclaimer in the documentation
00021  *    and/or other materials provided with the distribution.
00022  *
00023  * 3. Neither the name of the authors nor the names of its contributors may be
00024  *    used to endorse or promote products derived from this software without
00025  *    specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00028  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00031  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00032  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00033  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00034  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00035  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00036  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00037  * POSSIBILITY OF SUCH DAMAGE.
00038  */
00039 
00040 
00041 
00042 /*----------------------------------------------------------------
00043  * project ....: LTI Digital Image/Signal Processing Library
00044  * file .......: cvrDocuClassifiers.h
00045  * authors ....: Peter Doerfler
00046  * organization: LTI, RWTH Aachen
00047  * creation ...: 08.11.2002
00048  * revisions ..: $Id: cvrDocuClassifiers.h,v 1.3 2006/01/03 19:56:00 alvarado Exp $
00049  */
00050 
00051 #ifndef _CVR_DOCUCLASSIFIERS_H_
00052 #define _CVR_DOCUCLASSIFIERS_H_
00053 
00054 /**
00055 \page docuClassifiers How to use the Classifiers.
00056 
00057 - \ref types
00058 - \ref internalclasses
00059 - \ref related
00060   - \ref viz
00061   - \ref classStat
00062   - \ref sammon
00063 - \ref unsupervised
00064 - \ref instance
00065 - \ref sequence
00066 
00067 \section types Types of Classifiers
00068 
00069 All classifiers follow the same principle. First, they are trained
00070 using some kind of training data. The trained classifier can then be
00071 used to classify new data, i.e. to assign the same label to the new
00072 data as to the most similar training data.
00073 
00074 Three different categories of classifiers exist in the CVR-Lib:
00075 - unsupervised classifiers
00076 - supervised sequence classifiers
00077 - supervised instance classifiers
00078 
00079 The first category includes all algorithms that are trained without
00080 knowing the proper labels or results for each training
00081 pattern. Clustering algorithms are included here, although most are
00082 not designed for later classifying new data. Supervised classifiers
00083 are trained knowing the expected result for each training
00084 pattern. They are divided into two groups: sequence classifiers work on
00085 time series whereas instance classifiers work on n-dimensional
00086 vectors.
00087 
00088 All classifiers have four essential properties:
00089 
00090 - The \b parameters are static properties of the classifier. All
00091   classifiers have default settings for the parameters
00092   object. However, many classifiers will not work properly without
00093   adjustment of some of the parameters.
00094 - The \b status contains error messages. If any of the methods,
00095   particularly train and classify, returns false, information on the
00096   nature of the failure can be obtained by calling getStatusString().
00097 - The \b progress of the training can be monitored with a
00098   progressObject. See \ref progress.
00099 - The \b outputTemplate contains information on how internal results
00100   are mapped to an outputVector which is the result of a
00101   classification. This member should only be changed by experienced
00102   users. Usually, the outputTemplate is set by the classifier itself
00103   during the training.
00104 
00105 The results of the classification are returned in an outputVector. This
00106 object contains labels and corresponding recognition values. These can
00107 often be interpreted as probabilities. Section \ref outvec gives
00108 details about this data structure.
00109 
00110 \section internalclasses Helper Classes
00111 
00112 \subsection progress Progress Objects
00113 
00114 The progressInfo object gives information on how many steps an
00115 algorithm will take until finished and how many have already been
00116 accomplished. Also the name of the classifier is usually given. The
00117 following progress infos exist:
00118 
00119 - cvr::streamProgressInfo writes ASCII into a stream. The default
00120   stream is the console output.
00121 
00122 \subsection params Parameters
00123 
00124 The parameters of the classifier class define an enumeration
00125 eDistanceMeasure which is used to specify which distance is used in
00126 the classifier. Options are the L1 and L2 distances.
00127 
00128 See also cvr::classifier::parameters.
00129 
00130 \subsection outvec Output Vector
00131 
00132 The output vector is the result of a classification, i.e. calling the
00133 classify method of a classifier. It assigns values to the labels. In
00134 the case of supervised classifiers these labels were supplied by the
00135 user during training. In case of unsupervised classification the
00136 classifier usually assigns labels from 0 to C-1 with C the number of
00137 classes found.
00138 
00139 Output vectors can be the final result of a process. In this case they
00140 are usually displayed by an application or used for statistical
00141 analysis of the classification process. For the latter, the
00142 classificationStatistics %functor can be used. It is also possible to
00143 combine the results of several classifiers using the combination
00144 %functor.
00145 
00146 For further reading see the documentation of cvr::classifier::outputVector.
00147 
00148 \section related Related Topics
00149 
00150 
00151 \subsection viz Visualization of Classifiers and Data
00152 
00153 \subsubsection vizData Visualizing Data
00154 
00155 cvr::draw
00156 cvr::epsDraw
00157 cvr::draw3D
00158 cvr::draw2DDistribution
00159 
00160 \subsubsection vizClass Visualizing Classification Results
00161 
00162 cvr::classifier2DVisualizer
00163 
00164 \subsection classStat Classification Statistics
00165 
00166 cvr::classificationStatistics
00167 
00168 \subsection sammon Sammon's Mapping
00169 
00170 Sammon's Mapping transforms points in n-dimensional space to points in
00171 m-dimensional space while trying to preserve all distances between the
00172 points. Usually, m will be 2 or 3 so that the points can be displayed
00173 using one of the cvr::draw classes and an appropriate
00174 cvr::viewer. Sammon's mapping can be very useful to get an idea of the
00175 distribution of higher dimensional data without losing as much
00176 information as when using e.g. cvr::principalComponents to reduce the
00177 dimensionality.
00178 
00179 However, the mapping is a very difficult task and might easily fail to
00180 converge at a minimum. Check the error to get an idea about the performance. In case it is bad there are several options:
00181 - try to reduce the amount of data (points). This can be done e.g. by
00182   using a cvr::clustering algorithm with the number of clusters a
00183   fraction of the number of data points, say a third. Then use the
00184   cluster centers as data points. This is an easy quantization method.
00185 - reduce the number of dimensions a little e.g. to 10 with
00186   cvr::principalComponents or a feature selection method. Use the
00187   lower dimensional points for Sammon's mapping. Thus, at least you'll
00188   get the information from 10 dimensions.
00189 
00190 For more information see cvr::sammonsMapping.
00191 
00192 */
00193 
00194 /**
00195 \page unsupervised Unsupervised Classifiers
00196 
00197 Unsupervised classifiers are methods that try to find 'natural' or
00198 'sensible' structure in data. To this end each data point is usually
00199 member of a newly found group. A distinct label is assigned to each
00200 group. Some algorithms allow membership of each point to more than one
00201 group. One example is the \ref fcm.
00202 
00203 The most popular application of unsupervised classifiers is data
00204 analysis and possibly visualization. However, some methods are also
00205 used for data compression or quantization.
00206 
00207 This page first deals with clustering techniques. These are usually
00208 statistical methods for finding groups (clusters). The second section
00209 deals with artificial neural networks, the Self-Organizing Feature
00210 Maps, which perform unsupervised classification.
00211 
00212 - \ref cluster
00213   - \ref kMeans
00214   - \ref fcm
00215   - \ref adapt
00216 - \ref som
00217   - \ref som2
00218   - \ref somViz
00219 
00220 Related classes:
00221   - cvr::unsupervisedClassifier
00222 
00223 \section cluster Clustering
00224 
00225 - \ref kMeans
00226 - \ref fcm
00227 - \ref adapt
00228 
00229 \subsection kMeans k-Means Clustering
00230 
00231 cvr::kMeansClustering
00232 
00233 \subsection fcm Fuzzy-C-Means Clustering
00234 
00235 cvr::fuzzyCMeans
00236 
00237 \subsection adapt Adaptive k-Means Clustering
00238 
00239 cvr::adaptiveKMeans
00240 
00241 \section som Self Organizing Feature Maps
00242 
00243 cvr::SOFM
00244 
00245 - \ref som2
00246 - \ref somViz
00247 
00248 \subsection som2 2D SOMs
00249 
00250 cvr::SOFM2D
00251 
00252 \subsection somViz Visualization of SOMs
00253 
00254 Still to be done.
00255 
00256 */
00257 
00258 /**
00259 \page instance Supervised Instance Classifiers
00260 
00261 - \ref stat
00262   - \ref svm
00263   - \ref shClass
00264   - \ref trees
00265 - \ref ann
00266   - \ref mlp
00267   - \ref lvq
00268   - \ref rbf
00269 
00270 \section stat Statistical Classifiers
00271 
00272 - \ref svm
00273 - \ref shClass
00274 - \ref trees
00275 
00276 \subsection svm Support Vector Machine
00277 
00278 cvr::svm
00279 
00280 \subsection shClass Sparse Histogram-based Classifier
00281 
00282 cvr::shClassifier
00283 
00284 \subsection trees Decision Trees
00285 
00286 cvr::decisionTree
00287 
00288 \section ann Artificial Neural Networks
00289 
00290 - \ref mlp
00291 - \ref lvq
00292 - \ref rbf
00293 
00294 \subsection mlp Multi Layer Perceptron
00295 
00296 cvr::MLP
00297 
00298 \subsection lvq Learning Vector Quantization
00299 
00300 cvr::lvq
00301 
00302 \subsection rbf Radial Basis Function Network
00303 
00304 cvr::rbf
00305 
00306 */
00307 
00308 /**
00309 \page sequence Supervised Sequence Classifiers
00310 
00311 cvr::hmmClassifier
00312 */
00313 
00314 
00315 
00316 
00317 #endif