cvrSecondOrderStatistics.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1998 - 2005
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  *
00006  * This file is part of the Computer Vision and Robotics Library (CVR-Lib)
00007  *
00008  * The CVR-Lib is free software; you can redistribute it and/or
00009  * modify it under the terms of the BSD License.
00010  *
00011  * All rights reserved.
00012  *
00013  * Redistribution and use in source and binary forms, with or without
00014  * modification, are permitted provided that the following conditions are met:
00015  *
00016  * 1. Redistributions of source code must retain the above copyright notice,
00017  *    this list of conditions and the following disclaimer.
00018  *
00019  * 2. Redistributions in binary form must reproduce the above copyright notice,
00020  *    this list of conditions and the following disclaimer in the documentation
00021  *    and/or other materials provided with the distribution.
00022  *
00023  * 3. Neither the name of the authors nor the names of its contributors may be
00024  *    used to endorse or promote products derived from this software without
00025  *    specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00028  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00031  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00032  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00033  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00034  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00035  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00036  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00037  * POSSIBILITY OF SUCH DAMAGE.
00038  */
00039 
00040 
00041 
00042 /**
00043  * \file   cvrSecondOrderStatistics.h
00044  *         Contains the functor secondOrderStatistics, for means,
00045  *         variances and covariance matrix
00046  * \author Jochen Wickel
00047  * \author Peter Doerfler
00048  * \date   25.09.2000
00049  *
00050  * $Id: cvrSecondOrderStatistics.h,v 1.8 2007/10/07 03:17:47 alvarado Exp $
00051  */
00052 
00053 #ifndef _CVR_SECOND_ORDER_STATISTICS_H_
00054 #define _CVR_SECOND_ORDER_STATISTICS_H_
00055 
00056 #include "cvrFunctor.h"
00057 #include "cvrVector.h"
00058 #include "cvrMatrix.h"
00059 #include "cvrDataRepresentationType.h"
00060 
00061 namespace cvr {
00062   /**
00063    * Computes mean, variance, and/or covariance matrix of data
00064    * matrices and vectors. Correlations can be obtained as well.
00065    *
00066    * The first template parameter (T) represents the type of the input data,
00067    * and the second (U) the type of the output data, which defaults to T
00068    * if not specified. The user should ensure that the result values fit into
00069    * U, that is, T*T should be assignable to a U without loss of precision.
00070    *
00071    * \section rcrep Row or column representations
00072    *
00073    * If the data samples are given in the form of a matrix, the samples
00074    * (points) can be in the rows or columns of the matrix. The CVR-Lib
00075    * usually assumes rows, but here you can choose via
00076    * parameters::dataRepresentationType.
00077    *
00078    * The following example clarifies the usage of
00079    * eDataRepresentationType. Given the data matrix
00080    *
00081    * \code
00082    * |  5  6  10  2 |
00083    * |  1 -2   4  4 |
00084    * |  3  2   1 -3 |
00085    * \endcode
00086    *
00087    * With the default parameters (Rows, Empirical, no correlation)
00088    * calling variance() yields (4 16 21 13). Switching to Maxlikely yields
00089    * (2.667 10.667 14 8.667).
00090    *
00091    * When the data samples are set to be in Columns the empirical
00092    * variance is (10.917 8.25 6.917)^T. The Maxlikely variance is
00093    * (8.1875 6.1875 5.1875)^T.
00094    *
00095    * \section vtype Variance type
00096    *
00097    * Variances and covariances can either be calculated Empirical
00098    * (unbiased) or as maximum likelihood (Maxlikely, second
00099    * moments). With N the number of samples, the former normalizes by
00100    * dividing by N-1 the latter by N.
00101    *
00102    * To obtain a correlation matrix instead of a covariance matrix set
00103    * parameters::useCorrelation to true.
00104    *
00105    * Besides the mandatory apply-methods, this functor also offers a number
00106    * of shortcuts to calculate only first or second order
00107    * statistics. However, if both are needed it is always faster to
00108    * call an apply() method that serves your purpose.
00109    *
00110    * \section cplx Complex numbers
00111    *
00112    * This class works with matrices and vectors of complex numbers too.  The
00113    * definition of mean and variance is kept as with real numbers.
00114    *
00115    * \section cplxdef Complex types
00116    *
00117    * You can also compute the mean, variance and covariance of vectors/scalars
00118    * of complex values (i.e. cvr::complex<float> or cvr::complex<double>).  The
00119    * variance is defined through conjugation as
00120    * 
00121    * \f[
00122    *   \operatorname{var}(x) = E((x-\mu)^*(x-\mu))
00123    * \f]
00124    *
00125    * \ingroup gStatistics
00126    */
00127   template <typename T,  typename U = T>
00128   class secondOrderStatistics : public functor {
00129   public:
00130 
00131     /**
00132      * Type of the variance. It can either be empirical or maximum
00133      * likelihood.
00134      */
00135     enum eVarianceType {
00136       Empirical=0, /**< Empirical (unbiased) Variance:
00137                     *   \f[ \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \mu)^2 \f]
00138                     */
00139       Maxlikely=1  /**< Maximum Likelihood Variance:
00140                     *   \f[ \frac{1}{n} \sum_{i=1}^{n} (x_i - \mu)^2 \f]
00141                     */
00142     };
00143 
00144     /**
00145      * The parameters for the class secondOrderStatistics.
00146      */
00147     class parameters : public functor::parameters {
00148     public:
00149 
00150       /**
00151        * Default constructor
00152        */
00153       parameters();
00154 
00155       /**
00156        * Copy constructor
00157        *
00158        * @param other the parameters object to be copied
00159        */
00160       parameters(const parameters& other);
00161 
00162       /**
00163        * Destructor
00164        */
00165       ~parameters();
00166 
00167       /**
00168        * Copy the contents of a parameters object.
00169        *
00170        * @param other the parameters object to be copied
00171        * @return a reference to this parameters object
00172        */
00173       parameters& copy(const parameters& other);
00174 
00175       /**
00176        * Copy the contents of a parameters object.
00177        * \param other the parameters object to be copied
00178        * \return a reference to this parameters object
00179        */
00180       parameters& operator=(const parameters& other);
00181 
00182       /**
00183        * Returns the name of this class.
00184        */
00185       const std::string& name() const;
00186 
00187       /**
00188        * Write the parameters to the given ioHandler.
00189        *
00190        * @param handler the ioHandler to be used
00191        * @param complete if true (the default) the enclosing begin/end will
00192        *        be also written, otherwise only the data block will be
00193        *        written.
00194        * @return true if write was successful
00195        */
00196       virtual bool write(ioHandler& handler, const bool complete=true) const;
00197 
00198       /**
00199        * Read the parameters from the given ioHandler.
00200        *
00201        * @param handler the ioHandler to be used
00202        * @param complete if true (the default) the enclosing begin/end will
00203        *        be also read, otherwise only the data block will be read.
00204        * @return true if read was successful
00205        */
00206       virtual bool read(ioHandler& handler, const bool complete = true);
00207 
00208       /**
00209        * Returns a pointer to a clone of the parameters
00210        */
00211       virtual parameters* clone() const;
00212 
00213       /**
00214        * Returns a pointer to a new instance of the parameters
00215        */
00216       virtual parameters* newInstance() const;
00217 
00218       //***************************************
00219       // The parameters
00220       //***************************************
00221 
00222       /**
00223        * The CVR-Lib usually represents a data sample as a row of a
00224        * matrix. However, by choosing Columns here, you can achieve
00225        * the opposite behavior. This is not recommended since it is
00226        * confusing and much slower!
00227        *
00228        * Default value: Rows
00229        */
00230       eDataRepresentationType dataRepresentationType;
00231 
00232       /**
00233        * The type of the variance computation. If Empirical is used,
00234        * the empirical variance or covariance matrix is computed
00235        * (division by number of samples minus 1), otherwise, the
00236        * maximum likelihood estimator is computed (division by number
00237        * of samples).
00238        *
00239        * Default value: Empirical
00240        */
00241       eVarianceType varianceType;
00242 
00243       /**
00244        * If this flag is true, the covariance matrix is normalized
00245        * to contain the correlation coefficients instead of the
00246        * covariances.
00247        *
00248        * Default value: false
00249        */
00250       bool useCorrelation;
00251     };
00252 
00253     /**
00254      * Default constructor. \a dataRep: data representation, Rows or Columns
00255      */
00256     secondOrderStatistics(eDataRepresentationType dataRep=Rows);
00257 
00258     /**
00259      * Copy constructor
00260      *
00261      * @param other the object to be copied
00262      */
00263     secondOrderStatistics(const secondOrderStatistics& other);
00264 
00265     /**
00266      * Constructor that sets the parameters to \a param.
00267      *
00268      * @param param uses these parameters
00269      */
00270     secondOrderStatistics(const parameters& param);
00271 
00272     /**
00273      * Destructor
00274      */
00275     virtual ~secondOrderStatistics();
00276 
00277     /**
00278      * Copy data of "other" functor.
00279      *
00280      * @param other the functor to be copied
00281      * @return a reference to this functor object
00282      */
00283     secondOrderStatistics& copy(const secondOrderStatistics& other);
00284 
00285     /**
00286      * Alias for copy member
00287      *
00288      * \param other the functor to be copied
00289      * \return a reference to this functor object
00290      */
00291     secondOrderStatistics& operator=(const secondOrderStatistics& other);
00292 
00293     /**
00294      * Returns the name of this class.
00295      */
00296     const std::string& name() const;
00297 
00298     /**
00299      * Returns a pointer to a clone of this functor.
00300      */
00301     virtual secondOrderStatistics* clone() const;
00302 
00303     /**
00304      * Returns a pointer to a new instance of this functor.
00305      */
00306     virtual secondOrderStatistics* newInstance() const;
00307 
00308     /**
00309      * Returns the parameters in use.
00310      */
00311     const parameters& getParameters() const;
00312 
00313     //--------------------------------------------------
00314     // Apply methods
00315     //--------------------------------------------------
00316 
00317     /**
00318      * Computes the \a mean and \a variance of the given vector.
00319      *
00320      * @param src vector<T> with the source data.
00321      * @param mean mean value of \a src elements
00322      * @param variance variance of the elements of \a src
00323      * @return true if successful, false otherwise.
00324      */
00325     bool apply(const vector<T>& src, U& mean, U& variance) const;
00326 
00327     /**
00328      * Computes the \a mean and \a variance of the elements in the given
00329      * matrix.
00330      *
00331      * @param src matrix<T> with the source data.
00332      * @param mean mean value of \a src elements
00333      * @param variance variance of the elements of \a src
00334      * @return true if successful, false otherwise.
00335      */
00336     bool apply(const matrix<T>& src, U& mean, U& variance) const;
00337 
00338     /**
00339      * Computes the mean %vector and each dimension's variance for the
00340      * data samples given in \a src.
00341      *
00342      * The representation of a data sample (point) as Rows or Columns of
00343      * \a src can be chosen via parameters::dataRepresentationType but
00344      * is usually the default: Rows.
00345      *
00346      * In the Rows case each dimension of \a mean and \a variance
00347      * corresponds to the mean value and variance of the
00348      * corresponding column of \a src.
00349      *
00350      * Depending on the value of parameters::varianceType the
00351      * Empirical or Maxlikely variances are calculated.
00352      *
00353      * @param src matrix<T> with the source data.
00354      * @param mean mean vector of \a src rows or columns
00355      * @param variance variances of each sample dimension
00356      * @return true if successful, false otherwise.
00357      */
00358     bool apply(const matrix<T>& src,
00359                vector<U>& mean, vector<U>& variance ) const;
00360 
00361     /**
00362      * Computes the mean %vector and the covariance %matrix for the
00363      * data samples given in \a src.
00364      *
00365      * The representation of a data sample (point) as Rows or Columns of
00366      * \a src can be chosen via parameters::dataRepresentationType but
00367      * is usually the default: Rows.
00368      *
00369      * In the Rows case each dimension of \a mean corresponds to the mean
00370      * value of the corresponding column of \a src. The matrix \a cov
00371      * has dimension src.columns() x src.columns().
00372      *
00373      * Depending on the value of parameters::varianceType the
00374      * Empirical or Maxlikely covariances are calculated.
00375      *
00376      * If parameters::useCorrelation is true the correlation
00377      * coefficients are calculated instead of the covariances.
00378      *
00379      * @param src matrix<T> with the source data.
00380      * @param mean mean vector of \a src rows or columns
00381      * @param cov covariance matrix
00382      * @return true if successful, false otherwise.
00383      */
00384     bool apply(const matrix<T>& src,
00385                vector<U>& mean, matrix<U>& cov) const;
00386 
00387     //--------------------------------------------------
00388     // shortcut functions
00389     //--------------------------------------------------
00390 
00391     /**
00392      * Computes only the \a mean value of the given \a src %vector.
00393      *
00394      * Do not use this function if you also need the variances, as the
00395      * combined calculation via apply() is much faster.
00396      *
00397      * @param src vector<T> with the source data.
00398      * @param mean mean value of \a src elements
00399      * @return true if successful, false otherwise.
00400      */
00401     bool mean(const vector<T>& src, U& mean) const;
00402 
00403     /**
00404      * Computes only the \a mean %vector of the given \a src data.
00405      *
00406      * Do not use this function if you also need the variances, as the
00407      * combined calculation via apply() is much faster.
00408      *
00409      * Data samples in \a src can either be in Rows (default) or
00410      * Columns. This can be set in \a dataRep.
00411      *
00412      * @param src the source data.
00413      * @param mean mean vector of \a src
00414      * @param dataRep data representation, Rows or Columns
00415      * @return true if successful, false otherwise.
00416      */
00417     bool mean(const matrix<T>& src,
00418               vector<U>& mean,
00419               eDataRepresentationType dataRep=Rows) const;
00420 
00421     /**
00422      * Computes only the \a variance of the given \a src %vector.
00423      *
00424      * Do not use this function if you also need the mean value, as
00425      * the combined calculation via apply() is much faster.
00426      *
00427      * @param src vector<T> with the source data.
00428      * @param variance variance of the elements of \a src
00429      * @return true if successful, false otherwise.
00430      */
00431     bool variance(const vector<T>& src, U& variance) const;
00432 
00433     /**
00434      * Computes only the \a variance %vector of the given \a src data.
00435      *
00436      * Do not use this function if you also need the mean %vector, as
00437      * the combined calculation via apply() is much faster.
00438      *
00439      * Data samples in \a src can either be in Rows (default) or
00440      * Columns. This can be set in \a dataRep. Other parameters are
00441      * used as set in this functor's parameters (see apply()).
00442      *
00443      * @param src the source data.
00444      * @param variance variance of each dimension of \a src
00445      * @param dataRep data representation, Rows or Columns
00446      * @return true if successful, false otherwise.
00447      */
00448     bool variance(const matrix<T>& src,
00449                   vector<U>& variance,
00450                   eDataRepresentationType dataRep=Rows) const;
00451 
00452     /**
00453      * Computes only the covariance %matrix \a covMat of the given \a
00454      * src data.
00455      *
00456      * Do not use this function if you also need the mean %vector, as
00457      * the combined calculation via apply() is much faster.
00458      *
00459      * Data samples in \a src can either be in Rows (default) or
00460      * Columns. This can be set in \a dataRep. Other parameters are
00461      * used as set in this functor's parameters (see apply()).
00462      *
00463      * @param src the source data.
00464      * @param covMat covariance %matrix of \a src
00465      * @param dataRep data representation, Rows or Columns
00466      * @return true if successful, false otherwise.
00467      */
00468     bool covarianceMatrix(const matrix<T>& src,
00469                           matrix<U>& covMat,
00470                           eDataRepresentationType dataRep=Rows) const;
00471 
00472   protected:
00473 
00474     //--------------------------------------------------
00475     // helper functions
00476     //--------------------------------------------------
00477 
00478     /**
00479      * This function computes the mean and variance of each column of
00480      * the matrix (i.e. each row is a data point in n-dimensional
00481      * space).
00482      *
00483      * @param src matrix<T> with the source data.
00484      * @param mu mean row vector
00485      * @param var variance of row vectors
00486      */
00487     bool applyRows(const matrix<T>& src, vector<U>& mu, vector<U>& var) const;
00488 
00489     /**
00490      * This function computes the mean and variance of each row of
00491      * the matrix (i.e. each column is a data point in n-dimensional
00492      * space).
00493      *
00494      * @param src matrix<T> with the source data.
00495      * @param mu mean column vector
00496      * @param var variance of column vectors
00497      */
00498     bool applyColumns(const matrix<T>& src,
00499                       vector<U>& mu, vector<U>& var) const;
00500 
00501     /**
00502      * This function computes the mean and covariance matrix of the
00503      * row vectors of the matrix (i.e. each row is a data point in
00504      * n-dimensional space).
00505      *
00506      * @param src matrix<T> with the source data.
00507      * @param mu mean row vector
00508      * @param cov covariance matrix of row vectors
00509      */
00510     bool applyRows(const matrix<T>& src, vector<U>& mu, matrix<U>& cov) const;
00511 
00512     /**
00513      * This function computes the mean and covariance matrix of the
00514      * column vectors of the matrix (i.e. each column is a data point in
00515      * n-dimensional space).
00516      *
00517      * @param src matrix<T> with the source data.
00518      * @param mu mean column vector
00519      * @param cov covariance matrix of column vectors
00520      */
00521     bool applyColumns(const matrix<T>& src,
00522                       vector<U>& mu, matrix<U>& cov) const;
00523 
00524     /**
00525      * Converts covariance matrix \a cv to a correlation coefficient matrix.
00526      */
00527     void covar2corrcoef(matrix<U>& cv) const;
00528 
00529   };
00530 }
00531 
00532 #include "cvrSecondOrderStatistics_template.h"
00533 
00534 #endif
00535