/**
 * @file
 * $Id$
 * $Revision$
 * $Author$
 * $Date$
 *
 * This file is part of The iWear Framework.
 *
 * The iWear Framework is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by the
 * Free Software Foundation as in version 2 of the License.

 * 
 * The iWear Framework is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * The iWear Framework; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef __IWEAR_MMATRIX_H
#define __IWEAR_MMATRIX_H

#ifndef __IWEAR_MVECTOR_H
#include <iwear/mvector.h>
#endif

#ifndef __IWEAR_MAPLESTREAM_H
#include <iwear/maplestream.h>
#endif 

#ifndef __IWEAR_IWM_H
#include <iwear/iwm.h>
#endif

#include <algorithm>

namespace iwear
{
    namespace math
    {

template<class T>
ostream& operator<< ( ostream& o, const MathMatrix<T>& mm );

template<class T>
MapleStream& operator<< ( MapleStream& o, const MathMatrix<T>& mm );

template<class T>
class MatrixRepresentation/*{{{*/
{
friend class MathMatrix<T>;
private:
protected:
    uint16_t cdim;
    uint16_t rdim;
    mutable uint16_t references;

    void release( void );
    static MatrixRepresentation<T>* allocate(uint16_t c, uint16_t r);
    MatrixRepresentation<T>* ref_copy( void ) const;
    MatrixRepresentation<T>* real_copy( void ) const;


    const VectorRepresentation<T>* column_at( uint16_t c) const;
    VectorRepresentation<T>* column_at( uint16_t c);
    
public:
    ~MatrixRepresentation( ) { release(); }
    uint16_t rsize( void ) const { return rdim; }
    uint16_t csize( void ) const { return cdim; }
};/*}}}*/

template<class T>
void MatrixRepresentation<T>::release( void )/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( references == 0 )
    {
	const MatrixRepresentation<T>& me = *this;
	for ( uint16_t i = 0; i < cdim; i++ )
	{
	    for ( uint16_t j = 0; j < rdim; j++ )
	    {
		me.column_at(i)->elem(j).T::~T();
	    }
	}

	delete[] reinterpret_cast<char*>(this);
    }
    else
    {
	references--;
    }
}/*}}}*/

template<class T>
MatrixRepresentation<T>* MatrixRepresentation<T>::allocate(uint16_t c, uint16_t r)/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    // Vectors are colums, so we allocate
    // 1 x Matrix Representation
    // and for each colum (c times)
    // 1 x VectorRepresentation<T> +
    // rows x T
    char* m = new char [c * (sizeof(VectorRepresentation<T>) + r * sizeof(T))  + sizeof(MatrixRepresentation<T>)];
    return reinterpret_cast<MatrixRepresentation<T>*> ( m );
}/*}}}*/

template<class T>
MatrixRepresentation<T>* MatrixRepresentation<T>::ref_copy( void ) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( references == numeric_limits<uint16_t>::max() )
    {
	return real_copy();
    }
    references++;
    return const_cast<MatrixRepresentation<T>*>(this);
}/*}}}*/

template<class T>
MatrixRepresentation<T>* MatrixRepresentation<T>::real_copy( void ) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    uint16_t cd = cdim;
    uint16_t rd = rdim;
    MatrixRepresentation<T>* m = MatrixRepresentation<T>::allocate(cd,rd);
    m->references = 0;
    m->cdim = cd;
    m->rdim = rd;

    for ( uint16_t i = 0; i < cdim; i++ )
    {
	m->column_at(i)->references=-1;
	m->column_at(i)->vector_size = rdim;
	for ( uint16_t j = 0; j < rdim; j++ )
	{
	    put_new_element( &(m->column_at(i)->elem(j)), column_at(i)->elem(j));
	}
    }
    return m;
}/*}}}*/

template<class T>
const VectorRepresentation<T>* MatrixRepresentation<T>::column_at( uint16_t c) const/*{{{*/
{
    if ( c >= cdim ) THROW( bounds_error,("Access of unexistant vector in matrix",c,cdim));
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return reinterpret_cast<const VectorRepresentation<T>*> (
    reinterpret_cast<const char*>(this+1) // Start of Vector data
	+ c*(sizeof(VectorRepresentation<T>) + rdim * sizeof(T))
	);
	    
}/*}}}*/

template<class T>
VectorRepresentation<T>* MatrixRepresentation<T>::column_at( uint16_t c)/*{{{*/
{
    if ( c >= cdim ) THROW( bounds_error,("Access of unexistant vector in matrix",c,cdim));
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return reinterpret_cast<VectorRepresentation<T>*> (
    reinterpret_cast<char*>(this+1) // Start of Vector data
	+ c*(sizeof(VectorRepresentation<T>) + rdim * sizeof(T))
	);
	    
}/*}}}*/

template<class T>
class MathMatrix/*{{{*/
{
private:
protected:
    MatrixRepresentation<T>* m;

    VectorRepresentation<T>* column_at( uint16_t );
    const VectorRepresentation<T>* column_at( uint16_t ) const;

    void taint_me(void);

    MathMatrix( MatrixRepresentation<T>* m_ ) : m(m_) { }
public:

    uint16_t rsize( void ) const { return m->rsize(); }
    uint16_t csize( void ) const { return m->csize(); }

    /**
     * Constructs a matrix consisiting of cd columns, each with rd elements (
     * i.e. rd rows) and with default constructed elements...
     */
    MathMatrix( uint16_t cd, uint16_t rd );
    /*!
     * This is some doc
     * for something
     * -# topic one
     *  -# subtopic
     *   -# subsubtopic  
     * - topic two
     * - topic three
     */
    MathMatrix( uint16_t cd, uint16_t rd, const T& t );

    MathMatrix( const MathMatrix& mm );
    template <class S> MathMatrix( const MathMatrix<S>& mm );

    ~MathMatrix();

    /**
     * Access the element at column c, row r
     */
    const T& operator()( uint16_t c, uint16_t r ) const;
    T& operator()( uint16_t c, uint16_t r );

    T& elem( uint16_t c, uint16_t r );
    const T& elem( uint16_t c, uint16_t r ) const;

    /**
     * Returns a Column Vector of the Matrix of column col starting at 0
     */
    MathVectorReference<T> operator[]( uint16_t col );
    const MathVectorReference<T> operator[]( uint16_t col ) const;

    template<class S> MathVector<T> operator*( const MathVector<S>& mm) const;

    template<class S> MathMatrix<T> operator*( const MathMatrix<S>& mm) const;
    template<class S> MathMatrix<T> operator-( const MathMatrix<S>& mm) const;
    template<class S> MathMatrix<T> operator+( const MathMatrix<S>& mm) const;
    template<class S> MathMatrix<T>& operator=( const MathMatrix<S>& mm);
    MathMatrix<T>& operator=( const MathMatrix& mm);

    bool operator==( const MathMatrix& mm ) const;
    bool operator!=( const MathMatrix& mm ) const { return ! operator==(mm); }

    MathMatrix<T> transposed( void ) const;
    MathMatrix<T> inverse( void ) const;
    MathMatrix<T> inverse_gauss_jacobi( void ) const;
    
    MathMatrix<T> solve_gauss_jacobi( const MathMatrix<T>& sm ) const;
/*
    MathMatrix<T> solve_gauss_basic( const MathMatrix<T>& sm ) const;
*/
    MathMatrix<T> solve( const MathMatrix<T>& sm ) const;
    MathVector<T> solve( const MathVector<T>& sv ) const;
    friend ostream& operator<< <T> ( ostream& o, const MathMatrix<T>& mm );
    friend MapleStream& operator<< <T> ( MapleStream& o, const MathMatrix<T>& mm );

    static MathMatrix<T> unit( size_t );
};/*}}}*/

template<class T>
MathMatrix<T> MathMatrix<T>::unit( size_t n )
{
    MathMatrix<T> m(n,n);
    T t;
    for ( size_t i = 0; i < n; i++ )
    {
	m(i,i) = math::Help<T>::neutral_multiplication(t);
    }
    return m;
}

template<class T>
bool MathMatrix<T>::operator==( const MathMatrix& mm ) const/*{{{*/
{
    if ( m == mm.m ) return true;
    uint16_t rd = rsize();
    uint16_t cd = csize();
    if( (mm.csize() != cd) || ( mm.rsize() != rd)) return false;

    math::equals<T> me;
    for ( uint16_t c = 0; c < cd; c++ )
    {
	for ( uint16_t r = 0; r < rd; r++ )
	{
	    if ( ! me(m->column_at(c)->elem(r),mm.m->column_at(c)->elem(r))) return false;
	}
    }
    return true;
}/*}}}*/

template<class T>
inline MathMatrix<T>::MathMatrix( uint16_t cd, uint16_t rd )/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    m = MatrixRepresentation<T>::allocate(cd,rd);
    m->references = 0;
    m->cdim = cd;
    m->rdim = rd;

    for ( uint16_t i = 0; i < cd; i++ )
    {
	column_at(i)->references=-1;
	column_at(i)->vector_size = rd;
	for ( uint16_t j = 0; j < rd; j++ )
	{
	    new (&(column_at(i)->elem(j))) T();
	}
    }
}/*}}}*/

template<class T>
inline MathMatrix<T>::MathMatrix( uint16_t cd, uint16_t rd, const T& t )/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    m = MatrixRepresentation<T>::allocate(cd,rd);
    m->references = 0;
    m->cdim = cd;
    m->rdim = rd;

    for ( uint16_t i = 0; i < cd; i++ )
    {
	column_at(i)->references=-1;
	column_at(i)->vector_size = rd;
	for ( uint16_t j = 0; j < rd; j++ )
	{
	    put_new_element( &(column_at(i)->elem(j)),t);
	}
    }

}/*}}}*/

template<class T>
template<class S>
inline MathMatrix<T>::MathMatrix( const MathMatrix<S>& mm )/*{{{*/
{    
    //ScopeTracer st(__PRETTY_FUNCTION__);
    uint16_t cd = mm.csize();
    uint16_t rd = mm.rsize();
    m = MatrixRepresentation<T>::allocate(cd,rd);
    m->references = 0;
    m->cdim = cd;
    m->rdim = rd;

    for ( uint16_t i = 0; i < cd; i++ )
    {
	column_at(i)->references=-1;
	column_at(i)->vector_size = rd;
	for ( uint16_t j = 0; j < rd; j++ )
	{
	    put_new_element( &(column_at(i)->elem(j)), mm.elem(i,j));
	}
    }
}/*}}}*/

template<class T>
inline MathMatrix<T>::MathMatrix( const MathMatrix<T>& mm )/*{{{*/
{    
    //ScopeTracer st(__PRETTY_FUNCTION__);
    m = mm.m->ref_copy();
}/*}}}*/

template<class T>
inline void MathMatrix<T>::taint_me(void) /*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    //cout << "TAINTING MATRIX" << endl;
    if ( m->references > 0 )
    {
	//cout << "DOING REAL COPY" << endl;
	MatrixRepresentation<T>* om = m;
	m = m->real_copy();
	om->release();
    }
}/*}}}*/

template<class T>
inline VectorRepresentation<T>* MathMatrix<T>::column_at( uint16_t c )/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return m->column_at(c);
}/*}}}*/

template<class T>
inline const VectorRepresentation<T>* MathMatrix<T>::column_at( uint16_t c ) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return m->column_at(c);
}/*}}}*/

template<class T>
inline const MathVectorReference<T> MathMatrix<T>::operator[]( uint16_t col ) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return m->column_at(col);
}/*}}}*/

template<class T>
inline MathVectorReference<T> MathMatrix<T>::operator[]( uint16_t col )/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return m->column_at(col);
}/*}}}*/

template<class T>
inline T& MathMatrix<T>::operator()(uint16_t r, uint16_t c)/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return elem(r,c);
}/*}}}*/

template<class T>
inline const T& MathMatrix<T>::operator()(uint16_t r, uint16_t c) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    return elem(r,c);
}/*}}}*/

template<class T>
inline T& MathMatrix<T>::elem(uint16_t r, uint16_t c)/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( r < rsize() )
    {
	taint_me();
	return column_at(c)->elem(r);
    }
    else
    {
	THROW( bounds_error,("Matrix Row Access out of bounds", r, rsize()));
    }
}/*}}}*/

template<class T>
inline const T& MathMatrix<T>::elem(uint16_t r, uint16_t c) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( r < rsize() )
    {
	return column_at(c)->elem(r);
    }
    else
    {
	THROW( bounds_error,("Matrix Row Access out of bounds", r, rsize()));
    }
}/*}}}*/

template<class T>
MathMatrix<T>& MathMatrix<T>::operator=( const MathMatrix& mm)/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( (rsize() != mm.rsize()) || (csize() != mm.csize())) THROW( iwdomain_error,("Matrix Assignment of incompatible size"));

    m->release();

    m = mm.m->ref_copy();
    return *this;
}/*}}}*/

template<class T>
template<class S> 
MathMatrix<T>& MathMatrix<T>::operator=( const MathMatrix<S>& mm)/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( (rsize() != mm.rsize()) || (csize() != mm.csize())) THROW( iwdomain_error,("Matrix Assignment of incompatible size"));

    if ( m->references > 0 )
    {
	m->references--;
	// This hack should allocate the memory and copy the data for us :)
	new (this) MathMatrix<T>(mm);
    }
    else
    {
	uint16_t drs = rsize();
	uint16_t dcs = csize();

	for ( uint16_t rs = 0; rs < drs; rs++ )
	{
	    for ( uint16_t cs = 0; cs < dcs; cs++)
	    {
		assign_mbc( m->column_at(cs)->elem(rs), mm.elem(cs,rs));
	    }
	}
    }
    return *this;
}/*}}}*/

template<class T>
template<class S>
inline MathMatrix<T> MathMatrix<T>::operator*( const MathMatrix<S>& mm) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( rsize() != mm.csize() ) THROW( iwdomain_error,("Matrix Multiplication of incompatible size"));

    uint16_t drs = mm.rsize();
    uint16_t dcs = csize();

    uint16_t mrs = rsize();
    
    MathMatrix<T> ret(drs,dcs);

    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	for ( uint16_t cs = 0; cs < dcs; cs++ )
	{
	    T tmp = T();
//	   // cout << "c[" << rs << "," << cs << "]=";
	    for ( uint16_t tcc = 0; tcc < mrs; tcc++)
	    {
		tmp += static_cast<T>( 
			elem(rs,tcc) // A[tcc,rs] * 
			* mm(tcc,cs) );		// B[cs,tcc]
//		cout << column_at(rs)->elem(tcc) << "*";
//		cout << mm(cs,tcc) << "+" ;
//		cout << "a[" << tcc << "," << rs << "]*";
//		cout << "b[" << cs  << "," << tcc<< "] +";
	    }
	    ret(rs,cs) = tmp;
//	   // cout << endl;
	}
    }	
    return ret;
}/*}}}*/

template<class T>
template<class S>
MathVector<T> MathMatrix<T>::operator*( const MathVector<S>& mv) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( rsize() != mv.size() ) THROW( iwdomain_error,("Matrix Multiplication of incompatible size"));

    uint16_t drs = rsize();
    uint16_t dcs = csize();

    MathVector<T> ret(drs);
    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	T tmp = T();	    
	for ( uint16_t tcc = 0; tcc < dcs; tcc++)
	{
	    tmp += static_cast<T>( 
			elem(tcc,rs)
			* mv[tcc] );	
	}
        ret[rs] = tmp;
    }

    return ret;
}/*}}}*/

template<class T>
template<class S>
inline MathMatrix<T> MathMatrix<T>::operator-( const MathMatrix<S>& mm) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( rsize() != mm.rsize() || csize() != mm.csize() ) 
	THROW( iwdomain_error,("Matrix Subtraction of incompatible size"));

    uint16_t drs = rsize();
    uint16_t dcs = csize();

    MathMatrix<T> ret(drs,dcs);

    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	for ( uint16_t cs = 0; cs < dcs; cs++ )
	{
	    ret(rs,cs) = elem(rs,cs) - mm.elem(rs,cs);  
	}
    }	
    return ret;
}/*}}}*/

template<class T>
template<class S>
inline MathMatrix<T> MathMatrix<T>::operator+( const MathMatrix<S>& mm) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    if ( rsize() != mm.rsize() || csize() != mm.csize() ) 
	THROW( iwdomain_error,("Matrix Subtraction of incompatible size"));

    uint16_t drs = rsize();
    uint16_t dcs = csize();

    MathMatrix<T> ret(drs,dcs);

    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	for ( uint16_t cs = 0; cs < dcs; cs++ )
	{
	    ret(rs,cs) = elem(rs,cs) + mm.elem(rs,cs);  
	}
    }	
    return ret;
}/*}}}*/

/**
 * @addtogroup Output_Operators
 * @{
 */
template<class T>
inline ostream& operator<< ( ostream& o, const MathMatrix<T>& mm )/*{{{*/
{    
    //ScopeTracer st(__PRETTY_FUNCTION__);
    uint16_t drs = mm.rsize();
    uint16_t dcs = mm.csize();
    
    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	for ( uint16_t cs = 0; cs < dcs; cs++ )
	{
	    o << mm(rs,cs) << "\t";
	}
	o << std::endl;
    }	
    return o;
}/*}}}*/

template<class T>
inline MapleStream& operator<< ( MapleStream& o, const MathMatrix<T>& mm )/*{{{*/
{    
    //ScopeTracer st(__PRETTY_FUNCTION__);
    uint16_t drs = mm.rsize();
    uint16_t dcs = mm.csize();
    
    o << "Matrix([";
    for ( uint16_t rs = 0; rs < drs; rs++)
    {
	if ( rs != 0 )
	{
	    o << ",";
	}
	o << "[";
	for ( uint16_t cs = 0; cs < dcs; cs++ )
	{
	    if ( cs != 0 ) 
	    {
		o << ",";
	    }
	    o << mm(rs,cs);
	}
	o << "]";
    }
    o << "])";
    return o;
}/*}}}*/
/** @} */

template<class T>
inline MathMatrix<T>::~MathMatrix( )/*{{{*/
{   
    //ScopeTracer st(__PRETTY_FUNCTION__);
    m->release();
}/*}}}*/

template<class T>
MathMatrix<T> MathMatrix<T>::transposed( void ) const/*{{{*/
{
    //ScopeTracer st(__PRETTY_FUNCTION__);
    uint16_t drs = rsize();
    uint16_t dcs = csize();

    MathMatrix<T> ret(dcs,drs);
    
    for ( uint16_t i = 0; i < drs; i++ )
    {
	for ( uint16_t j = 0; j < dcs; j++ )
	{
	    ret(j,i) = elem(i,j);
	}
    }
    return ret;
}/*}}}*/

template<class T>
MathMatrix<T> MathMatrix<T>::inverse( void ) const
{
    if ( rsize() != csize() )
    {
	THROW( iwruntime_error,("Can only invert square matrices"));
    }
    return inverse_gauss_jacobi();
}

template<class T>
MathMatrix<T> MathMatrix<T>::solve( const MathMatrix<T>& sm ) const
{
    return solve_gauss_jacobi(sm);
}

template<class T>
MathVector<T> MathMatrix<T>::solve( const MathVector<T>& sv ) const
{
    MathMatrix<T> o(1,sv.size());
    o[0] = sv;
    return solve(o)[0];
}

template<class T>
MathMatrix<T> MathMatrix<T>::solve_gauss_jacobi( const MathMatrix<T>& sm) const/*{{{*/
{

#define ELEM(x,c,r) x->column_at(c)->elem(rowmap[r])
#define ELEMraw(x,c,r) x->column_at(c)->elem(r)
    uint16_t r = rsize();
    uint16_t c = csize();

    MatrixRepresentation<T>* tm = m->real_copy();
    MatrixRepresentation<T>* m = tm;

    uint16_t rowmap[r];

    // Initialize the default rowmap
    uint16_t i;
    for ( i = 0; i < r; i++ )
    {
	rowmap[i] = i;
    }

    // Now search for the highest element and swap the first line if any
    uint16_t mx = 0;
    for ( i = 1; i < r; i++ )
    {
	if ( math::absolute_greater<T>()( ELEM(m,0,i) , ELEM(m,0,mx)) ) 
	{
	    mx = i;
	}
    }

    bool rowflips = false;

    if ( mx )
    {
	
	// Now, go and swap the lines internally... ;)
	rowmap[mx] = 0;
	rowmap[0] = mx;
	rowflips = true;
    }

    MathMatrix<T> slvd(sm.m->real_copy());
    uint16_t csz = slvd.csize();
//    uint16_t rsz = slvd.rsize();

    // Ok, now, go through every row and look how add/subtract them from the others...
    T one = math::Help<T>::neutral_multiplication(m->column_at(0)->elem(0));
    T fact = one;
    T mai = one;

    MathMatrix u(m);

    for ( i = 0; i < c; i++ )
    {
	//cout << "Entering main computation loop" << endl;
	if ( math::equals<T>()( ELEM(m,i,i),
		    math::Help<T>::neutral_addition(ELEM(m,i,i))))
	{
	   // cout << "Element is NUL need to swap rows" << endl;
	    uint16_t nr;
	    for ( nr = i+1; nr < r; nr++ )
	    {
		if ( ! math::equals<T>()( ELEM(m,i,nr),
		    math::Help<T>::neutral_addition(ELEM(m,i,i))))
		{
		    break;
		}
	    }
	    if ( nr == r )
	    {
		THROW( iwruntime_error,("Unable to compute slvderse"));
	    }

	    swap(rowmap[i],rowmap[nr]);
	    rowflips = true;
	}

	mai = ELEM(m,i,i); // Save the element to be set to one in the diagonale
	fact = math::Help<T>::neutral_multiplication(mai) / mai;
	// Now the row is normalized, and we can subtract it from all the others
	for ( uint16_t j = 0; j < r; j++ ) // Rows !!!!
	{
	    if ( j == i ) continue;
	    // multiplicate every element in the row with fact and then
	    // subtract the choosen from it
	    mai = fact * ELEM(m,i,j);
	    for ( uint16_t k = 0; k < c; k++ ) // Columns !!!
	    {
		ELEM(m,k,j) -= mai * ELEM(m,k,i);
		if ( k < csz )
		{
		    ELEM(slvd.m,k,j) -= mai * ELEM(slvd.m,k,i);
		}
	    }
	}
	// The actual row will be "normalized" by multiplicationg with this fact..
	// We start at i, since before it there should be 0 already...
	for ( uint16_t k = 0; k < c; k++ )
	{
	    // We choose to multiplicate with 1/mai instead of dividing through
	    // mai since in most cases for T the multiplication is cheaper than
	    // the division
	    ELEM(m,k,i) *= fact;
	    if ( k < csz )
	    {
		ELEM(slvd.m,k,i) *= fact;
	    }
	}
    }
    if ( rowflips )
    {
	// we need to really do the flippings in the output matrix
	// so we just exchange the rows. 
	// Another way would be to create a new matrix and then copy all the
	// values to the right row. This might be faster than swapping all the
	// rows, if we have more rows to swap. But if its just one flip, then
	// the swap could be faster. So we assume that we need fewer flips and
	// use the swapping way...

	for ( uint16_t rf = 0; rf < c; rf++ )
	{
	    if ( rowmap[rf] != rf )
	    {
		uint16_t f1 = rowmap[rf];
		uint16_t f2 = rowmap[rowmap[rf]];
		swap(rowmap[rf],rowmap[rowmap[rf]]);
		for ( uint16_t rs = 0; rs < csz; rs++ )
		{
		    swap(ELEMraw(slvd.m,rs,f1),ELEMraw(slvd.m,rs,f2));
		}
		rf--;
	    }
	}
    }

    return slvd;
}/*}}}*/

template<class T>
MathMatrix<T> MathMatrix<T>::inverse_gauss_jacobi( void ) const/*{{{*/
{

#define ELEM(x,c,r) x->column_at(c)->elem(rowmap[r])
#define ELEMraw(x,c,r) x->column_at(c)->elem(r)
    uint16_t r = rsize();
    uint16_t c = csize();

    MatrixRepresentation<T>* tm = m->real_copy();
    MatrixRepresentation<T>* m = tm;

    uint16_t rowmap[r];

    // Initialize the default rowmap
    uint16_t i;
    for ( i = 0; i < r; i++ )
    {
	rowmap[i] = i;
    }

    // Now search for the highest element and swap the first line if any
    uint16_t mx = 0;
    for ( i = 1; i < r; i++ )
    {
	if ( math::absolute_greater<T>()( ELEM(m,0,i) , ELEM(m,0,mx)) ) 
	{
	    mx = i;
	}
    }

    bool rowflips = false;

    if ( mx )
    {
	
	// Now, go and swap the lines internally... ;)
	rowmap[mx] = 0;
	rowmap[0] = mx;
	rowflips = true;
    }

    MathMatrix<T> inv(c,r); // This is our inverse, firt its a unit matrix
    for ( i = 0; i < c; i++ )
    {
	ELEMraw(inv.m,i,i) = math::Help<T>::neutral_multiplication(ELEMraw(m,i,i));
    }

    // Ok, now, go through every row and look how add/subtract them from the others...
    T one = math::Help<T>::neutral_multiplication(m->column_at(0)->elem(0));
    T fact = one;
    T mai = one;

    MathMatrix u(m);

    for ( i = 0; i < c; i++ )
    {
	//cout << "Entering main computation loop" << endl;
	if ( math::equals<T>()( ELEM(m,i,i),
		    math::Help<T>::neutral_addition(ELEM(m,i,i))))
	{
	   // cout << "Element is NUL need to swap rows" << endl;
	    uint16_t nr;
	    for ( nr = i+1; nr < r; nr++ )
	    {
		if ( ! math::equals<T>()( ELEM(m,i,nr),
		    math::Help<T>::neutral_addition(ELEM(m,i,i))))
		{
		    break;
		}
	    }
	    if ( nr == r )
	    {
		THROW(iwruntime_error,("Unable to compute inverse"));
	    }

	    swap(rowmap[i],rowmap[nr]);
	    rowflips = true;
	}

	mai = ELEM(m,i,i); // Save the element to be set to one in the diagonale
	fact = math::Help<T>::neutral_multiplication(mai) / mai;
	// Now the row is normalized, and we can subtract it from all the others
	for ( uint16_t j = 0; j < r; j++ ) // Rows !!!!
	{
	    if ( j == i ) continue;
	    // multiplicate every element in the row with fact and then
	    // subtract the choosen from it
	    mai = fact * ELEM(m,i,j);
	    for ( uint16_t k = 0; k < c; k++ ) // Columns !!!
	    {
		ELEM(m,k,j) -= mai * ELEM(m,k,i);
		ELEM(inv.m,k,j) -= mai * ELEM(inv.m,k,i);
	    }
	}
	// The actual row will be "normalized" by multiplicationg with this fact..
	// We start at i, since before it there should be 0 already...
	for ( uint16_t k = 0; k < c; k++ )
	{
	    // We choose to multiplicate with 1/mai instead of dividing through
	    // mai since in most cases for T the multiplication is cheaper than
	    // the division
	    ELEM(m,k,i) *= fact;
	    ELEM(inv.m,k,i) *= fact;
	}
    }
    if ( rowflips )
    {
	// we need to really do the flippings in the output matrix
	// so we just exchange the rows. 
	// Another way would be to create a new matrix and then copy all the
	// values to the right row. This might be faster than swapping all the
	// rows, if we have more rows to swap. But if its just one flip, then
	// the swap could be faster. So we assume that we need fewer flips and
	// use the swapping way...

	for ( uint16_t rf = 0; rf < c; rf++ )
	{
	    if ( rowmap[rf] != rf )
	    {
		uint16_t f1 = rowmap[rf];
		uint16_t f2 = rowmap[rowmap[rf]];
		swap(rowmap[rf],rowmap[rowmap[rf]]);
		for ( uint16_t rs = 0; rs < r; rs++ )
		{
		    swap(ELEMraw(inv.m,rs,f1),ELEMraw(inv.m,rs,f2));
		}
		rf--;
	    }
	}
    }

    return inv;
}/*}}}*/

/*
    MathMatrix<T> EM(rsize(),rsize());
    return solve(EM);
}

template<class T>
MathMatrix<T> MathMatrix<T>::solve_gauss_jacobi( const MathMatrix<T>& sm ) const
{
}

template<class T>
MathMatrix<T> MathMatrix<T>::solve_gauss_basic( const MathMatrix<T>& sm ) const
{
}

template<class T>
MathMatrix<T> MathMatrix<T>::solve( const MathMatrix<T>& sm ) const
{
    if ( rsize() == csize() )
    {
	return solve_gauss_jacobi(sm);
    }
    else
    {
	return solve_gauss_basic(sm);
    }
}
*/

}// namespace math
} // namespace iwear
#endif

