/*
    Structure from Motion with Deferred Feature Matching and Subset Bundle Adjustment
    Copyright (C) 2015 Andreas Ley <andy-ley@arcor.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * @file
 * @author Andreas Ley
 */

#include "PCVToolbox.hpp"


std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<3, 3, double> &m)
{
    return output<3, 3, double>(stream, m);
}
std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<3, 4, double> &m)
{
    return output<3, 4, double>(stream, m);
}

std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<4, 4, double> &m)
{
    return output<4, 4, double>(stream, m);
}
std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<16, 16, double> &m)
{
    return output<16, 16, double>(stream, m);
}


std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<3, 3, float> &m)
{
    return output<3, 3, float>(stream, m);
}
std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<3, 4, float> &m)
{
    return output<3, 4, float>(stream, m);
}

std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<4, 4, float> &m)
{
    return output<4, 4, float>(stream, m);
}

std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<6, 6, float> &m)
{
    return output<6, 6, float>(stream, m);
}


std::ostream &operator<<(std::ostream &stream, const LinAlg::Matrix<16, 16, float> &m)
{
    return output<16, 16, float>(stream, m);
}

namespace PCV {

template<>
float getError(const std::vector<LinAlg::Vector<3, float> > &first, const std::vector<LinAlg::Vector<3, float> > &second, const LinAlg::Matrix<3, 3, float> &fundamental, float exp)
{

    LinAlg::Matrix<3, 3, float> fundamentalT = fundamental.T();

    float sum = 0.0f;
    __m128 vsum = _mm_setzero_ps();

    for (unsigned i = 0; i < first.size()/4; i++) {

        __m128 u0 = _mm_setr_ps(first[i*4+0][0] / first[i*4+0][2],
                            first[i*4+1][0] / first[i*4+1][2],
                            first[i*4+2][0] / first[i*4+2][2],
                            first[i*4+3][0] / first[i*4+3][2]);
        __m128 u1 = _mm_setr_ps(first[i*4+0][1] / first[i*4+0][2],
                            first[i*4+1][1] / first[i*4+1][2],
                            first[i*4+2][1] / first[i*4+2][2],
                            first[i*4+3][1] / first[i*4+3][2]);

        __m128 v0 = _mm_setr_ps(second[i*4+0][0] / second[i*4+0][2],
                            second[i*4+1][0] / second[i*4+1][2],
                            second[i*4+2][0] / second[i*4+2][2],
                            second[i*4+3][0] / second[i*4+3][2]);
        __m128 v1 = _mm_setr_ps(second[i*4+0][1] / second[i*4+0][2],
                            second[i*4+1][1] / second[i*4+1][2],
                            second[i*4+2][1] / second[i*4+2][2],
                            second[i*4+3][1] / second[i*4+3][2]);

        __m128 Fx0 = _mm_mul_ps(u0, _mm_set1_ps(fundamental[0][0]));
        Fx0 = _mm_add_ps(Fx0, _mm_mul_ps(u1, _mm_set1_ps(fundamental[0][1])));
        Fx0 = _mm_add_ps(Fx0, _mm_set1_ps(fundamental[0][2]));

        __m128 Fx1 = _mm_mul_ps(u0, _mm_set1_ps(fundamental[1][0]));
        Fx1 = _mm_add_ps(Fx1, _mm_mul_ps(u1, _mm_set1_ps(fundamental[1][1])));
        Fx1 = _mm_add_ps(Fx1, _mm_set1_ps(fundamental[1][2]));

        __m128 Fx2 = _mm_mul_ps(u0, _mm_set1_ps(fundamental[2][0]));
        Fx2 = _mm_add_ps(Fx2, _mm_mul_ps(u1, _mm_set1_ps(fundamental[2][1])));
        Fx2 = _mm_add_ps(Fx2, _mm_set1_ps(fundamental[2][2]));

        __m128 FTxdash0 = _mm_mul_ps(v0, _mm_set1_ps(fundamental[0][0]));
        FTxdash0 = _mm_add_ps(FTxdash0, _mm_mul_ps(v1, _mm_set1_ps(fundamental[1][0])));
        FTxdash0 = _mm_add_ps(FTxdash0, _mm_set1_ps(fundamental[2][0]));

        __m128 FTxdash1 = _mm_mul_ps(v0, _mm_set1_ps(fundamental[0][1]));
        FTxdash1 = _mm_add_ps(FTxdash1, _mm_mul_ps(v1, _mm_set1_ps(fundamental[1][1])));
        FTxdash1 = _mm_add_ps(FTxdash1, _mm_set1_ps(fundamental[2][1]));

        __m128 FTxdash2 = _mm_mul_ps(v0, _mm_set1_ps(fundamental[0][2]));
        FTxdash2 = _mm_add_ps(FTxdash2, _mm_mul_ps(v1, _mm_set1_ps(fundamental[1][2])));
        FTxdash2 = _mm_add_ps(FTxdash2, _mm_set1_ps(fundamental[2][2]));


        __m128 f = _mm_mul_ps(Fx0, v0);
        f = _mm_add_ps(f, _mm_mul_ps(Fx1, v1));
        f = _mm_add_ps(f, Fx2);


        __m128 denom = _mm_mul_ps(Fx0, Fx0);
        denom = _mm_add_ps(denom, _mm_mul_ps(Fx1, Fx1));
        denom = _mm_add_ps(denom, _mm_mul_ps(Fx2, Fx2));
        denom = _mm_add_ps(denom, _mm_mul_ps(FTxdash0, FTxdash0));
        denom = _mm_add_ps(denom, _mm_mul_ps(FTxdash1, FTxdash1));
        denom = _mm_add_ps(denom, _mm_mul_ps(FTxdash2, FTxdash2));

        denom = _mm_max_ps(denom, _mm_set1_ps(1e-10f));

        f = _mm_div_ps(_mm_mul_ps(f, f), denom);

        vsum = _mm_add_ps(vsum,
                          exp_ps(_mm_mul_ps(log_ps(f), _mm_set1_ps(exp))));

    }
    for (unsigned i = first.size()/4*4; i < first.size(); i++) {
        LinAlg::Vector<3, float> Fx = fundamental * (first[i] / first[i][2]);
        LinAlg::Vector<3, float> FTxdash = fundamentalT * (second[i] / second[i][2]);

        float f = Fx * (second[i] / second[i][2]);
        float denom = Fx.StripHom().SQRLen() + FTxdash.StripHom().SQRLen();
        if (denom > 0.0) {
            sum += powf(f*f/denom, exp);
        }
    }

    sum += _mm_extract_ps(vsum, 0);
    sum += _mm_extract_ps(vsum, 1);
    sum += _mm_extract_ps(vsum, 2);
    sum += _mm_extract_ps(vsum, 3);

    return sum / first.size();
}

LinAlg::Matrix3x3f computeFundamental(const LinAlg::Matrix3x4f &P1, const LinAlg::Matrix3x4f &P2)
{
    LinAlg::Matrix3x3f dummy1;
    LinAlg::Matrix4x4f dummy2;
    LinAlg::Matrix4x4f dummy3;
    LinAlg::Vector3f camPos;

    PCV::decomposeProjectionMatrix(P1,
                        dummy1,
                        dummy2,
                        dummy3,
                        camPos);



    LinAlg::Vector3f epipole = P2 * camPos.AddHom(1.0f);
    //m_fundamentalMatrix = constructSkewSymmetric(P1.getCameraPosition())*P1.getProjectionMatrix()*computePseudoInverse<float>(P2.getProjectionMatrix());
    return constructSkewSymmetric(epipole)*P2*computePseudoInverse<float>(P1);
}


}

