/*
    Structure from Motion with Deferred Feature Matching and Subset Bundle Adjustment
    Copyright (C) 2015 Andreas Ley <andy-ley@arcor.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "InitialPatchOptimizer.h"

#include "../cudaKernels/initialTrackOptimizer.h"
#include "../tools/PCVToolbox.hpp"

#include <string.h>

#include "../tools/RasterImage.h"

/**
 * Loads the precompiled CUDA module from disk and looks up every kernel,
 * texture reference and constant-memory symbol this class works with.
 */
InitialPatchOptimizer::InitialPatchOptimizer()
{
    // Both source image pyramids get the same sampler setup: linear filtering
    // between texels and between mip levels, addressed with normalized coordinates.
    const auto fetchPyramidTexture = [this](const char *symbolName) -> std::unique_ptr<CudaUtils::CudaTextureReference> {
        std::unique_ptr<CudaUtils::CudaTextureReference> texRef(m_codeModule.getTexReference(symbolName));
        texRef->setTexelFilterMode(CudaUtils::CudaTextureReference::FILTER_MODE_LINEAR);
        texRef->setMipmapFilterMode(CudaUtils::CudaTextureReference::FILTER_MODE_LINEAR);
        texRef->setCoordinateNormalization(true);
        return texRef;
    };

    m_codeModule.loadFromFile("../SFMBackend/kernels/Release/initialTrackOptimizer.fatbin");

    // Optimization kernel plus the two image pyramids it samples from.
    m_optimizeKernel = std::unique_ptr<CudaUtils::CudaKernel>(
            m_codeModule.getKernel("optimizeInitialTrack"));
    m_sourceImage1TexRef = fetchPyramidTexture("sourceImagePyramid1");
    m_sourceImage2TexRef = fetchPyramidTexture("sourceImagePyramid2");

    m_kernelConstantParams = std::unique_ptr<CudaUtils::CudaConstantMemory>(
            m_codeModule.getConstantMemory("optimizeInitialTrackParams"));

    // Auxiliary kernels used for error evaluation and patch extraction.
    m_computeErrorsKernel = std::unique_ptr<CudaUtils::CudaKernel>(
            m_codeModule.getKernel("computeErrors"));
    m_extractProjectionsKernel = std::unique_ptr<CudaUtils::CudaKernel>(
            m_codeModule.getKernel("extractProjections"));
    m_extractPreWarpedProjectionsKernel = std::unique_ptr<CudaUtils::CudaKernel>(
            m_codeModule.getKernel("extractPreWarpedProjections"));

    m_kernelExtractionPreWarpMatrices = std::unique_ptr<CudaUtils::CudaConstantMemory>(
            m_codeModule.getConstantMemory("ExtractionPreWarpMatrices"));
}

/// Nothing is released by hand here; the destructor only runs the members' own destructors.
InitialPatchOptimizer::~InitialPatchOptimizer() = default;

/**
 * Decomposes a projection matrix and reports the camera position together
 * with a focal length.
 *
 * Row 2 of @p P is dropped before the matrix is handed to
 * PCV::decomposeProjectionMatrix.
 *
 * @param P            4x4 projection matrix to decompose.
 * @param focalLength  Output: element [0][0] of the internal (calibration) matrix
 *                     produced by the decomposition.
 * @return The camera position produced by the decomposition.
 */
LinAlg::Vector3f computeCameraLocationAndFocalLength(const LinAlg::Matrix4x4f &P, float &focalLength)
{
    LinAlg::Matrix<3, 3, float> intrinsics;
    LinAlg::Matrix<4, 4, float> worldToEye;
    LinAlg::Matrix<4, 4, float> rotation;
    LinAlg::Vector<3, float> position;

    // Only the intrinsics and the position are consumed below; the other two
    // outputs are required by the call but otherwise ignored.
    PCV::decomposeProjectionMatrix<float>(P.dropRow(2), intrinsics, worldToEye, rotation, position);

    focalLength = intrinsics[0][0];
    return position;
}


/**
 * @brief Optimizes the initial two-view tracks on the GPU and writes the result back into @p tracks.
 *
 * For every track a patch frame (up / right / normal / position) is set up on the CPU,
 * uploaded, and handed to the "optimizeInitialTrack" kernel. Afterwards the world space
 * position, orientation, size and both screen space positions of every track are
 * overwritten with the values read back from the GPU.
 *
 * The function is heavily instrumented: it prints per-track diagnostics to std::cout and
 * writes projectedPatches.png, warpedprojectedPatches.png, movements1.png, movements2.png,
 * offsets1.png and offsets2.png through RasterImage::writeToFile.
 *
 * @param image1  Mip-mapped texture of the first view (bound to m_sourceImage1TexRef).
 * @param image2  Mip-mapped texture of the second view (bound to m_sourceImage2TexRef).
 * @param P1      4x4 projection matrix of the first view.
 * @param P2      4x4 projection matrix of the second view.
 * @param tracks  In/out list of tracks. An empty list makes the call a no-op.
 */
void InitialPatchOptimizer::optimizePatches(CudaUtils::CudaMipmappedTexture &image1, CudaUtils::CudaMipmappedTexture &image2,
                     const LinAlg::Matrix4x4f &P1, const LinAlg::Matrix4x4f &P2, std::vector<InitialTrack> &tracks)
{
    // Nothing to optimize. Returning here also keeps the code below from taking the address
    // of element 0 of empty vectors and from launching kernels with a zero-sized dimension.
    if (tracks.empty())
        return;

    const unsigned debugPatchSize = 32;          // edge length of the patches read back from extractProjections
    const unsigned warpedPatchSize = 16;         // edge length of the patches read back from extractPreWarpedProjections
    const unsigned numPreWarps = 11;             // number of pre-warp matrices (slot 0 plus 4 + 6 anisotropic ones)
    const unsigned maxTracksInDebugImage = 200u; // the patch debug images show at most this many tracks

    const float width1 = static_cast<float>(image1.getLevel(0).getWidth());
    const float height1 = static_cast<float>(image1.getLevel(0).getHeight());
    const float width2 = static_cast<float>(image2.getLevel(0).getWidth());
    const float height2 = static_cast<float>(image2.getLevel(0).getHeight());
    const float aspectRatio1 = width1 / height1;
    const float aspectRatio2 = width2 / height2;

    m_sourceImage1TexRef->bindMipmappedTexture(&image1);
    m_sourceImage1TexRef->setMinMaxMipLevel(0.0f, image1.getNumLevel()-1);

    m_sourceImage2TexRef->bindMipmappedTexture(&image2);
    m_sourceImage2TexRef->setMinMaxMipLevel(0.0f, image2.getNumLevel()-1);


    LinAlg::Matrix4x4f P1Inv;
    LinAlg::Matrix4x4f P2Inv;

    P1Inv = P1;
    P1Inv.GaussJordanInvert();
    P2Inv = P2;
    P2Inv.GaussJordanInvert();

    // Projection matrices followed by x' = 0.5*x + 0.5 and y' = 0.5*aspect*y + 0.5.
    // NOTE(review): only four entries of imageMat are written, so this relies on whatever a
    // default-constructed Matrix4x4f contains (presumably identity) - confirm.
    LinAlg::Matrix4x4f P1Norm, P2Norm;
    {
        LinAlg::Matrix4x4f imageMat;
        imageMat[0][0] = 0.5f;
        imageMat[0][3] = 0.5f;

        imageMat[1][1] = 0.5f * aspectRatio1;
        imageMat[1][3] = 0.5f;

        P1Norm = imageMat * P1;
    }
    {
        LinAlg::Matrix4x4f imageMat;
        imageMat[0][0] = 0.5f;
        imageMat[0][3] = 0.5f;

        imageMat[1][1] = 0.5f * aspectRatio2;
        imageMat[1][3] = 0.5f;

        P2Norm = imageMat * P2;
    }


    float focalLength1, focalLength2;
    LinAlg::Vector3f camPos1 = computeCameraLocationAndFocalLength(P1, focalLength1);
    LinAlg::Vector3f camPos2 = computeCameraLocationAndFocalLength(P2, focalLength2);

    m_trackData.resize(tracks.size() * sizeof(TrackData));

    std::vector<TrackData> cpuTrackData;
    cpuTrackData.resize(tracks.size());

    for (unsigned i = 0; i < tracks.size(); i++) {
        cpuTrackData[i].lod[0] = (int)tracks[i].screenSize[0];
        cpuTrackData[i].lod[1] = (int)tracks[i].screenSize[1];

        // Initial patch frame: the normal points from the track towards the midpoint between
        // both cameras, the up vector is perpendicular to the normal and the camera baseline.
        LinAlg::Vector3f normal = (camPos1 + camPos2) * 0.5f - tracks[i].worldSpacePosition;
        normal.normalize();
        LinAlg::Vector3f upVector = LinAlg::cross(normal, camPos1 - camPos2);
        upVector.normalize();
        LinAlg::Vector3f rightVector = LinAlg::cross(normal, upVector);
        rightVector.normalize();

        // Debug dump for the first 100 tracks.
        if (i < 100) {
            std::cout << i << ":" << std::endl;
            std::cout << "size1 = " << tracks[i].screenSize[0] << std::endl;
            std::cout << "size2 = " << tracks[i].screenSize[1] << std::endl;
            std::cout << "normal = " << (std::string) normal << std::endl;
            std::cout << "upVector = " << (std::string) upVector << std::endl;
            std::cout << "rightVector = " << (std::string) rightVector << std::endl;
            std::cout << "tracks[i].worldSpacePosition = " << (std::string) tracks[i].worldSpacePosition << std::endl;
            // The "==" lines apply the same scale/offset as P1Norm/P2Norm to the stored screen
            // space position, using the actual aspect ratio of the respective image.
            {
                LinAlg::Vector4f p = P1Norm * tracks[i].worldSpacePosition.AddHom(1.0f);
                p /= p[3];
                std::cout << "screen1 = " << (std::string) p << std::endl;
                std::cout << "  == " << (std::string) ((tracks[i].screenSpacePositions[0] & LinAlg::Fill(0.5f, 0.5f * aspectRatio1)) + LinAlg::Fill(0.5f, 0.5f))  << std::endl;
            }
            {
                LinAlg::Vector4f p = P2Norm * tracks[i].worldSpacePosition.AddHom(1.0f);
                p /= p[3];
                std::cout << "screen2 = " << (std::string) p << std::endl;
                std::cout << "  == " << (std::string) ((tracks[i].screenSpacePositions[1] & LinAlg::Fill(0.5f, 0.5f * aspectRatio2)) + LinAlg::Fill(0.5f, 0.5f)) << std::endl;
            }
        }

        // surfaceToWorld is filled with a row stride of 4: columns are up, right, normal and
        // the world space position. Only rows 0..2 are written here.
        for (unsigned row = 0; row < 3; row++) {
            cpuTrackData[i].surfaceToWorld[row*4+0] = upVector[row];
            cpuTrackData[i].surfaceToWorld[row*4+1] = rightVector[row];
            cpuTrackData[i].surfaceToWorld[row*4+2] = normal[row];
            cpuTrackData[i].surfaceToWorld[row*4+3] = tracks[i].worldSpacePosition[row];
        }

        // Unproject the observed screen space positions at the depth of the track's own
        // projection to obtain one world space point per view.
        LinAlg::Vector4f SP1 = P1 * tracks[i].worldSpacePosition.AddHom(1.0f);
        SP1 /= SP1[3];
        LinAlg::Vector4f wp1 = P1Inv * LinAlg::Fill(tracks[i].screenSpacePositions[0][0], tracks[i].screenSpacePositions[0][1], SP1[2], 1.0f);

        LinAlg::Vector4f SP2 = P2 * tracks[i].worldSpacePosition.AddHom(1.0f);
        SP2 /= SP2[3];
        LinAlg::Vector4f wp2 = P2Inv * LinAlg::Fill(tracks[i].screenSpacePositions[1][0], tracks[i].screenSpacePositions[1][1], SP2[2], 1.0f);

        wp1 /= wp1[3];
        wp2 /= wp2[3];

        // Distance between both unprojected points, measured along the up vector.
        float epipolarDistance = (upVector * wp1.StripHom()) - (upVector * wp2.StripHom());

        // screenSize is split into its integer part n and fraction f: (1 + f) * 2^n, relative
        // to the image width.
        float size1, size2;
        size1 = (1.0f + tracks[i].screenSize[0] - (unsigned)tracks[i].screenSize[0]) * (1 << (unsigned)tracks[i].screenSize[0]) / width1 * 2.0f;
        size2 = (1.0f + tracks[i].screenSize[1] - (unsigned)tracks[i].screenSize[1]) * (1 << (unsigned)tracks[i].screenSize[1]) / width2 * 2.0f;
        if (i < 100) {
            std::cout << size1 << "  " << size2 << std::endl;
            std::cout << focalLength1 << "  " << focalLength2 << std::endl;
            std::cout << SP1[2] << "  " << SP2[2] << std::endl;
        }
        cpuTrackData[i].size = ((size1 / SP1[2] / focalLength1) + (size2 / SP2[2] / focalLength2)) * 0.5f;

        cpuTrackData[i].epipolarOffsetHalf = epipolarDistance / cpuTrackData[i].size * 0.5f;
    }

    m_trackData.upload(cpuTrackData.data(), cpuTrackData.size() * sizeof(TrackData));

    // Device buffers for the debug patches (two patches of uint32_t pixels per track) and for
    // one float error value per track.
    CudaUtils::CudaDeviceMemory projectionVMem;
    projectionVMem.resize(tracks.size()*debugPatchSize*debugPatchSize*2*sizeof(uint32_t));

    CudaUtils::CudaDeviceMemory errorVMem;
    errorVMem.resize(tracks.size()*sizeof(float));

    ConstantParams constantParams;
    constantParams.numTracks = tracks.size();
    memcpy(constantParams.P1norm, &P1Norm, 4*4*sizeof(float));
    memcpy(constantParams.P2norm, &P2Norm, 4*4*sizeof(float));

    m_kernelConstantParams->upload(&constantParams, sizeof(constantParams));


    // All launches below use an (8, 8, 1) / (numTracks, 1, 1) configuration.
    {
        TrackData *ptr = (TrackData*)m_trackData.getPtr();
        m_optimizeKernel->launch(LinAlg::Fill(8u, 8u, 1u),
                                           LinAlg::Fill<unsigned>(tracks.size(), 1u, 1u),
                                           &ptr, sizeof(ptr));
    }

    {
        ExtractProjectionsKernelParams params;
        params.trackData = (TrackData*)m_trackData.getPtr();
        params.dst = (uint32_t*)projectionVMem.getPtr();
        m_extractProjectionsKernel->launch(LinAlg::Fill(8u, 8u, 1u),
                                           LinAlg::Fill<unsigned>(tracks.size(), 1u, 1u),
                                           &params, sizeof(params));
    }

    {
        void* params[2];
        params[0] = m_trackData.getPtr();
        params[1] = errorVMem.getPtr();
        // sizeof(params) instead of a literal byte count so the size follows the pointer width.
        m_computeErrorsKernel->launch(LinAlg::Fill(8u, 8u, 1u),
                                           LinAlg::Fill<unsigned>(tracks.size(), 1u, 1u),
                                           &params, sizeof(params));
    }

    // Pre-warp matrices: slot 0 stays default-constructed, slots 1..4 stretch x by 1.5 along
    // directions rotated in steps of pi/4, slots 5..10 stretch x by 2 in steps of pi/6.
    std::vector<LinAlg::Matrix4x4f> preWarpMatrices;
    preWarpMatrices.resize(numPreWarps);
    for (unsigned k = 0; k < 4; k++) {
        const float angle = (float)M_PI / 4.0f * static_cast<float>(k);
        preWarpMatrices[1 + k] = LinAlg::RotateZ(-angle) * LinAlg::Scale3D(LinAlg::Fill(1.5f, 1.0f, 1.0f)) * LinAlg::RotateZ(angle);
    }
    for (unsigned k = 0; k < 6; k++) {
        const float angle = (float)M_PI / 6.0f * static_cast<float>(k);
        preWarpMatrices[5 + k] = LinAlg::RotateZ(-angle) * LinAlg::Scale3D(LinAlg::Fill(2.0f, 1.0f, 1.0f)) * LinAlg::RotateZ(angle);
    }
    m_kernelExtractionPreWarpMatrices->upload(&preWarpMatrices[0][0][0], numPreWarps*4*4*sizeof(float));

    CudaUtils::CudaDeviceMemory warpedProjectionVMem;
    warpedProjectionVMem.resize(tracks.size()*warpedPatchSize*warpedPatchSize*sizeof(uint32_t)*numPreWarps);
    {
        ExtractProjectionsKernelParams params;
        params.trackData = (TrackData*)m_trackData.getPtr();
        params.dst = (uint32_t*)warpedProjectionVMem.getPtr();
        m_extractPreWarpedProjectionsKernel->launch(LinAlg::Fill(8u, 8u, 1u),
                                           LinAlg::Fill<unsigned>(tracks.size(), 1u, 1u),
                                           &params, sizeof(params));
    }


    std::vector<float> errorData;
    errorData.resize(tracks.size());
    errorVMem.download(errorData.data(), tracks.size()*sizeof(float));

    // Packs the color-ramped error of one track into a 32 bit pixel with the top byte set to 0xFF.
    const auto packErrorColor = [&errorData](unsigned trackIndex) -> uint32_t {
        LinAlg::Vector<3, unsigned char> color = LinAlg::clampColor(LinAlg::ColorRamp(errorData[trackIndex]*10.0f));
        return (static_cast<uint32_t>(color[0]) << 0) |
               (static_cast<uint32_t>(color[1]) << 8) |
               (static_cast<uint32_t>(color[2]) << 16) |
               (0xFFu << 24);
    };

    // Copies one 4-byte pixel: the first three bytes are scaled by 4 * (fourth byte) / 255 and
    // clamped to 255, the fourth destination byte is set to 255.
    const auto copyWeightedPixel = [](const unsigned char *srcPixel, unsigned char *dstPixel) {
        dstPixel[0] = std::min(srcPixel[0] * srcPixel[3]*4/255, 255);
        dstPixel[1] = std::min(srcPixel[1] * srcPixel[3]*4/255, 255);
        dstPixel[2] = std::min(srcPixel[2] * srcPixel[3]*4/255, 255);
        dstPixel[3] = 255;
    };

    std::vector<uint32_t> projectionData;
    projectionData.resize(tracks.size()*debugPatchSize*debugPatchSize*2);
    projectionVMem.download(projectionData.data(), tracks.size()*debugPatchSize*debugPatchSize*2*sizeof(uint32_t));

    // projectedPatches.png: one column per track, the two patches stacked vertically on a
    // background colored by the track's error.
    {
        const unsigned cellSize = debugPatchSize + 4;
        RasterImage debugImage;
        const unsigned numTrackInImage = std::min<unsigned>(tracks.size(), maxTracksInDebugImage);
        debugImage.resize(numTrackInImage*cellSize, cellSize*2);
        for (unsigned i = 0; i < numTrackInImage; i++) {
            debugImage.drawBox(LinAlg::Fill<int>(i*cellSize, 0), LinAlg::Fill<int>(i*cellSize+(debugPatchSize+3), cellSize*2-1), packErrorColor(i), true);

            for (unsigned view = 0; view < 2; view++)
                for (unsigned y = 0; y < debugPatchSize; y++)
                    for (unsigned x = 0; x < debugPatchSize; x++) {
                        const unsigned char *srcPixel = (const unsigned char*) &projectionData[i*debugPatchSize*debugPatchSize*2+view*debugPatchSize*debugPatchSize+y*debugPatchSize+x];
                        unsigned char *dstPixel = (unsigned char*) &debugImage.getData()[(view*cellSize+y+2)*debugImage.getWidth() + i*cellSize+x+2];
                        copyWeightedPixel(srcPixel, dstPixel);
                    }
        }
        debugImage.writeToFile("projectedPatches.png");
    }

    std::vector<uint32_t> preWarpedProjectionData;
    preWarpedProjectionData.resize(tracks.size()*warpedPatchSize*warpedPatchSize*numPreWarps);
    warpedProjectionVMem.download(preWarpedProjectionData.data(), tracks.size()*warpedPatchSize*warpedPatchSize*numPreWarps*sizeof(uint32_t));

    // warpedprojectedPatches.png: one column per track, one row per pre-warp.
    {
        const unsigned cellSize = warpedPatchSize + 4;
        RasterImage debugImage;
        const unsigned numTrackInImage = std::min<unsigned>(tracks.size(), maxTracksInDebugImage);
        debugImage.resize(numTrackInImage*cellSize, cellSize*numPreWarps);
        for (unsigned i = 0; i < numTrackInImage; i++) {
            debugImage.drawBox(LinAlg::Fill<int>(i*cellSize, 0), LinAlg::Fill<int>(i*cellSize+(warpedPatchSize+3), cellSize*numPreWarps-1), packErrorColor(i), true);

            for (unsigned warp = 0; warp < numPreWarps; warp++) {
                for (unsigned y = 0; y < warpedPatchSize; y++)
                    for (unsigned x = 0; x < warpedPatchSize; x++) {
                        const unsigned char *srcPixel = (const unsigned char*) &preWarpedProjectionData[i*warpedPatchSize*warpedPatchSize*numPreWarps+warp*warpedPatchSize*warpedPatchSize+y*warpedPatchSize+x];
                        unsigned char *dstPixel = (unsigned char*) &debugImage.getData()[(warp*cellSize+y+2)*debugImage.getWidth() + i*cellSize+x+2];
                        copyWeightedPixel(srcPixel, dstPixel);
                    }
            }
        }
        debugImage.writeToFile("warpedprojectedPatches.png");
    }


    // Fetch the track data as modified by the kernels.
    m_trackData.download(cpuTrackData.data(), cpuTrackData.size() * sizeof(TrackData));

    // Draws one track into a full-resolution debug image: a circle of the given radius around
    // the track's screen space position and a line (color 0xFF0000FF) from there to a second
    // screen space position. Both axes are scaled by half the image width; the offsets are
    // half the width and half the height respectively.
    const auto drawTrackMarker = [](RasterImage &canvas, float width, float height,
                                    float trackX, float trackY,
                                    float targetX, float targetY,
                                    float radius, uint32_t circleColor) {
        const float centerX = trackX * 0.5f * width + width*0.5f;
        const float centerY = trackY * 0.5f * width + height*0.5f;
        const float endX = targetX * 0.5f * width + width*0.5f;
        const float endY = targetY * 0.5f * width + height*0.5f;

        canvas.drawCircle(LinAlg::Fill<int>(centerX, centerY), radius, circleColor);
        canvas.drawLine(LinAlg::Fill<int>(centerX, centerY), LinAlg::Fill<int>(endX, endY), 0xFF0000FF);
    };

    // movements1/2.png: for every track, a line from the position stored in the track to the
    // projection of its world space position shifted by +/- the epipolar offset along the up vector.
    {
        RasterImage debugImage1;
        debugImage1.resize(image1.getLevel(0).getWidth(), image1.getLevel(0).getHeight());

        RasterImage debugImage2;
        debugImage2.resize(image2.getLevel(0).getWidth(), image2.getLevel(0).getHeight());
        for (unsigned i = 0; i < tracks.size(); i++) {
            LinAlg::Vector3f upVector = LinAlg::Fill(cpuTrackData[i].surfaceToWorld[0*4+0], cpuTrackData[i].surfaceToWorld[1*4+0], cpuTrackData[i].surfaceToWorld[2*4+0]);

            LinAlg::Vector4f SP1 = P1 * (tracks[i].worldSpacePosition + upVector * cpuTrackData[i].size * cpuTrackData[i].epipolarOffsetHalf).AddHom(1.0f);
            SP1 /= SP1[3];

            LinAlg::Vector4f SP2 = P2 * (tracks[i].worldSpacePosition - upVector * cpuTrackData[i].size * cpuTrackData[i].epipolarOffsetHalf).AddHom(1.0f);
            SP2 /= SP2[3];

            drawTrackMarker(debugImage1, width1, height1,
                            tracks[i].screenSpacePositions[0][0], tracks[i].screenSpacePositions[0][1],
                            SP1[0], SP1[1],
                            cpuTrackData[i].size * width1 * 16.0f * 2.0f,
                            0xFF0000FF);
            drawTrackMarker(debugImage2, width2, height2,
                            tracks[i].screenSpacePositions[1][0], tracks[i].screenSpacePositions[1][1],
                            SP2[0], SP2[1],
                            cpuTrackData[i].size * width2 * 16.0f * 2.0f,
                            0xFF0000FF);
        }
        debugImage1.writeToFile("movements1.png");
        debugImage2.writeToFile("movements2.png");
    }

    // Write the downloaded results back into the tracks.
    for (unsigned i = 0; i < tracks.size(); i++) {
        for (unsigned row = 0; row < 3; row++) {
            tracks[i].worldSpacePosition[row] = cpuTrackData[i].surfaceToWorld[row*4+3];
            for (unsigned col = 0; col < 3; col++)
                tracks[i].orientation[row][col] = cpuTrackData[i].surfaceToWorld[row*4+col];
        }

        tracks[i].size = cpuTrackData[i].size;

        LinAlg::Vector3f upVector = LinAlg::Fill(cpuTrackData[i].surfaceToWorld[0*4+0], cpuTrackData[i].surfaceToWorld[1*4+0], cpuTrackData[i].surfaceToWorld[2*4+0]);

        // The new screen space positions are the projections of the updated world space
        // position, shifted by +/- the epipolar offset along the up vector.
        LinAlg::Vector4f SP1 = P1 * (tracks[i].worldSpacePosition + upVector * cpuTrackData[i].size * cpuTrackData[i].epipolarOffsetHalf).AddHom(1.0f);
        SP1 /= SP1[3];

        LinAlg::Vector4f SP2 = P2 * (tracks[i].worldSpacePosition - upVector * cpuTrackData[i].size * cpuTrackData[i].epipolarOffsetHalf).AddHom(1.0f);
        SP2 /= SP2[3];

        {
            std::cout << "Track " << i<<" has an epipolar offset of " << cpuTrackData[i].epipolarOffsetHalf*2.0f << " patch-pixels" << std::endl;
            std::cout << "Track " << i<<" moved in image 1 by " << std::sqrt((tracks[i].screenSpacePositions[0] - SP1.StripHom().StripHom()).SQRLen())*width1*0.5f << " pixels" << std::endl;
        }
        tracks[i].screenSpacePositions[0] = SP1.StripHom().StripHom();
        tracks[i].screenSpacePositions[1] = SP2.StripHom().StripHom();
    }

    // offsets1/2.png: for every track, an error-colored circle at the updated screen space
    // position and a line to the plain projection of the updated world space position.
    {
        RasterImage debugImage1;
        debugImage1.resize(image1.getLevel(0).getWidth(), image1.getLevel(0).getHeight());

        RasterImage debugImage2;
        debugImage2.resize(image2.getLevel(0).getWidth(), image2.getLevel(0).getHeight());
        for (unsigned i = 0; i < tracks.size(); i++) {
            LinAlg::Vector4f SP1 = P1 * tracks[i].worldSpacePosition.AddHom(1.0f);
            SP1 /= SP1[3];

            LinAlg::Vector4f SP2 = P2 * tracks[i].worldSpacePosition.AddHom(1.0f);
            SP2 /= SP2[3];

            const uint32_t colorUint32 = packErrorColor(i);

            drawTrackMarker(debugImage1, width1, height1,
                            tracks[i].screenSpacePositions[0][0], tracks[i].screenSpacePositions[0][1],
                            SP1[0], SP1[1],
                            cpuTrackData[i].size * width1 * 16.0f * 2.0f,
                            colorUint32);
            drawTrackMarker(debugImage2, width2, height2,
                            tracks[i].screenSpacePositions[1][0], tracks[i].screenSpacePositions[1][1],
                            SP2[0], SP2[1],
                            cpuTrackData[i].size * width2 * 16.0f * 2.0f,
                            colorUint32);
        }
        debugImage1.writeToFile("offsets1.png");
        debugImage2.writeToFile("offsets2.png");
    }
}
