/*
    Structure from Motion with Deferred Feature Matching and Subset Bundle Adjustment
    Copyright (C) 2015 Andreas Ley <andy-ley@arcor.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "HomographyImageResampling.cuh"

// File-scope 2D texture reference with uchar4 texels that are returned as
// normalized floats (cudaReadModeNormalizedFloat). It is sampled by the
// HomographyImageResampling kernel through tex2DLod(), i.e. with an explicit
// level of detail.
// NOTE(review): binding, addressing/filter mode, coordinate normalization and
// the mip chain are configured outside this file - confirm in the host code.
// NOTE(review): texture references are a legacy API (deprecated, and removed
// in CUDA 12); migrating to texture objects would change the host-side
// interface, so it is not done here.
texture<uchar4, 2, cudaReadModeNormalizedFloat> sourceImage;

// Exchanges `value` with the lane whose lane index equals this lane's index
// XOR `laneMask`.
//
// The legacy mask-less __shfl_xor() is deprecated since CUDA 9 and is not
// available when compiling for compute capability 7.0 and newer, so the
// *_sync variant is used whenever the compiler provides it. The full mask is
// valid only if all 32 lanes of the warp reach the call; the kernel below
// calls this before any divergent branch.
static __device__ __forceinline__ float homographyResamplingShflXor(float value, int laneMask)
{
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    return __shfl_xor_sync(0xFFFFFFFFu, value, laneMask);
#else
    return __shfl_xor(value, laneMask);
#endif
}

// Resamples the texture `sourceImage` through a homography and writes one
// packed RGBA pixel per thread.
//
// Fields of kernelParams read by this kernel:
//   homography[9]        3x3 matrix, indexed row-major (row*3 + column), that
//                        maps the destination pixel center (x+0.5, y+0.5, 1)
//                        to homogeneous texture coordinates.
//   srcWidth, srcHeight  scale factors applied to (u, v) before measuring the
//                        per-pixel footprint (presumably the source size in
//                        texels, with (u, v) normalized - confirm in host code).
//   LODbias              constant added to the computed level of detail.
//   width, height        destination extent; threads outside it write nothing.
//   dstRGBA              destination buffer, indexed as y*width + x.
//
// Launch layout: the index computation and the vertical neighbor exchange
// both hard-code a block width of 16, so the kernel expects 16x16 thread
// blocks (every warp then covers two complete rows of 16 threads).
//
// Requires warp shuffle support (compute capability 3.0+).
// NOTE(review): there is no guard against Tw == 0; u/v become inf/NaN there.
extern "C" __global__ void HomographyImageResampling(HomographyImageResamplingKernelParams kernelParams)
{
    // Block edge length assumed by the indexing and by the shuffle offsets.
    const unsigned kBlockEdge = 16;

    unsigned x = blockIdx.x * kBlockEdge + threadIdx.x;
    unsigned y = blockIdx.y * kBlockEdge + threadIdx.y;

    // Sample at the pixel center.
    float fx = x+0.5f;
    float fy = y+0.5f;

    float Tx = fx * kernelParams.homography[0*3+0] +
               fy * kernelParams.homography[0*3+1] +
                    kernelParams.homography[0*3+2];

    float Ty = fx * kernelParams.homography[1*3+0] +
               fy * kernelParams.homography[1*3+1] +
                    kernelParams.homography[1*3+2];

    float Tw = fx * kernelParams.homography[2*3+0] +
               fy * kernelParams.homography[2*3+1] +
                    kernelParams.homography[2*3+2];

    // Perspective divide.
    float u = Tx / Tw;
    float v = Ty / Tw;

    float scaledU = u * kernelParams.srcWidth;
    float scaledV = v * kernelParams.srcHeight;

    // Finite differences against neighboring threads. These exchanges are done
    // BEFORE the bounds check on purpose: every lane of the warp must take
    // part, including lanes that fall outside the destination image.
    // XOR 1 pairs a lane with its horizontal neighbor. The partner may be on
    // either side, so the sign of the difference is arbitrary; only the
    // squared magnitude is used below.
    float dudx = scaledU - homographyResamplingShflXor(scaledU, 1);
    float dvdx = scaledV - homographyResamplingShflXor(scaledV, 1);

    // XOR kBlockEdge pairs a lane with the lane 16 positions away, which for
    // a 16-wide block is the thread in the other row of the same warp.
    float dudy = scaledU - homographyResamplingShflXor(scaledU, (int)kBlockEdge);
    float dvdy = scaledV - homographyResamplingShflXor(scaledV, (int)kBlockEdge);

    // Squared length of the larger of the two footprint axes.
    float sqrScale = fmaxf(dudx*dudx + dvdx*dvdx, dudy*dudy + dvdy*dvdy);


    if ((x < kernelParams.width) && (y < kernelParams.height)) {
        // 0.5 * log2(len^2) == log2(len)
        float lod = log2f(sqrScale) * 0.5f + kernelParams.LODbias;

        float4 sample = tex2DLod(sourceImage, u, v, lod);

        // Each color channel is multiplied by the fetched w channel and by 256.
        float channelScale = sample.w * 256.0f;
        float r = sample.x * channelScale;
        float g = sample.y * channelScale;
        float b = sample.z * channelScale;

        // Add 0.5 and clamp to 255 so the truncating conversion below stays
        // within 8 bits. Float-only math avoids a promotion to double.
        r = fminf(r + 0.5f, 255.0f);
        g = fminf(g + 0.5f, 255.0f);
        b = fminf(b + 0.5f, 255.0f);

        unsigned ir = r;
        unsigned ig = g;
        unsigned ib = b;

        // Pack as R in bits 0-7, G in 8-15, B in 16-23, alpha forced to 0xFF.
        unsigned packed = (ir << 0) |
                          (ig << 8) |
                          (ib << 16) |
                          (0xFFu << 24);

        kernelParams.dstRGBA[y*kernelParams.width+x] = packed;
    }
}
