/*
    Structure from Motion with Deferred Feature Matching and Subset Bundle Adjustment
    Copyright (C) 2015 Andreas Ley <andy-ley@arcor.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef WARPLOADSTORE_HPP_INCLUDED
#define WARPLOADSTORE_HPP_INCLUDED


#include <stdio.h>

#include "cuUtilHelpers.hpp"
#include "../cub/util_ptx_reduced.cuh"

namespace cuUtils {


/**
 * Copies `size` bytes from `src` to `dst`, splitting the work across lanes
 * by lane id.
 *
 * The region is handled in three stages:
 *   1. full rounds of WARP_SIZE 32-bit words, one word per lane per round,
 *   2. the leftover words (fewer than WARP_SIZE), one per low-numbered lane,
 *   3. the leftover bytes (fewer than 4), one per low-numbered lane.
 *
 * Each calling thread only moves the elements that map to its own lane id,
 * so the region is copied completely only if every lane index that owns an
 * element executes this call with the same pointers. No barrier or memory
 * fence is issued here.
 *
 * Preconditions visible from the code:
 *   - `dst` and `src` are reinterpreted as `unsigned*`, so for size >= 4 both
 *     must be suitably aligned for word access.
 *   - Both pointers are declared `__restrict__`; the regions must not overlap.
 *
 * NOTE(review): WARP_SIZE and cub::LaneId() are defined outside this file;
 * this assumes LaneId() yields the caller's lane index in [0, WARP_SIZE) --
 * confirm against the included headers.
 *
 * @tparam size  number of bytes to copy (compile-time constant)
 * @param  dst   destination of the copy
 * @param  src   source of the copy
 */
template<unsigned size>
__device__ void warpMemcpy(void * __restrict__ dst, const void * __restrict__ src) {

    // Read the lane id once; every stage below indexes with it.
    const unsigned lane = cub::LaneId();

    // Compile-time breakdown of the region into words and trailing bytes.
    const unsigned wordCount = size / 4;
    const unsigned tailBytes = size % 4;

    // Words are moved in rounds of WARP_SIZE, followed by a partial round.
    const unsigned fullRounds = wordCount / WARP_SIZE;
    const unsigned tailWords = wordCount % WARP_SIZE;

    unsigned *dstWords = static_cast<unsigned*>(dst);
    const unsigned *srcWords = static_cast<const unsigned*>(src);

    // Stage 1: full rounds. Signed loop bound so a zero trip count does not
    // turn into an unsigned comparison against zero.
    #pragma unroll
    for (int round = 0; round < static_cast<int>(fullRounds); ++round) {
        const unsigned wordIdx = round*WARP_SIZE + lane;
        dstWords[wordIdx] = srcWords[wordIdx];
    }

    // Stage 2: leftover words, handled by the first `tailWords` lanes.
    if (static_cast<int>(lane) < static_cast<int>(tailWords)) {
        const unsigned wordIdx = fullRounds*WARP_SIZE + lane;
        dstWords[wordIdx] = srcWords[wordIdx];
    }

    // Stage 3: leftover bytes past the last full word, handled by the first
    // `tailBytes` lanes. The first operand is a compile-time constant.
    if (tailBytes > 0 && static_cast<int>(lane) < static_cast<int>(tailBytes)) {
        unsigned char *dstBytes = static_cast<unsigned char*>(dst);
        const unsigned char *srcBytes = static_cast<const unsigned char*>(src);
        const unsigned byteIdx = wordCount*4 + lane;
        dstBytes[byteIdx] = srcBytes[byteIdx];
    }
}


/**
 * Typed convenience wrapper around warpMemcpy: copies one `StructType`
 * object from `src` to `dst` as raw bytes, using sizeof(StructType) as the
 * compile-time byte count.
 *
 * The object is copied bytewise through warpMemcpy; no constructor or
 * assignment operator of `StructType` is invoked. All calling conventions
 * and preconditions are those of warpMemcpy.
 *
 * @tparam StructType  type of the object to copy
 * @param  dst         destination object
 * @param  src         source object
 */
template<typename StructType>
__device__ void warpCopy(StructType *dst, const StructType *src) {
    // Byte count is fixed at compile time so warpMemcpy can be specialized on it.
    const unsigned structBytes = static_cast<unsigned>(sizeof(StructType));

    void *rawDst = static_cast<void*>(dst);
    const void *rawSrc = static_cast<const void*>(src);

    warpMemcpy<structBytes>(rawDst, rawSrc);
}



}


#endif // WARPLOADSTORE_HPP_INCLUDED
