/*
    Structure from Motion with Deferred Feature Matching and Subset Bundle Adjustment
    Copyright (C) 2015 Andreas Ley <andy-ley@arcor.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "CudaTextureMemory.h"
#include "CudaDriver.h"
#include <string.h>
#include <cstdio>
#include <exception>
#include "../tools/misc.hpp"

namespace CudaUtils {

/**
 * @brief Size in bytes of a single channel stored in the given array format.
 *
 * 8-bit integer formats map to 1, 16-bit integer and half-float formats to 2,
 * 32-bit integer and float formats to 4.
 *
 * @param format CUDA array element format to look up.
 * @return Number of bytes occupied by one channel of one texel.
 * @throws std::runtime_error if @p format is none of the formats listed above.
 */
unsigned BaseCudaTextureMemory::getFormatSize(CUarray_format format)
{
    unsigned bytesPerChannel = 0;

    switch (format) {
        case CU_AD_FORMAT_UNSIGNED_INT8:
        case CU_AD_FORMAT_SIGNED_INT8:
            bytesPerChannel = 1;
            break;

        case CU_AD_FORMAT_UNSIGNED_INT16:
        case CU_AD_FORMAT_SIGNED_INT16:
        case CU_AD_FORMAT_HALF:
            bytesPerChannel = 2;
            break;

        case CU_AD_FORMAT_UNSIGNED_INT32:
        case CU_AD_FORMAT_SIGNED_INT32:
        case CU_AD_FORMAT_FLOAT:
            bytesPerChannel = 4;
            break;

        default:
            throw std::runtime_error("Invalid format");
    }

    return bytesPerChannel;
}

/// Default constructor; performs no initialisation of its own.
BaseCudaTextureMemory::BaseCudaTextureMemory() {}

/// Destructor; the base class itself releases nothing here.
BaseCudaTextureMemory::~BaseCudaTextureMemory() {}

void BaseCudaTextureMemory::syncUploadAll(const void *src, unsigned pitchInBytes, unsigned heightInPixels)
{
    if (m_descriptor.Depth > 0) {
        CUDA_MEMCPY3D descriptor;
        memset(&descriptor, 0, sizeof(descriptor));
        descriptor.srcMemoryType = CU_MEMORYTYPE_HOST;
        descriptor.srcPitch = pitchInBytes;
        descriptor.srcHeight = heightInPixels;
        descriptor.srcHost = src;

        descriptor.WidthInBytes = m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels;
        descriptor.Height = m_descriptor.Height;
        descriptor.Depth = m_descriptor.Depth;

        descriptor.dstMemoryType = CU_MEMORYTYPE_ARRAY;
        descriptor.dstArray = m_handle;

        CudaDriver::throwOnCudaError(cuMemcpy3D(&descriptor), __FILE__, __LINE__);
    } else {
        if (m_descriptor.Height > 0) {
            CUDA_MEMCPY2D descriptor;
            memset(&descriptor, 0, sizeof(descriptor));
            descriptor.srcMemoryType = CU_MEMORYTYPE_HOST;
            descriptor.srcPitch = pitchInBytes;
            descriptor.srcHost = src;

            descriptor.WidthInBytes = m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels;
            descriptor.Height = m_descriptor.Height;

            descriptor.dstMemoryType = CU_MEMORYTYPE_ARRAY;
            descriptor.dstArray = m_handle;

            CudaDriver::throwOnCudaError(cuMemcpy2D(&descriptor), __FILE__, __LINE__);
        } else {
            CudaDriver::throwOnCudaError(cuMemcpyHtoA(m_handle, 0, src, m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels), __FILE__, __LINE__);
        }
    }
}

void BaseCudaTextureMemory::syncDownloadAll(void *dst, unsigned pitchInBytes, unsigned heightInPixels)
{
    if (m_descriptor.Depth > 0) {
        CUDA_MEMCPY3D descriptor;
        memset(&descriptor, 0, sizeof(descriptor));
        descriptor.dstMemoryType = CU_MEMORYTYPE_HOST;
        descriptor.dstPitch = pitchInBytes;
        descriptor.dstHeight = heightInPixels;
        descriptor.dstHost = dst;

        descriptor.WidthInBytes = m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels;
        descriptor.Height = m_descriptor.Height;
        descriptor.Depth = m_descriptor.Depth;

        descriptor.srcMemoryType = CU_MEMORYTYPE_ARRAY;
        descriptor.srcArray = m_handle;

        CudaDriver::throwOnCudaError(cuMemcpy3D(&descriptor), __FILE__, __LINE__);
    } else {
        if (m_descriptor.Height > 0) {
            CUDA_MEMCPY2D descriptor;
            memset(&descriptor, 0, sizeof(descriptor));
            descriptor.dstMemoryType = CU_MEMORYTYPE_HOST;
            descriptor.dstPitch = pitchInBytes;
            descriptor.dstHost = dst;

            descriptor.WidthInBytes = m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels;
            descriptor.Height = m_descriptor.Height;

            descriptor.srcMemoryType = CU_MEMORYTYPE_ARRAY;
            descriptor.srcArray = m_handle;

            CudaDriver::throwOnCudaError(cuMemcpy2D(&descriptor), __FILE__, __LINE__);
        } else {
            CudaDriver::throwOnCudaError(cuMemcpyAtoH(dst, m_handle, 0, m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels), __FILE__, __LINE__);
        }
    }
}


/**
 * @brief Intended to download one layer of a layered array into host memory.
 *
 * Currently disabled: the function unconditionally throws before doing any
 * work. The code below the guard is kept for whoever re-enables it.
 *
 * @param dst            Host pointer receiving the layer.
 * @param pitchInBytes   Byte distance between consecutive destination rows.
 * @param layer          Index of the layer to read (used as the source Z offset).
 * @param heightInPixels Row count of the destination slice.
 * @throws std::runtime_error always ("This is broken!").
 */
void BaseCudaTextureMemory::syncDownloadSingleLayer(void *dst, unsigned pitchInBytes, unsigned layer, unsigned heightInPixels)
{
    // Guard left in place on purpose: the reason the implementation was
    // declared broken is not recorded here, so it must be re-validated before
    // this throw is removed.
    throw std::runtime_error("This is broken!");

    // CUDA_ARRAY3D_LAYERED is a bit of the descriptor's Flags member. The
    // previous code tested it against Format, whose enum values happen to
    // share bit 0 for several formats, so the check gave arbitrary answers.
    if ((m_descriptor.Flags & CUDA_ARRAY3D_LAYERED) == 0)
        throw std::runtime_error("Texture is not a layered texture!");

    if (m_descriptor.Depth > 0) {
        CUDA_MEMCPY3D descriptor;
        memset(&descriptor, 0, sizeof(descriptor));
        descriptor.dstMemoryType = CU_MEMORYTYPE_HOST;
        descriptor.dstPitch = pitchInBytes;
        descriptor.dstHeight = heightInPixels;
        descriptor.dstHost = dst;

        descriptor.WidthInBytes = m_descriptor.Width * getFormatSize(m_descriptor.Format) * m_descriptor.NumChannels;
        descriptor.Height = m_descriptor.Height;
        // Exactly one slice is copied, starting at the requested layer.
        descriptor.Depth = 1;

        descriptor.srcMemoryType = CU_MEMORYTYPE_ARRAY;
        descriptor.srcArray = m_handle;
        descriptor.srcZ = layer;

        CudaDriver::throwOnCudaError(cuMemcpy3D(&descriptor), __FILE__, __LINE__);
    } else {
        throw std::runtime_error("Texture is not a 3D texture!");
    }
}



/// Creates an empty texture object; no CUDA array exists until resize() is called.
CudaTextureMemory::CudaTextureMemory()
{
    // No handle is owned yet, so the destructor and resize() must not destroy one.
    m_handleValid = false;
}

/**
 * @brief Destroys the owned CUDA array, if one was created.
 *
 * Never lets an exception escape: destructors are implicitly noexcept since
 * C++11 and may also run during stack unwinding, so a propagating exception
 * would end in std::terminate. A failed destroy is reported on stderr instead.
 */
CudaTextureMemory::~CudaTextureMemory()
{
    if (!m_handleValid)
        return;

    try {
        // throwOnCudaError presumably throws when the driver call fails.
        CudaDriver::throwOnCudaError(cuArrayDestroy(m_handle), __FILE__, __LINE__);
    } catch (const std::exception &e) {
        std::fprintf(stderr, "CudaTextureMemory: failed to destroy CUDA array: %s\n", e.what());
    } catch (...) {
        std::fprintf(stderr, "CudaTextureMemory: failed to destroy CUDA array (unknown error)\n");
    }

    m_handleValid = false;
}

/**
 * @brief (Re)creates the CUDA array so that it matches the requested layout.
 *
 * Does nothing when a valid array with exactly the requested parameters
 * already exists. Otherwise the current array (if any) is destroyed and a new
 * one is created with cuArray3DCreate.
 *
 * @param width         Array width.
 * @param height        Array height.
 * @param depth         Array depth.
 * @param format        Element format of each channel.
 * @param numComponents Number of channels per element.
 * @param flags         CUDA array creation flags.
 *
 * If creation fails, the object is left without a valid handle, so a later
 * resize() or the destructor will not destroy the old array a second time.
 */
void CudaTextureMemory::resize(unsigned width, unsigned height, unsigned depth, CUarray_format format, unsigned numComponents, unsigned flags)
{
    const bool alreadyMatches =
        m_handleValid &&
        (width == m_descriptor.Width) &&
        (height == m_descriptor.Height) &&
        (depth == m_descriptor.Depth) &&
        (format == m_descriptor.Format) &&
        (numComponents == m_descriptor.NumChannels) &&
        (flags == m_descriptor.Flags);

    if (alreadyMatches)
        return;

    if (m_handleValid) {
        CudaDriver::throwOnCudaError(cuArrayDestroy(m_handle), __FILE__, __LINE__);
        // The old array is gone; forget it right away so that a failure in
        // the creation below cannot lead to a double destroy later on.
        m_handleValid = false;
    }

    m_descriptor.Width = width;
    m_descriptor.Height = height;
    m_descriptor.Depth = depth;
    m_descriptor.Format = format;
    m_descriptor.NumChannels = numComponents;
    m_descriptor.Flags = flags;

    CudaDriver::throwOnCudaError(cuArray3DCreate(&m_handle, &m_descriptor), __FILE__, __LINE__);
    m_handleValid = true;
}

/**
 * @brief Binds this object to one mip level of a mipmapped texture.
 *
 * Stores the parent texture and level index, fetches the level's array handle
 * from the parent's mipmapped array and reads back that array's descriptor.
 *
 * @param mipmappedTexture Parent texture that provides the mipmapped array handle.
 * @param level            Index of the mip level to bind.
 */
void CudaMipmappedTextureLevelMemory::setup(CudaMipmappedTexture *mipmappedTexture, unsigned level)
{
    m_mipmappedTexture = mipmappedTexture;
    m_level = level;

    // Fetch the array handle of the requested level.
    const CUresult levelStatus = cuMipmappedArrayGetLevel(&m_handle, m_mipmappedTexture->getHandle(), m_level);
    CudaDriver::throwOnCudaError(levelStatus, __FILE__, __LINE__);

    // Query the descriptor of that level's array.
    const CUresult descriptorStatus = cuArray3DGetDescriptor(&m_descriptor, m_handle);
    CudaDriver::throwOnCudaError(descriptorStatus, __FILE__, __LINE__);
}



/// Creates an empty mipmapped texture; no CUDA array exists until resize() is called.
CudaMipmappedTexture::CudaMipmappedTexture()
{
    // No handle is owned yet, so the destructor and resize() must not destroy one.
    m_handleValid = false;
}

/**
 * @brief Destroys the owned mipmapped CUDA array, if one was created.
 *
 * Never lets an exception escape: destructors are implicitly noexcept since
 * C++11 and may also run during stack unwinding, so a propagating exception
 * would end in std::terminate. A failed destroy is reported on stderr instead.
 */
CudaMipmappedTexture::~CudaMipmappedTexture()
{
    if (!m_handleValid)
        return;

    try {
        // throwOnCudaError presumably throws when the driver call fails.
        CudaDriver::throwOnCudaError(cuMipmappedArrayDestroy(m_handle), __FILE__, __LINE__);
    } catch (const std::exception &e) {
        std::fprintf(stderr, "CudaMipmappedTexture: failed to destroy mipmapped CUDA array: %s\n", e.what());
    } catch (...) {
        std::fprintf(stderr, "CudaMipmappedTexture: failed to destroy mipmapped CUDA array (unknown error)\n");
    }

    m_handleValid = false;
}

/**
 * @brief (Re)creates the mipmapped CUDA array and its per-level objects.
 *
 * Does nothing when a valid array with exactly the requested parameters and
 * level count already exists. Otherwise the current array (if any) is
 * destroyed, a new one is created with cuMipmappedArrayCreate and every
 * entry of m_level is bound to its mip level.
 *
 * @param width         Width of the base level.
 * @param height        Height of the base level.
 * @param depth         Depth of the base level.
 * @param format        Element format of each channel.
 * @param numComponents Number of channels per element.
 * @param flags         CUDA array creation flags.
 * @param numLevel      Number of mip levels; (unsigned)-1 selects
 *                      getMaxMipLevelCount(width, height).
 *
 * If creation fails, the object is left without a valid handle, so a later
 * resize() or the destructor will not destroy the old array a second time.
 * NOTE(review): in that case m_level still holds the level objects of the
 * destroyed array; they are only refreshed by the next successful resize().
 */
void CudaMipmappedTexture::resize(unsigned width, unsigned height, unsigned depth, CUarray_format format, unsigned numComponents, unsigned flags, unsigned numLevel)
{
    // Sentinel value: let the texture size decide the level count.
    if (numLevel == (unsigned)-1)
        numLevel = getMaxMipLevelCount(width, height);

    const bool alreadyMatches =
        m_handleValid &&
        (width == m_descriptor.Width) &&
        (height == m_descriptor.Height) &&
        (depth == m_descriptor.Depth) &&
        (format == m_descriptor.Format) &&
        (numComponents == m_descriptor.NumChannels) &&
        (flags == m_descriptor.Flags) &&
        (m_level.size() == numLevel);

    if (alreadyMatches)
        return;

    if (m_handleValid) {
        CudaDriver::throwOnCudaError(cuMipmappedArrayDestroy(m_handle), __FILE__, __LINE__);
        // The old array is gone; forget it right away so that a failure in
        // the creation below cannot lead to a double destroy later on.
        m_handleValid = false;
    }

    m_descriptor.Width = width;
    m_descriptor.Height = height;
    m_descriptor.Depth = depth;
    m_descriptor.Format = format;
    m_descriptor.NumChannels = numComponents;
    m_descriptor.Flags = flags;

    CudaDriver::throwOnCudaError(cuMipmappedArrayCreate(&m_handle, &m_descriptor, numLevel), __FILE__, __LINE__);
    m_handleValid = true;

    // Rebind every level object to the freshly created array.
    m_level.resize(numLevel);
    for (unsigned i = 0; i < m_level.size(); i++) {
        m_level[i].setup(this, i);
    }
}

/**
 * @brief Derives the default mip level count from the larger texture side.
 *
 * @param width  Width of the base level.
 * @param height Height of the base level.
 * @return ilog2 of the larger of @p width and @p height.
 *
 * NOTE(review): ilog2 is declared outside this file. If it computes
 * floor(log2(x)), the result is one less than the length of a full mip chain
 * down to 1x1 (and 0 for a 1x1 texture) - confirm this is intended.
 */
unsigned CudaMipmappedTexture::getMaxMipLevelCount(unsigned width, unsigned height)
{
    const unsigned largestSide = std::max(width, height);
    return ilog2(largestSide);
}

}
