/*
 * JNpp - Java bindings for NPP, to be used with JCuda
 *
 * Copyright (c) 2010-2012 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package jcuda.jnpp.utilnpp;

import static jcuda.jnpp.JNppUtils.*;
import static jcuda.runtime.JCuda.cudaMemcpy2D;
import static jcuda.runtime.cudaError.cudaSuccess;
import static jcuda.runtime.cudaMemcpyKind.*;
import jcuda.jnpp.*;
import jcuda.jnpp.types.*;

/**
 * This class contains factory methods for creating NPP (device)
 * memory allocators for different data types. <br />
 * <br />
 * Note: These correspond to concrete template instantiations
 * of the classes that are defined in the ImageAllocatorsNPP 
 * header file.  
 */
class ImageAllocatorsNPP
{
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp8u> allocator8u1()
    {
        // Npp8u allocator with 1 channel per pixel (uses nppiMalloc_8u_C1)
        final Allocator<Npp8u> allocator = new Allocator_8u_1();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp8u> allocator8u2()
    {
        // Npp8u allocator with 2 channels per pixel (uses nppiMalloc_8u_C2)
        final Allocator<Npp8u> allocator = new Allocator_8u_2();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp8u> allocator8u3()
    {
        // Npp8u allocator with 3 channels per pixel (uses nppiMalloc_8u_C3)
        final Allocator<Npp8u> allocator = new Allocator_8u_3();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp8u> allocator8u4()
    {
        // Npp8u allocator with 4 channels per pixel (uses nppiMalloc_8u_C4)
        final Allocator<Npp8u> allocator = new Allocator_8u_4();
        return allocator;
    }
    

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16u> allocator16u1()
    {
        // Npp16u allocator with 1 channel per pixel (uses nppiMalloc_16u_C1)
        final Allocator<Npp16u> allocator = new Allocator_16u_1();
        return allocator;
    }

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16u> allocator16u2()
    {
        // Npp16u allocator with 2 channels per pixel (uses nppiMalloc_16u_C2)
        final Allocator<Npp16u> allocator = new Allocator_16u_2();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16u> allocator16u3()
    {
        // Npp16u allocator with 3 channels per pixel (uses nppiMalloc_16u_C3)
        final Allocator<Npp16u> allocator = new Allocator_16u_3();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16u> allocator16u4()
    {
        // Npp16u allocator with 4 channels per pixel (uses nppiMalloc_16u_C4)
        final Allocator<Npp16u> allocator = new Allocator_16u_4();
        return allocator;
    }
    

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16s> allocator16s1()
    {
        // Npp16s allocator with 1 channel per pixel (uses nppiMalloc_16s_C1)
        final Allocator<Npp16s> allocator = new Allocator_16s_1();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16s> allocator16s2()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp16s> allocator = new Allocator_16s_2();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp16s> allocator16s4()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp16s> allocator = new Allocator_16s_4();
        return allocator;
    }
    

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32s> allocator32s1()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32s> allocator = new Allocator_32s_1();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32s> allocator32s3()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32s> allocator = new Allocator_32s_3();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32s> allocator32s4()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32s> allocator = new Allocator_32s_4();
        return allocator;
    }
    

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32f> allocator32f1()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32f> allocator = new Allocator_32f_1();
        return allocator;
    }

    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32f> allocator32f2()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32f> allocator = new Allocator_32f_2();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32f> allocator32f3()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32f> allocator = new Allocator_32f_3();
        return allocator;
    }
    
    /**
     * Creates and returns a new Allocator
     * 
     * @return The allocator
     */
    static Allocator<Npp32f> allocator32f4()
    {
        // Hand out the private implementation through the Allocator interface
        final Allocator<Npp32f> allocator = new Allocator_32f_4();
        return allocator;
    }
    
    
    /**
     * Implementation of an Allocator
     */
    private static class Allocator_8u_1 implements Allocator<Npp8u>
    {
        /**
         * The number of Npp8u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 1;

        /**
         * Allocates memory using nppiMalloc_8u_C1
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp8u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp8u> pResult =
                JNppi.nppiMalloc_8u_C1(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp8u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp8u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp8u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_8u_2 implements Allocator<Npp8u>
    {
        /**
         * The number of Npp8u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 2;

        /**
         * Allocates memory using nppiMalloc_8u_C2
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp8u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp8u> pResult =
                JNppi.nppiMalloc_8u_C2(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp8u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp8u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp8u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_8u_3 implements Allocator<Npp8u>
    {
        /**
         * The number of Npp8u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 3;

        /**
         * Allocates memory using nppiMalloc_8u_C3
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp8u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp8u> pResult =
                JNppi.nppiMalloc_8u_C3(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp8u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp8u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp8u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_8u_4 implements Allocator<Npp8u>
    {
        /**
         * The number of Npp8u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 4;

        /**
         * Allocates memory using nppiMalloc_8u_C4
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp8u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp8u> pResult =
                JNppi.nppiMalloc_8u_C4(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp8u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp8u> pDst, long nDstPitch, TypedPointer<Npp8u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp8u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp8u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16u_1 implements Allocator<Npp16u>
    {
        /**
         * The number of Npp16u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 1;

        /**
         * Allocates memory using nppiMalloc_16u_C1
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16u> pResult =
                JNppi.nppiMalloc_16u_C1(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp16u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp16u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16u_2 implements Allocator<Npp16u>
    {
        /**
         * The number of Npp16u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 2;

        /**
         * Allocates memory using nppiMalloc_16u_C2
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16u> pResult =
                JNppi.nppiMalloc_16u_C2(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp16u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp16u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16u_3 implements Allocator<Npp16u>
    {
        /**
         * The number of Npp16u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 3;

        /**
         * Allocates memory using nppiMalloc_16u_C3
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16u> pResult =
                JNppi.nppiMalloc_16u_C3(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp16u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp16u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16u_4 implements Allocator<Npp16u>
    {
        /**
         * The number of Npp16u channels per pixel handled by this allocator
         */
        private static final int CHANNELS = 4;

        /**
         * Allocates memory using nppiMalloc_16u_C4
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16u> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16u> pResult =
                JNppi.nppiMalloc_16u_C4(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory using nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16u> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToDevice);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyHostToDevice);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            memcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth, nHeight, cudaMemcpyDeviceToHost);
        }

        /**
         * Shared implementation of the three public copy methods: Performs
         * a cudaMemcpy2D in the given direction and passes the comparison
         * of the returned status with cudaSuccess to NPP_ASSERT.
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region, in pixels
         * @param nHeight The height of the memory region
         * @param eKind The cudaMemcpyKind constant selecting the direction
         */
        private static void memcpy2D(TypedPointer<Npp16u> pDst, long nDstPitch, TypedPointer<Npp16u> pSrc, long nSrcPitch, long nWidth, long nHeight, int eKind)
        {
            // Width of one row as passed to cudaMemcpy2D:
            // pixels * channels * sizeof(Npp16u)
            long nRowSize = nWidth * CHANNELS * sizeof(Npp16u.class);
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nRowSize, nHeight, eKind);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16s_1 implements Allocator<Npp16s>
    {
        /**
         * Allocates memory
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         */
        public TypedPointer<Npp16s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: the product of two negative
            // values is positive, and the int product of two large values
            // may overflow, so testing 'nWidth * nHeight > 0' is unreliable
            assert nWidth > 0 && nHeight > 0;

            // Allocate through NPP; pPitch is handed on so that the
            // pitch can be returned to the caller
            TypedPointer<Npp16s> pResult =
                JNppi.nppiMalloc_16s_C1(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }
    
        /**
         * Frees memory
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }
    
        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            int eResult;
            eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    
        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            int eResult;
            eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp16s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    
        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            int eResult;
            eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    
    
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16s_2 implements Allocator<Npp16s>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_16s_C2
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16s> pResult =
                JNppi.nppiMalloc_16s_C2(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp16s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_16s_4 implements Allocator<Npp16s>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_16s_C4
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp16s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp16s> pResult =
                JNppi.nppiMalloc_16s_C4(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp16s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp16s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp16s> pDst, long nDstPitch, TypedPointer<Npp16s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp16s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32s_1 implements Allocator<Npp32s>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32s_C1
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32s> pResult =
                JNppi.nppiMalloc_32s_C1(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32s_3 implements Allocator<Npp32s>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32s_C3
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32s> pResult =
                JNppi.nppiMalloc_32s_C3(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32s_4 implements Allocator<Npp32s>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32s_C4
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32s> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32s> pResult =
                JNppi.nppiMalloc_32s_C4(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32s> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32s.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32s> pDst, long nDstPitch, TypedPointer<Npp32s> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32s.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32f_1 implements Allocator<Npp32f>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32f_C1
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32f> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32f> pResult =
                JNppi.nppiMalloc_32f_C1(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32f> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32f.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 1 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 1 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32f_2 implements Allocator<Npp32f>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32f_C2
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32f> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32f> pResult =
                JNppi.nppiMalloc_32f_C2(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32f> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp32f.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 2 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 2 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32f_3 implements Allocator<Npp32f>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32f_C3
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32f> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32f> pResult =
                JNppi.nppiMalloc_32f_C3(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32f> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32f.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 3 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 3 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }


    /**
     * Implementation of an Allocator
     */
    private static class Allocator_32f_4 implements Allocator<Npp32f>
    {
        /**
         * Allocates memory by calling JNppi.nppiMalloc_32f_C4
         *
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         * @param pPitch Returns the pitch of the memory
         * @return The pointer to the allocated memory
         */
        public TypedPointer<Npp32f> Malloc2D(int nWidth, int nHeight, int pPitch[])
        {
            // Check each dimension on its own: The int product
            // nWidth * nHeight may overflow, and it is also positive
            // when both values are negative.
            assert nWidth > 0 && nHeight > 0;

            TypedPointer<Npp32f> pResult =
                JNppi.nppiMalloc_32f_C4(nWidth, nHeight, pPitch);
            NPP_ASSERT(pResult != null);
            return pResult;
        }

        /**
         * Frees memory by calling JNppi.nppiFree
         *
         * @param pPixels The memory to free
         */
        public void Free2D(TypedPointer<Npp32f> pPixels)
        {
            JNppi.nppiFree(pPixels);
        }

        /**
         * Copies memory from the device to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void Copy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the host to the device
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void HostToDeviceCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32f.class), nHeight, cudaMemcpyHostToDevice);
            NPP_ASSERT(cudaSuccess == eResult);
        }

        /**
         * Copies memory from the device to the host
         *
         * @param pDst The destination pointer
         * @param nDstPitch The pitch of the destination
         * @param pSrc The source pointer
         * @param nSrcPitch The pitch of the source
         * @param nWidth The width of the memory region
         * @param nHeight The height of the memory region
         */
        public void DeviceToHostCopy2D(TypedPointer<Npp32f> pDst, long nDstPitch, TypedPointer<Npp32f> pSrc, long nSrcPitch, long nWidth, long nHeight)
        {
            // The width is scaled by 4 and by the size of one element
            int eResult = cudaMemcpy2D(pDst, nDstPitch, pSrc, nSrcPitch, nWidth * 4 * sizeof(Npp32f.class), nHeight, cudaMemcpyDeviceToHost);
            NPP_ASSERT(cudaSuccess == eResult);
        }
    }
    
}
