Struct rcudnn::API

source ·
pub struct API;
Expand description

Defines the Cuda cuDNN API.

Implementations§

source§

impl API

source

pub fn create_activation_descriptor( ) -> Result<cudnnActivationDescriptor_t, Error>

Create a generic CUDA cuDNN ActivationDescriptor.

source

pub fn destroy_activation_descriptor( desc: cudnnActivationDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Activation Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_activation_descriptor( desc: cudnnActivationDescriptor_t, mode: cudnnActivationMode_t, relu_nan_opt: cudnnNanPropagation_t, relu_ceiling: f64 ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Activation Descriptor with specific properties.

source

pub fn activation_forward( handle: cudnnHandle_t, activation_desc: cudnnActivationDescriptor_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes an activation forward function.

source

pub fn activation_backward( handle: cudnnHandle_t, activation_desc: cudnnActivationDescriptor_t, alpha: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *const c_void, dy_desc: cudnnTensorDescriptor_t, dy: *const c_void, beta: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, dx_desc: cudnnTensorDescriptor_t, dx: *mut c_void ) -> Result<(), Error>

Computes an activation backward function.

source§

impl API

source

pub fn create_filter_descriptor() -> Result<cudnnFilterDescriptor_t, Error>

Creates a generic CUDA cuDNN Filter Descriptor.

source

pub fn destroy_filter_descriptor( desc: cudnnFilterDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Filter Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_filter_descriptor( desc: cudnnFilterDescriptor_t, data_type: cudnnDataType_t, tensor_format: cudnnTensorFormat_t, nb_dims: c_int, filter_dim_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Filter Descriptor with specific properties.

source

pub fn find_convolution_forward_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionFwdAlgoPerf_t>, Error>

cuDNN Convolution Configuration

Returns the most performant convolutional forward algorithm, for the given scenario.

source

pub fn get_convolution_forward_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionFwdAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes needed for the given convolution algorithm.

source

pub fn find_convolution_backward_filter_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionBwdFilterAlgoPerf_t>, Error>

Returns the most performant convolutional backward filter algorithm, for the given scenario.

source

pub fn get_convolution_backward_filter_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionBwdFilterAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes needed for the given convolution algorithm.

source

pub fn find_convolution_backward_data_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionBwdDataAlgoPerf_t>, Error>

Returns the most performant convolutional backward data algorithm, for the given scenario.

source

pub fn get_convolution_backward_data_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionBwdDataAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes needed for the given convolution algorithm.

source

pub fn create_convolution_descriptor( ) -> Result<cudnnConvolutionDescriptor_t, Error>

Creates a generic CUDA cuDNN Convolution Descriptor.

source

pub fn destroy_convolution_descriptor( desc: cudnnConvolutionDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Convolution Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_convolution_descriptor( desc: cudnnConvolutionDescriptor_t, data_type: cudnnDataType_t, mode: cudnnConvolutionMode_t, array_length: c_int, pad_a: *const c_int, filter_stride_a: *const c_int, upscale_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Convolution Descriptor with specific properties.

source

pub fn convolution_forward( handle: cudnnHandle_t, algo: cudnnConvolutionFwdAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, filter_desc: cudnnFilterDescriptor_t, filter_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a convolution forward function.

source

pub fn convolution_backward_bias( handle: cudnnHandle_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t the bias.

source

pub fn convolution_backward_filter( handle: cudnnHandle_t, algo: cudnnConvolutionBwdFilterAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, diff_desc: cudnnTensorDescriptor_t, diff_data: *const c_void, beta: *const c_void, grad_desc: cudnnFilterDescriptor_t, grad_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t filter coefficient.

source

pub fn convolution_backward_data( handle: cudnnHandle_t, algo: cudnnConvolutionBwdDataAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, filter_desc: cudnnFilterDescriptor_t, filter_data: *const c_void, diff_desc: cudnnTensorDescriptor_t, diff_data: *const c_void, beta: *const c_void, grad_desc: cudnnTensorDescriptor_t, grad_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t the output tensor.

source§

impl API

source

pub fn cuda_allocate_device_memory(bytes: usize) -> Result<*mut c_void, Error>

Allocates the requested number of bytes of memory on the CUDA device.

On success, returns a pointer to the allocated device memory. Call this method outside of performance critical routines.

source

pub fn cuda_free_device_memory(ptr: *mut c_void) -> Result<(), Error>

Frees the CUDA device memory that ptr points to.

Frees up resources and will call cudaDeviceSynchronize internally. Therefore, use this method outside of performance critical routines.

source§

impl API

source

pub fn create_dropout_descriptor() -> Result<cudnnDropoutDescriptor_t, Error>

Create a generic CUDA cuDNN DropoutDescriptor

source

pub fn destroy_dropout_descriptor( dropout_desc: cudnnDropoutDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Dropout Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn dropout_get_states_size(handle: cudnnHandle_t) -> Result<usize, Error>

Get the states size (GPU memory).

source

pub fn dropout_get_reserve_space_size( xdesc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Get the reserve space size.

source

pub fn set_dropout_descriptor( dropout_desc: cudnnDropoutDescriptor_t, handle: cudnnHandle_t, dropout: f32, states: *mut c_void, state_size_in_bytes: usize, seed: c_ulonglong ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Dropout Descriptor with specific properties.

source

pub fn dropout_forward( handle: cudnnHandle_t, dropout_desc: cudnnDropoutDescriptor_t, xdesc: cudnnTensorDescriptor_t, x: *const c_void, ydesc: cudnnTensorDescriptor_t, y: *mut c_void, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

Computes the dropout forward function.

source

pub fn dropout_backward( handle: cudnnHandle_t, dropout_desc: cudnnDropoutDescriptor_t, dydesc: cudnnTensorDescriptor_t, dy: *const c_void, dxdesc: cudnnTensorDescriptor_t, dx: *mut c_void, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

Computes the dropout backward function.

source§

impl API

source

pub fn create_lrn_descriptor() -> Result<cudnnLRNDescriptor_t, Error>

Creates a generic CUDA cuDNN LRN Descriptor.

source

pub fn destroy_lrn_descriptor(desc: cudnnLRNDescriptor_t) -> Result<(), Error>

Destroys a CUDA cuDNN LRN Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_lrn_descriptor( desc: cudnnLRNDescriptor_t, lrn_n: c_uint, lrn_alpha: c_double, lrn_beta: c_double, lrn_k: c_double ) -> Result<(), Error>

Initializes a generic CUDA cuDNN LRN Descriptor with specific properties.

source

pub fn lrn_cross_channel_forward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnLRNMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes an LRN cross channel forward function.

source

pub fn lrn_cross_channel_backward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnLRNMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, dx_desc: cudnnTensorDescriptor_t, dx: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *const c_void, dy_desc: cudnnTensorDescriptor_t, dy: *mut c_void ) -> Result<(), Error>

Computes an LRN cross channel backward function.

source

pub fn divisive_normalization_forward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnDivNormMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, means: *const c_void, temp: *mut c_void, temp2: *mut c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes a divisive normalization forward function.

source

pub fn divisive_normalization_backward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnDivNormMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, means: *const c_void, dy: *const c_void, temp: *mut c_void, temp2: *mut c_void, beta: *const c_void, dx_dmeans_desc: cudnnTensorDescriptor_t, dx: *mut c_void, dmeans: *mut c_void ) -> Result<(), Error>

Computes a divisive normalization backward function.

source§

impl API

source

pub fn create_pooling_descriptor() -> Result<cudnnPoolingDescriptor_t, Error>

Creates a generic CUDA cuDNN Pooling Descriptor.

source

pub fn destroy_pooling_descriptor( desc: cudnnPoolingDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Pooling Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_pooling_descriptor( desc: cudnnPoolingDescriptor_t, mode: cudnnPoolingMode_t, maxpooling_nan_opt: cudnnNanPropagation_t, nb_dims: c_int, window: *const c_int, padding: *const c_int, stride: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Pooling Descriptor with specific properties.

source

pub fn get_pooling_descriptor( desc: cudnnPoolingDescriptor_t, nb_dims_requested: c_int, mode: *mut cudnnPoolingMode_t, maxpooling_nan_opt: *mut cudnnNanPropagation_t, nb_dims: *mut c_int, window: *mut c_int, padding: *mut c_int, stride: *mut c_int ) -> Result<(), Error>

Return information about a generic CUDA cuDNN Pooling Descriptor.

source

pub fn set_pooling_2d_descriptor( desc: cudnnPoolingDescriptor_t, mode: cudnnPoolingMode_t, nan_propagation: cudnnNanPropagation_t, window_height: c_int, window_width: c_int, vertical_padding: c_int, horizontal_padding: c_int, vertical_stride: c_int, horizontal_stride: c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Pooling Descriptor with specific properties.

source

pub fn get_pooling_2d_descriptor( desc: cudnnPoolingDescriptor_t, mode: *mut cudnnPoolingMode_t, nan_propagation: *mut cudnnNanPropagation_t, window_height: *mut c_int, window_width: *mut c_int, vertical_padding: *mut c_int, horizontal_padding: *mut c_int, vertical_stride: *mut c_int, horizontal_stride: *mut c_int ) -> Result<(), Error>

Return information about a generic CUDA cuDNN Pooling Descriptor.

source

pub fn get_pooling_forward_output_dim( pooling_desc: cudnnPoolingDescriptor_t, input_desc: cudnnTensorDescriptor_t, nb_dims: c_int, out_dim_a: *mut c_int ) -> Result<(), Error>

Computes the output dimensions of the tensor that results from applying the given pooling to the given input tensor, and writes them to out_dim_a.

source

pub fn pooling_forward( handle: cudnnHandle_t, pooling_desc: cudnnPoolingDescriptor_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a pooling forward function.

source

pub fn pooling_backward( handle: cudnnHandle_t, pooling_desc: cudnnPoolingDescriptor_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, src_diff_desc: cudnnTensorDescriptor_t, src_diff_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *const c_void, dest_diff_desc: cudnnTensorDescriptor_t, dest_diff_data: *mut c_void ) -> Result<(), Error>

Computes a pooling backward function.

source§

impl API

source

pub fn get_rnn_temp_space_sizes( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, mode: cudnnForwardMode_t, x_desc: cudnnRNNDataDescriptor_t ) -> Result<(usize, usize), Error>

This function computes the work and reserve space buffer sizes based on the RNN network geometry stored in rnnDesc, designated usage (inference or training) defined by the fMode argument, and the current RNN data dimensions (maxSeqLength, batchSize) retrieved from xDesc. When RNN data dimensions change, the cudnnGetRNNTempSpaceSizes() must be called again because RNN temporary buffer sizes are not monotonic.

source

pub fn get_rnn_workspace_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, unroll_sequence_length: i32, x_desc: Vec<cudnnTensorDescriptor_t> ) -> Result<usize, Error>

Returns the workspace size in bytes needed to execute the given RNN.

§Arguments
  • rnn_desc Previously initialised RNN Descriptor
  • unroll_sequence_length Length of iterations
  • x_desc An array of tensor descriptors describing the input to each recurrent iteration (one descriptor per iteration). The first dimension (batch size) of the tensors may decrease from element n to element n+1 but may not increase. For example, if you have multiple time series in a batch, they can be different lengths. This dimension is the batch size for the particular iteration of the sequence, and so it should decrease when a sequence in the batch has been terminated.
source

pub fn get_rnn_training_reserve_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: Vec<cudnnTensorDescriptor_t> ) -> Result<usize, Error>

Size of Reserve Space for RNN Training [cudnnGetRNNTrainingReserveSize][1]

§Arguments
  • handle Handle to cuDNN Library Descriptor
  • rnn_desc Previously initialised RNN Descriptor
  • seq_length Number of iterations to unroll over - must not exceed workspace size seq_len
  • x_desc Array of tensor descriptors describing each recurrent iteration - one per element in the RNN sequence

[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNTrainingReserveSize
source

pub fn get_rnn_params_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, x_desc: cudnnTensorDescriptor_t, data_type: DataType ) -> Result<usize, Error>

cudnnGetRNNParamsSize[1] Query the amount of parameter space needed to execute the RNN for rnnDesc, given xDesc

§Parameters
  • handle CUDNN Handle
  • rnn_desc Descriptor for the RNN
  • x_desc Input Tensor
  • data_type Data Type for the Input Tensor

[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNParamsSize
source§

impl API

source

pub fn create_rnn_descriptor() -> Result<cudnnRNNDescriptor_t, Error>

Creates a generic CUDA cuDNN RNN Descriptor.

source

pub fn create_rnn_data_descriptor() -> Result<cudnnRNNDataDescriptor_t, Error>

cudnnCreateRNNDataDescriptor() https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnCreateRNNDataDescriptor

source

pub fn set_rnn_data_descriptor( rnn_data_descriptor: cudnnRNNDataDescriptor_t, data_type: cudnnDataType_t, layout: cudnnRNNDataLayout_t, max_sequence_length: i32, batch_size: i32, vector_size: i32, sequence_length_array: &[i32], _padding: *mut c_void ) -> Result<cudnnRNNDataDescriptor_t, Error>

source

pub fn destroy_rnn_descriptor(desc: cudnnRNNDescriptor_t) -> Result<(), Error>

Destroys a CUDA cuDNN RNN Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_rnn_descriptor( handle: cudnnHandle_t, desc: cudnnRNNDescriptor_t, hidden_size: i32, num_layers: i32, dropout_desc: cudnnDropoutDescriptor_t, input_mode: cudnnRNNInputMode_t, direction: cudnnDirectionMode_t, mode: cudnnRNNMode_t, algorithm: cudnnRNNAlgo_t, data_type: DataType ) -> Result<(), Error>

Initializes a generic CUDA cuDNN RNN Descriptor with specific properties.

source

pub fn set_rnn_matrix_math_type( rnn_desc: cudnnRNNDescriptor_t, math_type: cudnnMathType_t ) -> Result<(), Error>

Set RNN Matrix Math Type (cudnnSetRNNMatrixMathType). Required for RNN operations.

source

pub fn set_rnn_padding_mode( rnn_desc: cudnnRNNDescriptor_t, padding_mode: cudnnRNNPaddingMode_t ) -> Result<(), Error>

Set RNN Padding Mode (cudnnSetRNNPaddingMode). This function enables or disables the padded RNN input/output for a previously created and initialized RNN descriptor. This information is required before calling the cudnnGetRNNWorkspaceSize() and cudnnGetRNNTrainingReserveSize() functions, to determine whether additional workspace and training reserve space is needed. By default, the padded RNN input/output is not enabled.

source§

impl API

source

pub fn rnn_forward_training( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: Vec<cudnnTensorDescriptor_t>, x: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, cx_desc: cudnnTensorDescriptor_t, cx: *const c_void, w_desc: cudnnFilterDescriptor_t, w: *const c_void, y_desc: Vec<cudnnTensorDescriptor_t>, y: *mut c_void, hy_desc: cudnnTensorDescriptor_t, hy: *mut c_void, cy_desc: cudnnTensorDescriptor_t, cy: *mut c_void, workspace: *mut c_void, workspace_size_in_bytes: usize, reserve: *mut c_void, reserve_size_in_bytes: usize ) -> Result<(), Error>

Trains a RNN through the Forward Process

§Arguments

  • handle Handle to a previously created cuDNN context [0]
  • rnn_desc A previously initialised RNN descriptor [1]
  • seq_length Number of iterations for the RNN to unroll over.
  • x_desc Array of seqLength packed tensor descriptors [1]. Each descriptor should have 3 dimensions that describe the input data format to one recurrent iteration - one descriptor per RNN time-step: [Batch Size, Input Size, 1]. Input vectors should be column-major, so the strides should be set to strideA[0]=inputSize, strideA[1]=1, strideA[2]=1.
  • x Data pointer to GPU memory associated with the input.
  • hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
  • hx Data pointer for initial hidden state - if null will initialize state to zero.
  • cx_desc Tensor descriptor for the initial cell state for an LSTM network.
  • cx Data pointer for initial cell state - if null will initialize state to zero.
  • w_desc Handle to descriptors for weights [2]
  • w Data pointer to weights
  • y_desc Output for each recurrent iteration. Second dimension should match size of the hidden layer. First dimension should match the first dimension of the tensor in input.
  • y Output memory
  • hy_desc Final hidden state of the RNN
  • hy Memory for final hidden state
  • cy_desc Final cell state for the RNN
  • cy Memory for the final cell state - can be NULL.
  • workspace Data pointer to GPU memory to be used as a workspace for this call
  • workspace_size_in_bytes Size in bytes of the provided workspace
  • reserve Data pointer for GPU memory to be used as a reserve space for this call
  • reserve_size_in_bytes Size in bytes of the reserve space

[0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t
[2] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnFilterDescriptor_t

source

pub fn rnn_forward_inference( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: *const cudnnTensorDescriptor_t, x: *mut c_void, hx_desc: cudnnTensorDescriptor_t, hx: *mut c_void, cx_desc: cudnnTensorDescriptor_t, cx: *mut c_void, w_desc: cudnnFilterDescriptor_t, w: *mut c_void, y_desc: *const cudnnTensorDescriptor_t, y: *mut c_void, hy_desc: cudnnTensorDescriptor_t, hy: *mut c_void, cy_desc: cudnnTensorDescriptor_t, cy: *mut c_void, work_space: *mut c_void, work_size_in_bytes: size_t ) -> Result<(), Error>

Executes an RNN without training. This routine executes the recurrent neural network described by rnnDesc with inputs x, hx, and cx, weights w and outputs y, hy, and cy. Workspace is required for intermediate storage. This function does not store intermediate data required for training; cudnnRNNForwardTraining() should be used for that purpose.

§Arguments

  • handle Handle to a previously created cuDNN context [0]
  • rnn_desc A previously initialised RNN descriptor [1]
  • seq_length Number of iterations for the RNN to unroll over.
  • x_desc Array of seqLength packed tensor descriptors [1]. Each descriptor should have 3 dimensions that describe the input data format to one recurrent iteration - one descriptor per RNN time-step: [Batch Size, Input Size, 1]. Input vectors should be column-major, so the strides should be set to strideA[0]=inputSize, strideA[1]=1, strideA[2]=1.
  • x Data pointer to GPU memory associated with the input.
  • hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
  • hx Data pointer for initial hidden state - if null will initialize state to zero.
  • cx_desc Tensor descriptor for the initial cell state for an LSTM network.
  • cx Data pointer for initial cell state - if null will initialize state to zero.
  • w_desc Handle to descriptors for weights
  • w Data pointer to weights
  • y_desc Output for each recurrent iteration. Second dimension should match size of the hidden layer. First dimension should match the first dimension of the tensor in input.
  • y Output memory
  • hy_desc Final hidden state of the RNN
  • hy Memory for final hidden state
  • cy_desc Final cell state for the RNN
  • cy Memory for the final cell state - can be NULL.
  • work_space Data pointer to GPU memory to be used as a workspace for this call
  • work_size_in_bytes Size in bytes of the provided workspace

[0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

source§

impl API

source

pub fn rnn_backward_data( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, y_desc: *const cudnnTensorDescriptor_t, y: *const c_void, dy_desc: *const cudnnTensorDescriptor_t, dy: *const c_void, dhy_desc: cudnnTensorDescriptor_t, dhy: *const c_void, dcy_desc: cudnnTensorDescriptor_t, dcy: *const c_void, w_desc: cudnnFilterDescriptor_t, w: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, cx_desc: cudnnTensorDescriptor_t, cx: *const c_void, dx_desc: *const cudnnTensorDescriptor_t, dx: *mut c_void, dhx_desc: cudnnTensorDescriptor_t, dhx: *mut c_void, dcx_desc: cudnnTensorDescriptor_t, dcx: *mut c_void, workspace: *mut c_void, workspace_size_in_bytes: usize, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

cuDNN RNN Backward Data. This routine executes the recurrent neural network described by rnnDesc with output gradients dy, dhy, and dcy, weights w and input gradients dx, dhx, and dcx. Workspace is required for intermediate storage. The data in reserveSpace must have previously been generated by cudnnRNNForwardTraining(). The same reserveSpace data must be used for future calls to cudnnRNNBackwardWeights() if they execute on the same input data.

§Arguments

  • handle Handle to a previously created [cuDNN context][0]
  • rnn_desc A previously initialised [RNN descriptor][1]
  • seq_length Number of iterations for the RNN to unroll over.
  • y_desc Array of packed [tensor descriptors][1] describing the output from each recurrent iteration.
  • y Data pointer to GPU memory for output at each iteration
  • dy_desc Array of packed [tensor descriptors][1] describing the gradient at the output from each recurrent iteration.
  • dy Data pointer to GPU memory for gradient at output iterations
  • dhy_desc Array of packed [tensor descriptors][1] describing the gradients at the final hidden state of the RNN.
  • dhy Data pointer to GPU memory for gradient at the final hidden state of the network. If this is a NULL pointer, the gradients at the final hidden state of the network will be initialised to zero.
  • dcy_desc Array of packed [tensor descriptors][1] describing the gradients at the final cell state of the RNN.
  • dcy Data pointer to GPU memory for gradients at the final cell state of the RNN.
  • w_desc Handle to a previously initialized filter descriptor for the weights in the RNN
  • w Data pointer to GPU memory for the filter descriptor for the weights.
  • hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
  • hx Data pointer for initial hidden state - if null will initialize state to zero.
  • cx_desc Tensor descriptor for the initial cell state for an LSTM network.
  • cx Data pointer for initial cell state - if null will initialize state to zero.
  • dx_desc Array of fully packed tensor descriptors for the gradient at the input of each iteration.
  • dx Data pointer for the gradient of the input of each recurrent iteration.
  • dhx_desc Fully packed tensor for the gradient of the initial hidden state of the RNN.
  • dhx Data pointer for gradient of the initial hidden state of the RNN.
  • workspace Data pointer to GPU memory to be used as a workspace for this call
  • workspace_size_in_bytes Size in bytes of the provided workspace
  • reserve_space Data pointer for GPU memory to be used as a reserve space for this call
  • reserve_space_size_in_bytes Size in bytes for reserve_space

[0]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

source

pub fn rnn_backward_weights( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: *const cudnnTensorDescriptor_t, x: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, y_desc: *const cudnnTensorDescriptor_t, y: *const c_void, workspace: *const c_void, work_space_size_in_bytes: usize, dw_desc: cudnnFilterDescriptor_t, dw: *mut c_void, reserve_space: *const c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

cuDNN RNN Backward Weights. This routine accumulates weight gradients dw from the recurrent neural network described by rnnDesc with inputs x, hx and outputs y. The mode of operation in this case is additive: the weight gradients calculated will be added to those already existing in dw. Workspace is required for intermediate storage. The data in reserveSpace must have previously been generated by cudnnRNNBackwardData().

§Arguments

  • handle Handle to a previously created [cuDNN context][0]
  • rnn_desc A previously initialised [RNN descriptor][1]
  • seq_length Number of iterations for the RNN to unroll over.
  • x_desc Array of packed tensor descriptors.
  • x Data pointer for input
  • hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
  • hx Data pointer for initial hidden state - if null will initialize state to zero.
  • y_desc Array of packed [tensor descriptors][1] describing the output from each recurrent iteration.
  • y Data pointer to GPU memory for output at each iteration
  • dw_desc Handle to previously initialized filter descriptor for the gradient of the weights.
  • dw Data pointer to GPU memory for the descriptor of the gradient of the weights.
  • workspace Data pointer to GPU memory to be used as a workspace for this call
  • work_space_size_in_bytes Size in bytes of the provided workspace
  • reserve_space Data pointer for GPU memory to be used as a reserve space for this call
  • reserve_space_size_in_bytes Size in bytes for reserve_space

[0]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

source§

impl API

source

pub fn softmax_forward( handle: cudnnHandle_t, algorithm: cudnnSoftmaxAlgorithm_t, mode: cudnnSoftmaxMode_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a softmax forward function.

source

pub fn softmax_backward( handle: cudnnHandle_t, algorithm: cudnnSoftmaxAlgorithm_t, mode: cudnnSoftmaxMode_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, src_diff_desc: cudnnTensorDescriptor_t, src_diff_data: *const c_void, beta: *const c_void, dest_diff_desc: cudnnTensorDescriptor_t, dest_diff_data: *mut c_void ) -> Result<(), Error>

Computes a softmax backward function.

source§

impl API

source

pub fn create_tensor_descriptor() -> Result<cudnnTensorDescriptor_t, Error>

Creates a generic CUDA cuDNN Tensor Descriptor.

source

pub fn destroy_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Tensor Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

source

pub fn set_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t, data_type: cudnnDataType_t, nb_dims: c_int, dim_a: *const c_int, stride_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Tensor Descriptor with specific properties.

source

pub fn get_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t, nb_dims_requested: c_int, data_type: *mut cudnnDataType_t, nb_dims: *mut c_int, dim_a: *mut c_int, stride_a: *mut c_int ) -> Result<(), Error>

Returns information about a generic CUDA cuDNN Tensor Descriptor.

source

pub fn transform_tensor( handle: cudnnHandle_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Transforms a CUDA cuDNN Tensor into another Tensor with a different layout.

This function copies the scaled data from one tensor to another tensor with a different layout. Those descriptors need to have the same dimensions but not necessarily the same strides. The input and output tensors must not overlap in any way (i.e., tensors cannot be transformed in place). This function can be used to convert a tensor with an unsupported format to a supported one.

source

pub fn add_tensor( handle: cudnnHandle_t, alpha: *const c_void, bias_desc: cudnnTensorDescriptor_t, bias_data: *const c_void, beta: *const c_void, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void ) -> Result<(), Error>

Adds the scaled values from one CUDA cuDNN Tensor to another.

Up to dimension 5, all tensor formats are supported. Beyond those dimensions, this routine is not supported.

This function adds the scaled values of one bias tensor to another tensor. Each dimension of the bias tensor must match the corresponding dimension of the src_dest tensor or must be equal to 1. In the latter case, the same value from the bias tensor for those dimensions will be used to blend into the src_dest tensor.

source

pub fn set_tensor( handle: cudnnHandle_t, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void, value: *const c_void ) -> Result<(), Error>

Sets all elements of a tensor to a given value.

source

pub fn scale_tensor( handle: cudnnHandle_t, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void, alpha: *const c_void ) -> Result<(), Error>

Scales all elements of a tensor by a given factor.

source§

impl API

source

pub fn init() -> Result<cudnnHandle_t, Error>

Initialize the CUDA cuDNN API with needed context and resources.

The returned handle must be provided to future CUDA cuDNN API calls. Call this method outside of performance critical routines.

source

pub fn destroy(handle: cudnnHandle_t) -> Result<(), Error>

Destroys the CUDA cuDNN context and resources associated with the handle.

Frees up resources and will call cudaDeviceSynchronize internally. Therefore, use this method outside of performance critical routines.

source

pub fn get_version() -> usize

Returns the version of the CUDA cuDNN API.

source§

impl API

source

pub const fn cudnn_data_type(data_type: DataType) -> cudnnDataType_t

Converts a DataType into the corresponding cuDNN data type (cudnnDataType_t).

Trait Implementations§

source§

impl Clone for API

source§

fn clone(&self) -> API

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for API

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Copy for API

Auto Trait Implementations§

§

impl RefUnwindSafe for API

§

impl Send for API

§

impl Sync for API

§

impl Unpin for API

§

impl UnwindSafe for API

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.