Struct rcudnn::API

impl API

pub fn create_activation_descriptor( ) -> Result<cudnnActivationDescriptor_t, Error>

Create a generic CUDA cuDNN ActivationDescriptor.

pub fn destroy_activation_descriptor( desc: cudnnActivationDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Activation Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_activation_descriptor( desc: cudnnActivationDescriptor_t, mode: cudnnActivationMode_t, relu_nan_opt: cudnnNanPropagation_t, relu_ceiling: f64 ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Activation Descriptor with specific properties.

pub fn activation_forward( handle: cudnnHandle_t, activation_desc: cudnnActivationDescriptor_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes an activation forward function.

pub fn activation_backward( handle: cudnnHandle_t, activation_desc: cudnnActivationDescriptor_t, alpha: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *const c_void, dy_desc: cudnnTensorDescriptor_t, dy: *const c_void, beta: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, dx_desc: cudnnTensorDescriptor_t, dx: *mut c_void ) -> Result<(), Error>

Computes an activation backward function.

impl API

pub fn create_filter_descriptor() -> Result<cudnnFilterDescriptor_t, Error>

Creates a generic CUDA cuDNN Filter Descriptor.

pub fn destroy_filter_descriptor( desc: cudnnFilterDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Filter Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_filter_descriptor( desc: cudnnFilterDescriptor_t, data_type: cudnnDataType_t, tensor_format: cudnnTensorFormat_t, nb_dims: c_int, filter_dim_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Filter Descriptor with specific properties.

pub fn find_convolution_forward_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionFwdAlgoPerf_t>, Error>

cuDNN Convolution Configuration

Returns the most performant convolutional forward algorithm, for the given scenario.

pub fn get_convolution_forward_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionFwdAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes that is needed for the given convolutional algorithm.

pub fn find_convolution_backward_filter_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionBwdFilterAlgoPerf_t>, Error>

Returns the most performant convolutional backward filter algorithm, for the given scenario.

pub fn get_convolution_backward_filter_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionBwdFilterAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes that is needed for the given convolutional algorithm.

pub fn find_convolution_backward_data_algorithm( handle: cudnnHandle_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<Vec<cudnnConvolutionBwdDataAlgoPerf_t>, Error>

Returns the most performant convolutional backward data algorithm, for the given scenario.

pub fn get_convolution_backward_data_workspace_size( handle: cudnnHandle_t, algo: cudnnConvolutionBwdDataAlgo_t, filter_desc: cudnnFilterDescriptor_t, conv_desc: cudnnConvolutionDescriptor_t, src_desc: cudnnTensorDescriptor_t, dest_desc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Returns the workspace size in bytes that is needed for the given convolutional algorithm.

pub fn create_convolution_descriptor( ) -> Result<cudnnConvolutionDescriptor_t, Error>

Creates a generic CUDA cuDNN Convolution Descriptor.

pub fn destroy_convolution_descriptor( desc: cudnnConvolutionDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Convolution Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_convolution_descriptor( desc: cudnnConvolutionDescriptor_t, data_type: cudnnDataType_t, mode: cudnnConvolutionMode_t, array_length: c_int, pad_a: *const c_int, filter_stride_a: *const c_int, upscale_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Convolution Descriptor with specific properties.

pub fn convolution_forward( handle: cudnnHandle_t, algo: cudnnConvolutionFwdAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, filter_desc: cudnnFilterDescriptor_t, filter_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a convolution forward function.

pub fn convolution_backward_bias( handle: cudnnHandle_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t the bias.

pub fn convolution_backward_filter( handle: cudnnHandle_t, algo: cudnnConvolutionBwdFilterAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, diff_desc: cudnnTensorDescriptor_t, diff_data: *const c_void, beta: *const c_void, grad_desc: cudnnFilterDescriptor_t, grad_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t filter coefficient.

pub fn convolution_backward_data( handle: cudnnHandle_t, algo: cudnnConvolutionBwdDataAlgo_t, conv_desc: cudnnConvolutionDescriptor_t, work_space: *mut c_void, work_size_in_bytes: size_t, alpha: *const c_void, filter_desc: cudnnFilterDescriptor_t, filter_data: *const c_void, diff_desc: cudnnTensorDescriptor_t, diff_data: *const c_void, beta: *const c_void, grad_desc: cudnnTensorDescriptor_t, grad_data: *mut c_void ) -> Result<(), Error>

Computes a convolution backward function w.r.t the output tensor.

impl API

pub fn cuda_allocate_device_memory(bytes: usize) -> Result<*mut c_void, Error>

Allocates the requested number of bytes of memory on the CUDA device.

The returned pointer refers to the allocated device memory. Call this method outside of performance critical routines.

pub fn cuda_free_device_memory(ptr: *mut c_void) -> Result<(), Error>

Frees the CUDA device memory that ptr points to.

Frees up resources and will call cudaDeviceSynchronize internally. Therefore, use this method outside of performance critical routines.

impl API

pub fn create_dropout_descriptor() -> Result<cudnnDropoutDescriptor_t, Error>

Creates a generic CUDA cuDNN Dropout Descriptor.

pub fn destroy_dropout_descriptor( dropout_desc: cudnnDropoutDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Dropout Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn dropout_get_states_size(handle: cudnnHandle_t) -> Result<usize, Error>

Get the states size (GPU memory).

pub fn dropout_get_reserve_space_size( xdesc: cudnnTensorDescriptor_t ) -> Result<usize, Error>

Get the reserve space size.

pub fn set_dropout_descriptor( dropout_desc: cudnnDropoutDescriptor_t, handle: cudnnHandle_t, dropout: f32, states: *mut c_void, state_size_in_bytes: usize, seed: c_ulonglong ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Dropout Descriptor with specific properties.

pub fn dropout_forward( handle: cudnnHandle_t, dropout_desc: cudnnDropoutDescriptor_t, xdesc: cudnnTensorDescriptor_t, x: *const c_void, ydesc: cudnnTensorDescriptor_t, y: *mut c_void, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

Computes the dropout forward function.

pub fn dropout_backward( handle: cudnnHandle_t, dropout_desc: cudnnDropoutDescriptor_t, dydesc: cudnnTensorDescriptor_t, dy: *const c_void, dxdesc: cudnnTensorDescriptor_t, dx: *mut c_void, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

Computes the dropout backward function.

impl API

pub fn create_lrn_descriptor() -> Result<cudnnLRNDescriptor_t, Error>

Creates a generic CUDA cuDNN LRN Descriptor.

pub fn destroy_lrn_descriptor(desc: cudnnLRNDescriptor_t) -> Result<(), Error>

Destroys a CUDA cuDNN LRN Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_lrn_descriptor( desc: cudnnLRNDescriptor_t, lrn_n: c_uint, lrn_alpha: c_double, lrn_beta: c_double, lrn_k: c_double ) -> Result<(), Error>

Initializes a generic CUDA cuDNN LRN Descriptor with specific properties.

pub fn lrn_cross_channel_forward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnLRNMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes an LRN cross channel forward function.

pub fn lrn_cross_channel_backward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnLRNMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, dx_desc: cudnnTensorDescriptor_t, dx: *const c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *const c_void, dy_desc: cudnnTensorDescriptor_t, dy: *mut c_void ) -> Result<(), Error>

Computes an LRN cross channel backward function.

pub fn divisive_normalization_forward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnDivNormMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, means: *const c_void, temp: *mut c_void, temp2: *mut c_void, beta: *const c_void, y_desc: cudnnTensorDescriptor_t, y: *mut c_void ) -> Result<(), Error>

Computes a divisive normalization forward function.

pub fn divisive_normalization_backward( handle: cudnnHandle_t, norm_desc: cudnnLRNDescriptor_t, mode: cudnnDivNormMode_t, alpha: *const c_void, x_desc: cudnnTensorDescriptor_t, x: *const c_void, means: *const c_void, dy: *const c_void, temp: *mut c_void, temp2: *mut c_void, beta: *const c_void, dx_dmeans_desc: cudnnTensorDescriptor_t, dx: *mut c_void, dmeans: *mut c_void ) -> Result<(), Error>

Computes a divisive normalization backward function.

impl API

pub fn create_pooling_descriptor() -> Result<cudnnPoolingDescriptor_t, Error>

Creates a generic CUDA cuDNN Pooling Descriptor.

pub fn destroy_pooling_descriptor( desc: cudnnPoolingDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Pooling Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_pooling_descriptor( desc: cudnnPoolingDescriptor_t, mode: cudnnPoolingMode_t, maxpooling_nan_opt: cudnnNanPropagation_t, nb_dims: c_int, window: *const c_int, padding: *const c_int, stride: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Pooling Descriptor with specific properties.

pub fn get_pooling_descriptor( desc: cudnnPoolingDescriptor_t, nb_dims_requested: c_int, mode: *mut cudnnPoolingMode_t, maxpooling_nan_opt: *mut cudnnNanPropagation_t, nb_dims: *mut c_int, window: *mut c_int, padding: *mut c_int, stride: *mut c_int ) -> Result<(), Error>

Return information about a generic CUDA cuDNN Pooling Descriptor.

pub fn set_pooling_2d_descriptor( desc: cudnnPoolingDescriptor_t, mode: cudnnPoolingMode_t, nan_propagation: cudnnNanPropagation_t, window_height: c_int, window_width: c_int, vertical_padding: c_int, horizontal_padding: c_int, vertical_stride: c_int, horizontal_stride: c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Pooling Descriptor with specific properties.

pub fn get_pooling_2d_descriptor( desc: cudnnPoolingDescriptor_t, mode: *mut cudnnPoolingMode_t, nan_propagation: *mut cudnnNanPropagation_t, window_height: *mut c_int, window_width: *mut c_int, vertical_padding: *mut c_int, horizontal_padding: *mut c_int, vertical_stride: *mut c_int, horizontal_stride: *mut c_int ) -> Result<(), Error>

Return information about a generic CUDA cuDNN Pooling Descriptor.

pub fn get_pooling_forward_output_dim( pooling_desc: cudnnPoolingDescriptor_t, input_desc: cudnnTensorDescriptor_t, nb_dims: c_int, out_dim_a: *mut c_int ) -> Result<(), Error>

Returns the output dimensions of a pooling forward operation for the given Pooling Descriptor and input Tensor Descriptor.

pub fn pooling_forward( handle: cudnnHandle_t, pooling_desc: cudnnPoolingDescriptor_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a pooling forward function.

pub fn pooling_backward( handle: cudnnHandle_t, pooling_desc: cudnnPoolingDescriptor_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, src_diff_desc: cudnnTensorDescriptor_t, src_diff_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *const c_void, dest_diff_desc: cudnnTensorDescriptor_t, dest_diff_data: *mut c_void ) -> Result<(), Error>

Computes a pooling backward function.

impl API

pub fn get_rnn_temp_space_sizes( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, mode: cudnnForwardMode_t, x_desc: cudnnRNNDataDescriptor_t ) -> Result<(usize, usize), Error>

This function computes the work and reserve space buffer sizes based on the RNN network geometry stored in rnnDesc, designated usage (inference or training) defined by the fMode argument, and the current RNN data dimensions (maxSeqLength, batchSize) retrieved from xDesc. When RNN data dimensions change, the cudnnGetRNNTempSpaceSizes() must be called again because RNN temporary buffer sizes are not monotonic.

pub fn get_rnn_workspace_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, unroll_sequence_length: i32, x_desc: Vec<cudnnTensorDescriptor_t> ) -> Result<usize, Error>

Returns the workspace size in bytes that is needed for the given RNN configuration.

§Arguments

rnn_desc Previously initialised RNN Descriptor
unroll_sequence_length Length of iterations
x_desc An array of tensor descriptors describing the input to each recurrent iteration (one descriptor per iteration). The first dimension (batch size) of the tensors may decrease from element n to element n+1 but may not increase. For example, if you have multiple time series in a batch, they can be different lengths. This dimension is the batch size for the particular iteration of the sequence, and so it should decrease when a sequence in the batch has been terminated.

pub fn get_rnn_training_reserve_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: Vec<cudnnTensorDescriptor_t> ) -> Result<usize, Error>

Size of Reserve Space for RNN Training [cudnnGetRNNTrainingReserveSize][1]

§Arguments

handle Handle to cuDNN Library Descriptor
rnn_desc Previously initialised RNN Descriptor
seq_length Number of iterations to unroll over - must not exceed workspace size seq_len
x_desc Array of tensor descriptors describing each recurrent iteration - one per element in the RNN sequence

[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNTrainingReserveSize

pub fn get_rnn_params_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, x_desc: cudnnTensorDescriptor_t, data_type: DataType ) -> Result<usize, Error>

cudnnGetRNNParamsSize[1] Query the amount of parameter space needed to execute the RNN for rnnDesc, given xDesc

§Parameters

handle CUDNN Handle
rnn_desc Descriptor for the RNN
x_desc Input Tensor
data_type Data Type for the Input Tensor

[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNParamsSize

impl API

pub fn create_rnn_descriptor() -> Result<cudnnRNNDescriptor_t, Error>

Creates a generic CUDA cuDNN RNN Descriptor.

pub fn create_rnn_data_descriptor() -> Result<cudnnRNNDataDescriptor_t, Error>

cudnnCreateRNNDataDescriptor() https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnCreateRNNDataDescriptor

pub fn set_rnn_data_descriptor( rnn_data_descriptor: cudnnRNNDataDescriptor_t, data_type: cudnnDataType_t, layout: cudnnRNNDataLayout_t, max_sequence_length: i32, batch_size: i32, vector_size: i32, sequence_length_array: &[i32], _padding: *mut c_void ) -> Result<cudnnRNNDataDescriptor_t, Error>

pub fn destroy_rnn_descriptor(desc: cudnnRNNDescriptor_t) -> Result<(), Error>

Destroys a CUDA cuDNN RNN Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_rnn_descriptor( handle: cudnnHandle_t, desc: cudnnRNNDescriptor_t, hidden_size: i32, num_layers: i32, dropout_desc: cudnnDropoutDescriptor_t, input_mode: cudnnRNNInputMode_t, direction: cudnnDirectionMode_t, mode: cudnnRNNMode_t, algorithm: cudnnRNNAlgo_t, data_type: DataType ) -> Result<(), Error>

Initializes a generic CUDA cuDNN RNN Descriptor with specific properties.

pub fn set_rnn_matrix_math_type( rnn_desc: cudnnRNNDescriptor_t, math_type: cudnnMathType_t ) -> Result<(), Error>

Set RNN Matrix Math Type (cudnnSetRNNMatrixMathType). Required for RNN operations.

pub fn set_rnn_padding_mode( rnn_desc: cudnnRNNDescriptor_t, padding_mode: cudnnRNNPaddingMode_t ) -> Result<(), Error>

Set RNN Padding Mode (cudnnSetRNNPaddingMode). This function enables or disables the padded RNN input/output for a previously created and initialized RNN descriptor. This information is required before calling the cudnnGetRNNWorkspaceSize() and cudnnGetRNNTrainingReserveSize() functions, to determine whether additional workspace and training reserve space is needed. By default, the padded RNN input/output is not enabled.

impl API

pub fn rnn_forward_training( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: Vec<cudnnTensorDescriptor_t>, x: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, cx_desc: cudnnTensorDescriptor_t, cx: *const c_void, w_desc: cudnnFilterDescriptor_t, w: *const c_void, y_desc: Vec<cudnnTensorDescriptor_t>, y: *mut c_void, hy_desc: cudnnTensorDescriptor_t, hy: *mut c_void, cy_desc: cudnnTensorDescriptor_t, cy: *mut c_void, workspace: *mut c_void, workspace_size_in_bytes: usize, reserve: *mut c_void, reserve_size_in_bytes: usize ) -> Result<(), Error>

Trains an RNN through the forward process.

§Arguments

handle Handle to a previously created cuDNN context [0]
rnn_desc A previously initialised RNN descriptor [1]
seq_length Number of iterations for the RNN to unroll over.
x_desc Array of seqLength packed tensor descriptors [1]. Each descriptor should have 3 dimensions that describe the input data format to one recurrent iteration - one descriptor per RNN time-step: [Batch Size, Input Size, 1]. Input vectors should be column-major, so the strides should be set to strideA[0]=inputSize, strideA[1]=1, strideA[2]=1.
x Data pointer to GPU memory associated with the input.
hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
hx Data pointer for initial hidden state - if null will initialize state to zero.
cx_desc Tensor descriptor for the initial cell state for an LSTM network.
cx Data pointer for initial cell state - if null will initialize state to zero.
w_desc Handle to descriptors for weights
w Data pointer to weights
y_desc Output for each recurrent iteration. Second dimension should match size of the hidden layer. First dimension should match the first dimension of the tensor in input.
y Output memory
hy_desc Final hidden state of the RNN
hy Memory for final hidden state
cy_desc Final cell state for the RNN
cy Memory for the final cell state - can be NULL.
workspace Data pointer to GPU memory to be used as a workspace for this call
workspace_size_in_bytes Size in bytes of the provided workspace
reserve Data pointer for GPU memory to be used as a reserve space for this call
reserve_size_in_bytes Size in bytes of the provided reserve space

[0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t
[2] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnFilterDescriptor_t

pub fn rnn_forward_inference( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: *const cudnnTensorDescriptor_t, x: *mut c_void, hx_desc: cudnnTensorDescriptor_t, hx: *mut c_void, cx_desc: cudnnTensorDescriptor_t, cx: *mut c_void, w_desc: cudnnFilterDescriptor_t, w: *mut c_void, y_desc: *const cudnnTensorDescriptor_t, y: *mut c_void, hy_desc: cudnnTensorDescriptor_t, hy: *mut c_void, cy_desc: cudnnTensorDescriptor_t, cy: *mut c_void, work_space: *mut c_void, work_size_in_bytes: size_t ) -> Result<(), Error>

Executes an RNN without training. This routine executes the recurrent neural network described by rnnDesc with inputs x, hx, and cx, weights w and outputs y, hy, and cy. A workspace is required for intermediate storage. This function does not store intermediate data required for training; cudnnRNNForwardTraining() should be used for that purpose.

§Arguments

handle Handle to a previously created cuDNN context [0]
rnn_desc A previously initialised RNN descriptor [1]
seq_length Number of iterations for the RNN to unroll over.
x_desc Array of seqLength packed tensor descriptors [1]. Each descriptor should have 3 dimensions that describe the input data format to one recurrent iteration - one descriptor per RNN time-step: [Batch Size, Input Size, 1]. Input vectors should be column-major, so the strides should be set to strideA[0]=inputSize, strideA[1]=1, strideA[2]=1.
x Data pointer to GPU memory associated with the input.
hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
hx Data pointer for initial hidden state - if null will initialize state to zero.
cx_desc Tensor descriptor for the initial cell state for an LSTM network.
cx Data pointer for initial cell state - if null will initialize state to zero.
w_desc Handle to descriptors for weights
w Data pointer to weights
y_desc Output for each recurrent iteration. Second dimension should match size of the hidden layer. First dimension should match the first dimension of the tensor in input.
y Output memory
hy_desc Final hidden state of the RNN
hy Memory for final hidden state
cy_desc Final cell state for the RNN
cy Memory for the final cell state - can be NULL.
work_space Data pointer to GPU memory to be used as a workspace for this call
work_size_in_bytes Size in bytes of the provided workspace

[0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

impl API

pub fn rnn_backward_data( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, y_desc: *const cudnnTensorDescriptor_t, y: *const c_void, dy_desc: *const cudnnTensorDescriptor_t, dy: *const c_void, dhy_desc: cudnnTensorDescriptor_t, dhy: *const c_void, dcy_desc: cudnnTensorDescriptor_t, dcy: *const c_void, w_desc: cudnnFilterDescriptor_t, w: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, cx_desc: cudnnTensorDescriptor_t, cx: *const c_void, dx_desc: *const cudnnTensorDescriptor_t, dx: *mut c_void, dhx_desc: cudnnTensorDescriptor_t, dhx: *mut c_void, dcx_desc: cudnnTensorDescriptor_t, dcx: *mut c_void, workspace: *mut c_void, workspace_size_in_bytes: usize, reserve_space: *mut c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

cuDNN RNN Backward Data. This routine executes the recurrent neural network described by rnnDesc with output gradients dy, dhy, and dcy, weights w and input gradients dx, dhx, and dcx. Workspace is required for intermediate storage. The data in reserveSpace must have previously been generated by cudnnRNNForwardTraining(). The same reserveSpace data must be used for future calls to cudnnRNNBackwardWeights() if they execute on the same input data.

§Arguments

handle Handle to a previously created [cuDNN context][0]
rnn_desc A previously initialised [RNN descriptor][1]
seq_length Number of iterations for the RNN to unroll over.
y_desc Array of packed [tensor descriptors][1] describing the output from each recurrent iteration.
y Data pointer to GPU memory for output at each iteration
dy_desc Array of packed [tensor descriptors][1] describing the gradient at the output from each recurrent iteration.
dy Data pointer to GPU memory for gradient at output iterations
dhy_desc Array of packed [tensor descriptors][1] describing the gradients at the final hidden state of the RNN.
dhy Data pointer to GPU memory for gradient at the final hidden state of the network. If this is a NULL pointer, the gradients at the final hidden state of the network will be initialised to zero.
dcy_desc Array of packed [tensor descriptors][1] describing the gradients at the final cell state of the RNN.
dcy Data pointer to GPU memory for gradients at the final cell state of the RNN.
w_desc Handle to a previously initialized filter descriptor for the weights in the RNN
w Data pointer to GPU memory for the filter descriptor for the weights.
hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
hx Data pointer for initial hidden state - if null will initialize state to zero.
cx_desc Tensor descriptor for the initial cell state for an LSTM network.
cx Data pointer for initial cell state - if null will initialize state to zero.
dx_desc Array of fully packed tensor descriptors for the gradient at the input of each iteration.
dx Data pointer for the gradient of the input of each recurrent iteration.
dhx_desc Fully packed tensor for the gradient of the initial hidden state of the RNN.
dhx Data pointer for gradient of the initial hidden state of the RNN.
workspace Data pointer to GPU memory to be used as a workspace for this call
workspace_size_in_bytes Size in bytes of the provided workspace
reserve_space Data pointer for GPU memory to be used as a reserve space for this call
reserve_space_size_in_bytes Size in bytes for reserve_space

[0]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

pub fn rnn_backward_weights( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: c_int, x_desc: *const cudnnTensorDescriptor_t, x: *const c_void, hx_desc: cudnnTensorDescriptor_t, hx: *const c_void, y_desc: *const cudnnTensorDescriptor_t, y: *const c_void, workspace: *const c_void, work_space_size_in_bytes: usize, dw_desc: cudnnFilterDescriptor_t, dw: *mut c_void, reserve_space: *const c_void, reserve_space_size_in_bytes: usize ) -> Result<(), Error>

cuDNN RNN Backward Weights. This routine accumulates weight gradients dw from the recurrent neural network described by rnnDesc with inputs x, hx and outputs y. The mode of operation in this case is additive: the weight gradients calculated will be added to those already existing in dw. Workspace is required for intermediate storage. The data in reserveSpace must have previously been generated by cudnnRNNBackwardData().

§Arguments

handle Handle to a previously created [cuDNN context][0]
rnn_desc A previously initialised [RNN descriptor][1]
seq_length Number of iterations for the RNN to unroll over.
x_desc Array of packed tensor descriptors.
x Data pointer for input
hx_desc Fully packed tensor descriptor for the initial hidden state of the RNN.
hx Data pointer for initial hidden state - if null will initialize state to zero.
y_desc Array of packed [tensor descriptors][1] describing the output from each recurrent iteration.
y Data pointer to GPU memory for output at each iteration
dw_desc Handle to previously initialized filter descriptor for the gradient of the weights.
dw Data pointer to GPU memory for the descriptor of the gradient of the weights.
workspace Data pointer to GPU memory to be used as a workspace for this call
work_space_size_in_bytes Size in bytes of the provided workspace
reserve_space Data pointer for GPU memory to be used as a reserve space for this call
reserve_space_size_in_bytes Size in bytes for reserve_space

[0]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t
[1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t

impl API

pub fn softmax_forward( handle: cudnnHandle_t, algorithm: cudnnSoftmaxAlgorithm_t, mode: cudnnSoftmaxMode_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Computes a softmax forward function.

pub fn softmax_backward( handle: cudnnHandle_t, algorithm: cudnnSoftmaxAlgorithm_t, mode: cudnnSoftmaxMode_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, src_diff_desc: cudnnTensorDescriptor_t, src_diff_data: *const c_void, beta: *const c_void, dest_diff_desc: cudnnTensorDescriptor_t, dest_diff_data: *mut c_void ) -> Result<(), Error>

Computes a softmax backward function.

impl API

pub fn create_tensor_descriptor() -> Result<cudnnTensorDescriptor_t, Error>

Creates a generic CUDA cuDNN Tensor Descriptor.

pub fn destroy_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t ) -> Result<(), Error>

Destroys a CUDA cuDNN Tensor Descriptor.

Should be called when freeing a CUDA::Descriptor to not trash up the CUDA device.

pub fn set_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t, data_type: cudnnDataType_t, nb_dims: c_int, dim_a: *const c_int, stride_a: *const c_int ) -> Result<(), Error>

Initializes a generic CUDA cuDNN Tensor Descriptor with specific properties.

pub fn get_tensor_descriptor( tensor_desc: cudnnTensorDescriptor_t, nb_dims_requested: c_int, data_type: *mut cudnnDataType_t, nb_dims: *mut c_int, dim_a: *mut c_int, stride_a: *mut c_int ) -> Result<(), Error>

Returns information about a generic CUDA cuDNN Tensor Descriptor.

pub fn transform_tensor( handle: cudnnHandle_t, alpha: *const c_void, src_desc: cudnnTensorDescriptor_t, src_data: *const c_void, beta: *const c_void, dest_desc: cudnnTensorDescriptor_t, dest_data: *mut c_void ) -> Result<(), Error>

Transforms a CUDA cuDNN Tensor into another Tensor with a different layout.

This function copies the scaled data from one tensor to another tensor with a different layout. Those descriptors need to have the same dimensions but not necessarily the same strides. The input and output tensors must not overlap in any way (i.e., tensors cannot be transformed in place). This function can be used to convert a tensor with an unsupported format to a supported one.

pub fn add_tensor( handle: cudnnHandle_t, alpha: *const c_void, bias_desc: cudnnTensorDescriptor_t, bias_data: *const c_void, beta: *const c_void, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void ) -> Result<(), Error>

Adds the scaled values from one CUDA cuDNN Tensor to another.

Up to dimension 5, all tensor formats are supported. Beyond those dimensions, this routine is not supported.

This function adds the scaled values of one bias tensor to another tensor. Each dimension of the bias tensor must match the corresponding dimension of the src_dest tensor or must be equal to 1. In the latter case, the same value from the bias tensor for those dimensions will be used to blend into the src_dest tensor.

pub fn set_tensor( handle: cudnnHandle_t, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void, value: *const c_void ) -> Result<(), Error>

Sets all elements of a tensor to a given value.

pub fn scale_tensor( handle: cudnnHandle_t, src_dest_desc: cudnnTensorDescriptor_t, src_dest_data: *mut c_void, alpha: *const c_void ) -> Result<(), Error>

Scales all elements of a tensor by a given factor.

impl API

pub fn init() -> Result<cudnnHandle_t, Error>

Initialize the CUDA cuDNN API with needed context and resources.

The returned handle must be provided to future CUDA cuDNN API calls. Call this method outside of performance critical routines.

pub fn destroy(handle: cudnnHandle_t) -> Result<(), Error>

Destroys the CUDA cuDNN context and resources associated with the handle.

Frees up resources and will call cudaDeviceSynchronize internally. Therefore, use this method outside of performance critical routines.

pub fn get_version() -> usize

Returns the version of the CUDA cuDNN API.

impl API

pub const fn cudnn_data_type(data_type: DataType) -> cudnnDataType_t

Convert to CUDNN Data Type

Trait Implementations§

impl Clone for API

fn clone(&self) -> API

Returns a copy of the value. Read more

1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

impl Debug for API

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

impl Copy for API

Auto Trait Implementations§

impl UnwindSafe for API

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> ToOwned for T
where T: Clone,

type Owned = T

The resulting type after obtaining ownership.

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.