#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(unreachable_code)]
use std::cmp::PartialOrd;
use std::fmt::Debug;
use std::ops::*;
#[cfg(feature = "native")]
use rand::{distributions::Distribution, Rng, SeedableRng};
use crate::plugin::*;
use co::plugin::numeric_helpers::Bounded;
use co::plugin::numeric_helpers::Float;
use co::plugin::Error as PluginError;
use co::prelude::*;
use co::Error;
use coaster as co;
#[macro_use]
pub mod helper;
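/// Returns an error if the two slices differ in length.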
fn lens_eq<T>(xs: &[T], ys: &[T]) -> Result<(), Error> {
if xs.len() != ys.len() {
return Err(PluginError::Operation("Tensor dimension mismatch").into());
}
Ok(())
}
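/// Applies `f` to every element of `src` in place.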
fn map1_inplace<T, F>(src: &mut [T], f: F) -> Result<(), Error>
where
T: Float,
F: Fn(T) -> T,
{
    for x in src.iter_mut() {
        *x = f(*x);
    }
Ok(())
}
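/// Folds `src1` into `src2` elementwise in place: `src2[i] = f(src1[i], src2[i])`.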
fn map2_inplace<T, F>(src1: &[T], src2: &mut [T], f: F) -> Result<(), Error>
where
T: Float,
F: Fn(T, T) -> T,
{
lens_eq(src1, src2)?;
    for (s2, &s1) in src2.iter_mut().zip(src1.iter()) {
        *s2 = f(s1, *s2);
    }
Ok(())
}
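/// Writes `f(src[i])` into `dst[i]` for every element.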
fn map1<T, F>(src: &[T], dst: &mut [T], f: F) -> Result<(), Error>
where
T: Float,
F: Fn(T) -> T,
{
lens_eq(dst, src)?;
    for (d, &s) in dst.iter_mut().zip(src.iter()) {
        *d = f(s);
    }
Ok(())
}
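/// Writes `f(src1[i], src2[i])` into `dst[i]` for every element.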
fn map2<T, F>(src1: &[T], src2: &[T], dst: &mut [T], f: F) -> Result<(), Error>
where
T: Float,
F: Fn(T, T) -> T,
{
lens_eq(dst, src1)?;
lens_eq(dst, src2)?;
    for ((d, &s1), &s2) in dst.iter_mut().zip(src1.iter()).zip(src2.iter()) {
        *d = f(s1, s2);
    }
Ok(())
}
impl<T> NN<T> for Backend<Native>
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy,
{
type CC = helper::ConvolutionConfig;
type CLRN = helper::NormalizationConfig;
type CPOOL = helper::PoolingConfig;
type CDROP = helper::DropoutConfig;
type CRNN = helper::RnnConfig;
fn init_nn() {}
}
impl<T> NNOperationConfig<T> for helper::ConvolutionConfig where
    T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> ConvolutionConfig<T> for helper::ConvolutionConfig where
    T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> RnnConfig<T> for helper::RnnConfig where
    T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> NNOperationConfig<T> for helper::NormalizationConfig where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> NNOperationConfig<T> for helper::PoolingConfig where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> NNOperationConfig<T> for helper::DropoutConfig where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> NNOperationConfig<T> for helper::RnnConfig where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy
{
}
impl<T> Convolution<T> for Backend<Native>
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy,
{
fn new_convolution_config(
&self,
src: &SharedTensor<T>,
dest: &SharedTensor<T>,
filter: &SharedTensor<T>,
algo_fwd: ConvForwardAlgo,
algo_bwd_filter: ConvBackwardFilterAlgo,
algo_bwd_data: ConvBackwardDataAlgo,
stride: &[i32],
zero_padding: &[i32],
) -> Result<Self::CC, Error> {
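        // The native backend only implements the direct path, so anything
        // other than `Auto` or `ImplicitGEMM` is rejected up front.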
match algo_fwd {
ConvForwardAlgo::Auto | ConvForwardAlgo::ImplicitGEMM => {}
_ => {
return Err(Error::Plugin(PluginError::Plugin("Unimplemented.")));
}
}
match algo_bwd_filter {
ConvBackwardFilterAlgo::Auto | ConvBackwardFilterAlgo::ImplicitGEMM => {}
_ => {
return Err(Error::Plugin(PluginError::Plugin("Unimplemented.")));
}
}
match algo_bwd_data {
ConvBackwardDataAlgo::Auto | ConvBackwardDataAlgo::ImplicitGEMM => {}
_ => {
return Err(Error::Plugin(PluginError::Plugin("Unimplemented.")));
}
}
Ok(helper::ConvolutionConfig {
filter_shape: filter.desc().clone(),
stride: stride.to_vec(),
padding: zero_padding.to_vec(),
})
}
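    /// Direct n-dimensional convolution over `[N, C, spatial...]` tensors,
    /// implemented as a pair of recursions: `conv` walks the output
    /// positions while `filter_` computes the dot product of the filter
    /// window with the (zero-padded) input underneath it.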
fn convolution(
&self,
filter: &SharedTensor<T>,
x: &SharedTensor<T>,
result: &mut SharedTensor<T>,
_workspace: &mut SharedTensor<u8>,
config: &Self::CC,
) -> Result<(), Error> {
let dev = self.device();
let input_dim = x.desc();
let input = x.read(dev).unwrap().as_slice::<T>();
let input_stride = input_dim.default_stride();
let output_dim = result.desc().clone();
let output = result.write_only(dev).unwrap().as_mut_slice::<T>();
let output_stride = output_dim.default_stride();
        // Convolution accumulates into `output`, so zero it first.
        for o in output.iter_mut() {
            *o = Default::default();
        }
let filter_dim = filter.desc();
let filter = filter.read(dev).unwrap().as_slice::<T>();
let filter_stride = filter_dim.default_stride();
        // Shape sanity: batch sizes agree, output channels match the filter
        // count, and input channels match the filter depth.
        assert_eq!(input_dim[0], output_dim[0]);
        assert_eq!(filter_dim[0], output_dim[1]);
        assert_eq!(input_dim[1], filter_dim[1]);
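        // Recursive core: accumulate `input · filter` over one spatial axis,
        // treating indices that fall into the zero padding as zero.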
fn filter_<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
input_offset: usize,
input_idx_base: &[usize],
filter: &[T],
filter_stride: &[usize],
filter_dim: &[usize],
filter_offset: usize,
padding: &[i32],
depth: usize,
depth_end: usize,
acc: Option<T>,
) -> T
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy,
{
let mut acc = acc.unwrap_or_default();
let p = padding[0] as usize;
let input_idx_end = input_dim[0] + 2 * p;
for filter_idx in 0..filter_dim[0] {
let input_idx = input_idx_base[0] + filter_idx;
let i_offset = input_offset + (input_idx - p) * input_stride[0];
let f_offset = filter_offset + filter_idx * filter_stride[0];
let v = if input_idx < p || input_idx + 1 > input_idx_end - p {
Default::default()
} else if depth + 1 >= depth_end {
input[i_offset] * filter[f_offset]
} else {
filter_(
input,
&input_stride[1..],
&input_dim[1..],
i_offset,
&input_idx_base[1..],
filter,
&filter_stride[1..],
&filter_dim[1..],
f_offset,
&padding[1..],
depth + 1,
depth_end,
None,
)
};
acc = acc + v;
}
            acc
}
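        // Walks the output positions along each spatial axis, recursing one
        // axis at a time and invoking `filter_` at full depth.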
fn conv<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
top_input_offset: usize,
input_offset: usize,
input_idx_base: &mut [usize],
filter: &[T],
filter_stride: &[usize],
filter_dim: &[usize],
filter_offset: usize,
depth: usize,
padding: &[i32],
stride: &[i32],
output: &mut [T],
output_stride: &[usize],
output_dim: &[usize],
output_offset: usize,
) where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy,
{
            // Zero padding is accounted for inside `filter_`; this loop only
            // enumerates output positions along one axis.
            for output_idx in 0..output_dim[0] {
let input_i = output_idx * stride[0] as usize;
input_idx_base[depth] = input_i;
let input_offset = input_offset + input_i * input_stride[depth];
let output_offset = output_offset + output_idx * output_stride[0];
if depth + 1 < input_dim.len() {
conv(
input,
input_stride,
input_dim,
top_input_offset,
input_offset,
input_idx_base,
filter,
filter_stride,
filter_dim,
filter_offset,
depth + 1,
padding,
&stride[1..],
output,
&output_stride[1..],
&output_dim[1..],
output_offset,
);
} else {
let v = filter_(
input,
input_stride,
input_dim,
top_input_offset,
&input_idx_base[..],
filter,
filter_stride,
filter_dim,
filter_offset,
padding,
0,
input_dim.len(),
None,
);
output[output_offset] = output[output_offset] + v;
}
}
}
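        // Outer loops over output feature maps `k` and input channels `d1`;
        // each `(k, d1)` pair contributes one accumulated spatial convolution.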
fn conv_k_d1<T>(
_batch: usize,
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
input_offset: usize,
input_idx_base: &mut [usize],
filter: &[T],
filter_stride: &[usize],
filter_dim: &[usize],
padding: &[i32],
stride: &[i32],
output: &mut [T],
output_stride: &[usize],
output_dim: &[usize],
output_offset: usize,
) where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy,
{
for k in 0..filter_dim[0] {
let output_offset = output_offset + k * output_stride[0];
let filter_offset = k * filter_stride[0];
for d1 in 0..input_dim[0] {
let input_offset = input_offset + d1 * input_stride[0];
let filter_offset = filter_offset + d1 * filter_stride[1];
conv(
input,
&input_stride[1..],
&input_dim[1..],
input_offset,
input_offset,
input_idx_base,
filter,
&filter_stride[2..],
&filter_dim[2..],
filter_offset,
0,
padding,
stride,
output,
&output_stride[1..],
&output_dim[1..],
output_offset,
);
}
}
}
        // Scratch space tracking the current window origin per spatial axis
        // (dims are `[N, C, spatial...]`, hence `len() - 2`).
        let mut input_idx = vec![0usize; input_dim.len() - 2];
let batches = input_dim[0];
for batch in 0..batches {
let input_offset = batch * input_stride[0];
let output_offset = batch * output_stride[0];
conv_k_d1(
batch,
input,
&input_stride[1..],
&input_dim[1..],
input_offset,
&mut input_idx[..],
filter,
&filter_stride[..],
&filter_dim[..],
&config.padding[..],
&config.stride[..],
output,
&output_stride[1..],
&output_dim[1..],
output_offset,
);
}
Ok(())
}
fn convolution_grad_filter(
&self,
src_data: &SharedTensor<T>,
dest_diff: &SharedTensor<T>,
filter_diff: &mut SharedTensor<T>,
workspace: &mut SharedTensor<u8>,
config: &Self::CC,
) -> Result<(), Error> {
unimplemented!()
}
fn convolution_grad_data(
&self,
filter: &SharedTensor<T>,
x_diff: &SharedTensor<T>,
result_diff: &mut SharedTensor<T>,
workspace: &mut SharedTensor<u8>,
config: &Self::CC,
) -> Result<(), Error> {
unimplemented!()
}
}
impl<T> Pooling<T> for Backend<Native>
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
fn new_pooling_config(
&self,
window: &[i32],
stride: &[i32],
padding: &[i32],
) -> Result<Self::CPOOL, Error> {
Ok(helper::PoolingConfig {
window: window.to_vec(),
stride: stride.to_vec(),
padding: padding.to_vec(),
})
}
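    /// Max pooling over `[N, C, spatial...]` tensors: `recurse` walks the
    /// output positions and `max_pooling_` scans each window, treating
    /// padding as `T::min_value()`.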
fn pooling_max(
&self,
x: &SharedTensor<T>,
result: &mut SharedTensor<T>,
config: &Self::CPOOL,
) -> Result<(), Error> {
let dev = self.device();
        let input_dim = x.desc();
        let input = x.read(dev).unwrap().as_slice::<T>();
        let input_stride = input_dim.default_stride();
        let output_dim = result.desc().clone();
        let output = result.write_only(dev).unwrap().as_mut_slice::<T>();
        let output_stride = output_dim.default_stride();
        // Pooling writes only the window maxima, so clear the buffer first.
        for o in output.iter_mut() {
            *o = Default::default();
        }
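        // Recursive window scan returning the maximum value, with padded
        // positions contributing `T::min_value()`.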
fn max_pooling_<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
input_offset: usize,
input_idx_base: &[usize],
window: &[i32],
padding: &[i32],
depth: usize,
depth_end: usize,
current_max: Option<T>,
) -> T
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
let mut current_max = current_max.unwrap_or(T::min_value());
let p = padding[0] as usize;
let input_idx_end = input_dim[0] + 2 * p;
for window_idx in 0..window[0] {
let input_idx = input_idx_base[0] + window_idx as usize;
let v = if input_idx < p || input_idx + 1 > input_idx_end - p {
T::min_value()
} else {
let i_mem_offset = input_offset + (input_idx - p) * input_stride[0];
if depth + 1 >= depth_end {
input[i_mem_offset]
} else {
max_pooling_(
input,
&input_stride[1..],
&input_dim[1..],
i_mem_offset,
&input_idx_base[1..],
&window[1..],
&padding[1..],
depth + 1,
depth_end,
None,
)
}
};
                // For NaN neither comparison holds; fail loudly rather than
                // propagate a bogus maximum.
                current_max = if current_max >= v {
                    current_max
                } else if current_max < v {
                    v
                } else {
                    panic!("NaN")
                };
}
current_max
}
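        // Walks output positions axis by axis; at full depth the window
        // maximum is written to the output.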
fn recurse<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
top_input_offset: usize,
input_offset: usize,
input_idx_base: &mut [usize],
window: &[i32],
depth: usize,
stride: &[i32],
padding: &[i32],
output: &mut [T],
output_stride: &[usize],
output_dim: &[usize],
output_offset: usize,
) where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
            // Padding and window extents are handled inside `max_pooling_`;
            // this loop only enumerates output positions along one axis.
            for output_idx in 0..output_dim[0] {
let input_idx = output_idx * stride[0] as usize;
input_idx_base[depth] = input_idx;
let input_offset = input_offset + input_idx * input_stride[depth];
let output_offset = output_offset + output_idx * output_stride[0];
if depth + 1 < input_dim.len() {
recurse(
input,
input_stride,
input_dim,
top_input_offset,
input_offset,
input_idx_base,
window,
depth + 1,
&stride[1..],
padding,
output,
&output_stride[1..],
&output_dim[1..],
output_offset,
);
} else {
let v = max_pooling_(
input,
input_stride,
input_dim,
top_input_offset,
&input_idx_base[..],
window,
padding,
0,
input_dim.len(),
None,
);
output[output_offset] = v;
}
}
}
        // Scratch space tracking the current window origin per spatial axis.
        let mut input_idx = vec![0usize; input_dim.len() - 2];
let window = &config.window[..];
let stride = &config.stride[..];
let padding = &config.padding[..];
for batch in 0..input_dim[0] {
let input_offset = batch * input_stride[0];
let output_offset = batch * output_stride[0];
for d1 in 0..input_dim[1] {
let input_offset = input_offset + d1 * input_stride[1];
let output_offset = output_offset + d1 * output_stride[1];
recurse(
input,
&input_stride[2..],
&input_dim[2..],
input_offset,
input_offset,
&mut input_idx,
&window,
0,
&stride,
&padding,
output,
&output_stride[2..],
&output_dim[2..],
output_offset,
);
}
}
Ok(())
}
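    /// Backward pass of max pooling: recomputes the argmax of every window
    /// and routes the upstream gradient to that input position.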
fn pooling_max_grad(
&self,
x: &SharedTensor<T>,
x_diff: &SharedTensor<T>,
result: &SharedTensor<T>,
result_diff: &mut SharedTensor<T>,
config: &Self::CPOOL,
) -> Result<(), Error> {
let dev = self.device();
let input_dim = x.desc(); println!("x dims {:?}", input_dim);
let input = x.read(dev).unwrap().as_slice::<T>();
let input_stride = input_dim.default_stride(); let x_diff_dim = x_diff.desc(); let x_diff = x_diff.read(dev).unwrap().as_slice::<T>();
println!("x_diff dims {:?}", x_diff_dim);
let output_dim = result_diff.desc().clone(); println!("result dims {:?}", result.desc());
println!("result_diff dims {:?}", output_dim);
let output = result_diff.write_only(dev).unwrap().as_mut_slice::<T>();
let output_stride = output_dim.default_stride(); {
for o in output.iter_mut() {
*o = Default::default();
}
}
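        // Same recursive window scan as the forward pass, additionally
        // tracking the flat input offset of the maximum.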
fn max_pooling_<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
input_offset: usize,
input_idx_base: &[usize],
window: &[i32],
padding: &[i32],
depth: usize,
depth_end: usize,
current_max: Option<T>,
current_max_index: Option<usize>,
) -> (T, usize)
where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
let mut current_max = (
current_max.unwrap_or(T::min_value()),
current_max_index.unwrap_or(0usize),
);
let p = padding[0] as usize;
let input_idx_end = input_dim[0] + 2 * p;
for window_idx in 0..window[0] {
let input_idx = input_idx_base[0] + window_idx as usize;
let (v, v_index) = if input_idx < p || input_idx + 1 > input_idx_end - p {
(T::min_value(), 0usize)
} else {
let i_mem_offset = input_offset + (input_idx - p) * input_stride[0];
if depth + 1 >= depth_end {
(input[i_mem_offset], i_mem_offset)
} else {
max_pooling_(
input,
&input_stride[1..],
&input_dim[1..],
i_mem_offset,
&input_idx_base[1..],
&window[1..],
&padding[1..],
depth + 1,
depth_end,
None,
None,
)
}
};
                // For NaN neither comparison holds; fail loudly rather than
                // propagate a bogus maximum.
                current_max = if current_max.0 >= v {
                    current_max
                } else if current_max.0 < v {
                    (v, v_index)
                } else {
                    panic!("NaN")
                };
}
current_max
}
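        // Walks output positions; at full depth the upstream gradient is
        // scattered to the argmax location found by `max_pooling_`.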
fn recurse<T>(
input: &[T],
input_stride: &[usize],
input_dim: &[usize],
top_input_offset: usize,
input_offset: usize,
input_idx_base: &mut [usize],
window: &[i32],
depth: usize,
stride: &[i32],
padding: &[i32],
output: &mut [T],
output_stride: &[usize],
output_dim: &[usize],
output_offset: usize,
dx: &[T],
) where
T: Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
            // Padding and window extents are handled inside `max_pooling_`;
            // this loop only enumerates output positions along one axis.
            for output_idx in 0..output_dim[0] {
let input_idx = output_idx * stride[0] as usize;
input_idx_base[depth] = input_idx;
let input_offset = input_offset + input_idx * input_stride[depth];
let output_offset = output_offset + output_idx * output_stride[0];
if depth + 1 < input_dim.len() {
recurse(
input,
input_stride,
input_dim,
top_input_offset,
input_offset,
input_idx_base,
window,
depth + 1,
&stride[1..],
padding,
output,
&output_stride[1..],
&output_dim[1..],
output_offset,
dx,
);
} else {
                    let (_, index) = max_pooling_(
input,
input_stride,
input_dim,
top_input_offset,
&input_idx_base[..],
window,
padding,
0,
input_dim.len(),
None,
None,
);
                        // Scatter the upstream gradient to the input element
                        // that produced the maximum; indexing `dx` with
                        // `output_offset` assumes the gradient buffer shares
                        // the layout used to compute that offset.
                        output[index] = dx[output_offset];
                    }
}
}
        // Scratch space tracking the current window origin per spatial axis.
        let mut input_idx = vec![0usize; input_dim.len() - 2];
let window = &config.window[..];
let stride = &config.stride[..];
let padding = &config.padding[..];
for batch in 0..input_dim[0] {
let input_offset = batch * input_stride[0];
let output_offset = batch * output_stride[0];
for d1 in 0..input_dim[1] {
let input_offset = input_offset + d1 * input_stride[1];
let output_offset = output_offset + d1 * output_stride[1];
recurse(
input,
&input_stride[2..],
&input_dim[2..],
input_offset,
input_offset,
&mut input_idx,
&window,
0,
&stride,
&padding,
output,
&output_stride[2..],
&output_dim[2..],
output_offset,
x_diff,
);
}
}
Ok(())
}
fn pooling_avg(
&self,
x: &SharedTensor<T>,
result: &mut SharedTensor<T>,
config: &Self::CPOOL,
) -> Result<(), Error> {
return Err(Error::Plugin(PluginError::Plugin("Unimplemented.")));
}
fn pooling_avg_grad(
&self,
x: &SharedTensor<T>,
x_diff: &SharedTensor<T>,
result: &SharedTensor<T>,
result_diff: &mut SharedTensor<T>,
config: &Self::CPOOL,
) -> Result<(), Error> {
return Err(Error::Plugin(PluginError::Plugin("Unimplemented.")));
}
}
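// RNN support is not implemented for the native backend; every method
// below bails with `unimplemented!`.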
impl<T> Rnn<T> for Backend<Native>
where
T: Float + Default + Copy + PartialOrd + Bounded,
{
fn new_rnn_config(
&self,
src: &SharedTensor<T>,
dropout_probability: Option<f32>,
dropout_seed: Option<u64>,
sequence_length: i32,
network_mode: RnnNetworkMode,
input_mode: RnnInputMode,
direction_mode: DirectionMode,
algorithm: RnnAlgorithm,
hidden_size: i32,
num_layers: i32,
batch_size: i32,
) -> Result<Self::CRNN, Error> {
unimplemented!()
}
fn generate_rnn_weight_description(
&self,
rnn_config: &Self::CRNN,
input_size: i32,
) -> Result<Vec<usize>, Error> {
unimplemented!()
}
fn rnn_forward(
&self,
src: &SharedTensor<T>,
output: &mut SharedTensor<T>,
rnn_config: &Self::CRNN,
weight: &SharedTensor<T>,
workspace: &mut SharedTensor<u8>,
) -> Result<(), Error> {
unimplemented!()
}
fn rnn_backward_data(
&self,
src: &SharedTensor<T>,
src_gradient: &mut SharedTensor<T>,
output: &SharedTensor<T>,
output_gradient: &SharedTensor<T>,
rnn_config: &Self::CRNN,
weight: &SharedTensor<T>,
workspace: &mut SharedTensor<u8>,
) -> Result<(), Error> {
unimplemented!()
}
fn rnn_backward_weights(
&self,
src: &SharedTensor<T>,
output: &SharedTensor<T>,
filter: &mut SharedTensor<T>,
rnn_config: &Self::CRNN,
workspace: &mut SharedTensor<u8>,
) -> Result<(), Error> {
unimplemented!()
}
}
#[cfg(feature = "native")]
impl<T> Dropout<T> for Backend<Native>
where
T: Float + Add<T, Output = T> + Mul<T, Output = T> + Default + Copy + PartialOrd + Bounded,
{
fn new_dropout_config(&self, probability: f32, seed: u64) -> Result<Self::CDROP, Error> {
Ok(helper::DropoutConfig { probability, seed })
}
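    /// Bernoulli dropout driven by a ChaCha RNG seeded from the config.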
#[cfg(feature = "native")]
fn dropout(
&self,
x: &SharedTensor<T>,
result: &mut SharedTensor<T>,
config: &Self::CDROP,
) -> Result<(), Error> {
let dev = self.device();
        let input = x.read(dev).unwrap().as_slice::<T>();
        let output = result.write_only(dev).unwrap().as_mut_slice::<T>();
        // ChaCha needs a 32-byte seed; extrapolate the caller's 8-byte seed
        // deterministically by repeating it at fixed offsets.
        let seed: [u8; 8] = config.seed.to_le_bytes();
        let mut extrapolated_seed = [0u8; 32];
        extrapolated_seed[0..8].copy_from_slice(&seed);
        extrapolated_seed[12..20].copy_from_slice(&seed);
        extrapolated_seed[24..32].copy_from_slice(&seed);
let mut rng = ::rand_chacha::ChaChaRng::from_seed(extrapolated_seed);
let dist = ::rand::distributions::Uniform::<f32>::new_inclusive(0., 1.);
        // Keep each element with probability `1 - probability`; no
        // inverted-dropout rescaling is applied. Assumes `x` and `result`
        // share a shape.
        for (o, &i) in output.iter_mut().zip(input.iter()) {
            *o = if dist.sample(&mut rng) >= config.probability {
                i
            } else {
                T::zero()
            };
        }
Ok(())
}
#[allow(unused_variables)]
fn dropout_grad(
&self,
x: &SharedTensor<T>,
x_diff: &SharedTensor<T>,
result: &SharedTensor<T>,
result_diff: &mut SharedTensor<T>,
config: &Self::CDROP,
) -> Result<(), Error> {
        // No-op: the native backend applies no gradient masking here, so
        // `result_diff` is left untouched.
        Ok(())
}
}
impl_ops_sigmoid_for!(f32, Backend<Native>);
impl_ops_relu_for!(f32, Backend<Native>);
impl_ops_tanh_for!(f32, Backend<Native>);
impl_ops_softmax_for!(f32, Backend<Native>);
impl_ops_log_softmax_for!(f32, Backend<Native>);
impl_ops_sigmoid_for!(f64, Backend<Native>);
impl_ops_relu_for!(f64, Backend<Native>);
impl_ops_tanh_for!(f64, Backend<Native>);
impl_ops_softmax_for!(f64, Backend<Native>);
impl_ops_log_softmax_for!(f64, Backend<Native>);
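// A minimal sanity-check sketch for the slice-mapping helpers above; it
// exercises only `map1` and the length guard, using `f32` as the element
// type. The module and test names are illustrative, not part of any API.
#[cfg(test)]
mod native_helper_tests {
    use super::*;

    #[test]
    fn map1_applies_elementwise() {
        let src = [1.0f32, 2.0, 3.0];
        let mut dst = [0.0f32; 3];
        map1(&src, &mut dst, |x| x * 2.0).unwrap();
        assert_eq!(dst, [2.0, 4.0, 6.0]);
    }

    #[test]
    fn lens_eq_rejects_mismatched_lengths() {
        assert!(lens_eq(&[0.0f32; 2], &[0.0f32; 3]).is_err());
    }
}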