Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions crates/burn-cubecl-fusion/src/shared/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ fn reshaped_index(
offset
}

#[allow(unreachable_code)]
#[cube]
#[allow(clippy::clone_on_copy)]
fn reshaped_index_to_original_index<C: CubePrimitive>(
Expand All @@ -579,7 +580,7 @@ fn reshaped_index_to_original_index<C: CubePrimitive>(
#[unroll]
for r in 0..rank {
let i = reverse_index(rank, r);
let shape = original.shape(comptime![i]);
let shape = original.shape(i);
let stride = original.stride(i);

let coordinate = remaining % shape;
Expand All @@ -591,10 +592,10 @@ fn reshaped_index_to_original_index<C: CubePrimitive>(
offset / original.line_size()
}

#[allow(unused_variables)]
#[cube]
#[allow(unused_variables)]
pub(crate) fn reverse_index(#[comptime] rank: u32, iter: u32) -> comptime_type!(u32) {
intrinsic!(|scope| {
intrinsic!(|_| {
let elem = iter.constant().map(|cons| cons.as_u32()).unwrap();
rank - elem - 1
})
Expand Down
22 changes: 4 additions & 18 deletions crates/burn-cubecl-fusion/src/shared/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use crate::shared::DYN_ELEM_ID;
use super::io::*;
use super::ir::*;
use cubecl::prelude::*;
use cubecl::unexpanded;

#[cube]
/// Fuse element-wise operations at the given write position.
Expand Down Expand Up @@ -134,14 +133,13 @@ pub fn init_locals(
#[allow(clippy::clone_on_copy)]
for i in 0..config.rank {
let reverse = reverse_index(config.rank, i);
let reverse_u32_comptime = unwrap_const_u32(reverse);
let arg = comptime![Arg::ScalarShape(start + reverse_u32_comptime)];
let arg = comptime![Arg::ScalarShape(start + reverse)];
let shape = read_scalar_shape(inputs, comptime![arg.clone()]);

ref_shape[comptime![reverse_u32_comptime]] = shape;
ref_strides[comptime![reverse_u32_comptime]] = stride_curr;
ref_shape[comptime![reverse]] = shape;
ref_strides[comptime![reverse]] = stride_curr;

stride_curr *= ref_shape[comptime![reverse_u32_comptime]];
stride_curr *= ref_shape[comptime![reverse]];
}

LocalArgs::new(ref_shape.to_slice(), ref_strides.to_slice(), 1u32)
Expand All @@ -150,18 +148,6 @@ pub fn init_locals(
}
}

/// Signature-only stub for turning a `u32` into its compile-time value.
///
/// The body is just `unexpanded!()`; the actual extraction is written in the
/// same-named module's `expand` function below.
// NOTE(review): presumably this follows cubecl's convention of pairing a stub
// with a `<name>::expand` function used during kernel expansion — confirm.
fn unwrap_const_u32(_elem: u32) -> u32 {
unexpanded!()
}

mod unwrap_const_u32 {
    use super::*;

    /// Reads the constant carried by `elem` and returns it as a plain `u32`.
    ///
    /// The scope argument is accepted but not used.
    ///
    /// # Panics
    ///
    /// Panics if `elem.constant()` yields no value, since the result is
    /// unwrapped unconditionally.
    pub(crate) fn expand(_scope: &mut Scope, elem: ExpandElementTyped<u32>) -> u32 {
        let constant = elem.constant();
        constant.map(|value| value.as_u32()).unwrap()
    }
}

#[cube]
fn fuse(
inputs: &GlobalArgs,
Expand Down
23 changes: 23 additions & 0 deletions crates/burn-cubecl/src/kernel/contiguous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,26 @@ pub fn into_contiguous<R: CubeRuntime>(tensor: CubeTensor<R>) -> CubeTensor<R> {
)
})
}

/// Make a tensor contiguous with an aligned last stride.
///
/// A tensor that already reports itself as contiguous is returned unchanged.
/// Otherwise `cubecl::linalg::tensor::into_contiguous_pitched` is run, and a new
/// tensor is assembled from the handle, shape and strides it returns, keeping the
/// original client, device and dtype.
pub fn into_contiguous_aligned<R: CubeRuntime>(tensor: CubeTensor<R>) -> CubeTensor<R> {
    if tensor.is_contiguous() {
        tensor
    } else {
        execute_with_dtype!(tensor.dtype, E, {
            // Borrow the client and handle for the call before the fields of
            // `tensor` are moved into the new tensor below.
            let pitched = cubecl::linalg::tensor::into_contiguous_pitched::<R, E>(
                &tensor.client,
                &tensor.as_handle_ref(),
            );

            CubeTensor::new(
                tensor.client,
                pitched.handle,
                pitched.shape.into(),
                tensor.device,
                pitched.strides,
                tensor.dtype,
            )
        })
    }
}
15 changes: 12 additions & 3 deletions crates/burn-cubecl/src/kernel/conv/conv2d/base.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use burn_tensor::ops::{ConvOptions, ConvTransposeOptions};
use cubecl::linalg::convolution::ConvLaunchError;

use crate::{CubeRuntime, FloatElement, IntElement, tensor::CubeTensor};
use crate::{
CubeRuntime, FloatElement, IntElement,
ops::{permute_nchw_to_nhwc, permute_nhwc_to_nchw},
tensor::CubeTensor,
};

#[cfg(feature = "autotune")]
use super::{conv_transpose2d_autotune, conv2d_autotune};
Expand Down Expand Up @@ -74,13 +78,18 @@ pub fn conv2d<R: CubeRuntime, E: FloatElement>(
options: ConvOptions<2>,
strategy: Conv2dStrategy,
) -> Result<CubeTensor<R>, ConvLaunchError> {
match strategy {
let input = permute_nchw_to_nhwc(input);
let weight = permute_nchw_to_nhwc(weight);

let out = match strategy {
Conv2dStrategy::Direct => conv2d_direct::<R, E>(input, weight, bias, options),
#[cfg(feature = "autotune")]
Conv2dStrategy::Autotune => Ok(conv2d_autotune::<R, E>(input, weight, bias, options)),
Conv2dStrategy::Gemm => conv2d_im2col::<R, E>(input, weight, bias, options),
Conv2dStrategy::ImplicitGemm => conv2d_gemm_cyclic::<R, E>(input, weight, bias, options),
}
}?;

Ok(permute_nhwc_to_nchw(out))
}

/// Perform a 2D convolution with the given strategy
Expand Down
9 changes: 4 additions & 5 deletions crates/burn-cubecl/src/kernel/conv/conv2d/col2im.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,10 @@ pub(crate) fn index<R: CubeRuntime, E: CubeElement>(
for dim in tensor.shape.dims[1..].iter() {
indices.push(0..*dim);
}
let new_shape = Shape {
dims: tensor.shape.dims[1..].to_vec(),
};
let tensor = slice::<R, E>(tensor, &indices);
reshape(tensor, new_shape)
let mut tensor = slice::<R, E>(tensor, &indices);
tensor.shape.dims.remove(0);
tensor.strides.remove(0);
tensor
}

#[allow(clippy::too_many_arguments)]
Expand Down
Loading
Loading