@@ -1028,12 +1028,30 @@ void layout_optimizer::set_onednn_dyn_conv_preferred_format(convolution_node& no
10281028 node.set_preferred_input_fmt (0 , get_fsv16_format (rank));
10291029 node.set_preferred_output_fmt (0 , get_fsv16_format (rank));
10301030
1031- // Override with default format for small channels (≤ 4)
1032- if (input_channels > 0 && input_channels <= 4 ) {
1031+ // Override input for small channels (≤ 16)
1032+ // fsv16 format uses 16-element blocks, so tensors with ≤ 16 channels waste block padding
1033+ // e.g. 8ch fills only 8/16 elements per block (50% waste), so planar format is more efficient
1034+ if (input_channels > 0 && input_channels <= 16 ) {
10331035 node.set_preferred_input_fmt (0 , format::get_default_format (rank));
10341036 }
10351037
1036- if (output_channels > 0 && output_channels <= 4 ) {
1038+ // Override output for small channels (≤ 16)
1039+ // same as input - avoid fsv16 block padding overhead for small channel counts
1040+ if (output_channels > 0 && output_channels <= 16 ) {
1041+ node.set_preferred_output_fmt (0 , format::get_default_format (rank));
1042+ }
1043+
1044+ // Override output for channel expansion operations (small input → large output)
1045+ // when expanding from small input channels (≤16) to large output channels (≥32),
1046+ // planar output format enables OneDNN to select optimized JIT kernel instead of reference kernel
1047+ // Thresholds explained:
1048+ // - input ≤ 16: matches fsv16 block size, input side uses planar format (set above)
1049+ // - output ≥ 32: 2 or more fsv16 blocks (32/16=2), where blocked write overhead exceeds
1050+ // sequential write benefits. planar format provides better cache locality
1051+ // and memory access patterns for large channel generation
1052+ // e.g. 3ch → 1024ch would create 64 fsv16 blocks with scattered writes,
1053+ // but planar format allows efficient sequential writes
1054+ if (input_channels > 0 && input_channels <= 16 && output_channels >= 32 ) {
10371055 node.set_preferred_output_fmt (0 , format::get_default_format (rank));
10381056 }
10391057 }
0 commit comments