|
22 | 22 |
|
23 | 23 | */
|
24 | 24 |
|
| 25 | +#include "depthtospace_param.h" |
| 26 | + |
25 | 27 | #include "graph/tensor.h"
|
26 | 28 | #include "graph/node.h"
|
27 | 29 | #include "graph/graph.h"
|
|
34 | 36 |
|
35 | 37 | #include <math.h>
|
36 | 38 |
|
37 |
| -int ref_depthtospace_fp32(struct tensor* input_tensor, struct tensor* output_tensor, int num_thread) |
| 39 | +int ref_depthtospace_fp32(struct tensor* input_tensor, struct tensor* output_tensor, int num_thread, int block_size) |
38 | 40 | {
|
| 41 | + int n = input_tensor->dims[0]; |
| 42 | + int inc = input_tensor->dims[1]; |
| 43 | + int inh = input_tensor->dims[2]; |
| 44 | + int inw = input_tensor->dims[3]; |
| 45 | + |
| 46 | + int outc = inc / (block_size * block_size); |
| 47 | + int outh = input_tensor->dims[2] * block_size; |
| 48 | + int outw = input_tensor->dims[3] * block_size; |
| 49 | + |
39 | 50 | float* input_data = (float*)input_tensor->data;
|
40 | 51 | float* out_data = (float*)output_tensor->data;
|
41 | 52 | int total_size = input_tensor->elem_num;
|
42 | 53 |
|
43 |
| - for (int i = 0; i < total_size; i++) |
| 54 | + // TODO: add a mode field to depthtospace_param to select CRD or DCR |
| 55 | + for (int b = 0; b < n; ++b) |
44 | 56 | {
|
45 |
| - out_data[i] = input_data[i]; |
| 57 | + for (int s = 0; s < outc; ++s) |
| 58 | + { |
| 59 | + for (int h = 0; h < outh; ++h) |
| 60 | + { |
| 61 | + const int in_h = h / block_size; |
| 62 | + const int offset_h = (h % block_size); |
| 63 | + for (int w = 0; w < outw; ++w) |
| 64 | + { |
| 65 | + const int in_w = w / block_size; |
| 66 | + const int offset_w = w % block_size; |
| 67 | + //CRD |
| 68 | + const int offset_d = offset_h * block_size + offset_w; |
| 69 | + const int in_d = s * (block_size * block_size) + offset_d; |
| 70 | + //DCR |
| 71 | + //const int offset_d =(offset_h * block_size + offset_w) * outc; |
| 72 | + //const int in_d = s + offset_d; |
| 73 | + const int o_index = ((b * outc + s) * outh + h) * outw + w; |
| 74 | + const int i_index = ((b * inc + in_d) * inh + in_h) * inw + in_w; |
| 75 | + out_data[o_index] = input_data[i_index]; |
| 76 | + } |
| 77 | + } |
| 78 | + } |
46 | 79 | }
|
47 | 80 |
|
48 | 81 | return 0;
|
@@ -74,7 +107,9 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
|
74 | 107 | input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
|
75 | 108 | output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
|
76 | 109 |
|
77 |
| - int ret = ref_depthtospace_fp32(input_tensor, output_tensor, exec_graph->num_thread); |
| 110 | + struct depthtospace_param* param = (struct depthtospace_param*)ir_node->op.param_mem; |
| 111 | + |
| 112 | + int ret = ref_depthtospace_fp32(input_tensor, output_tensor, exec_graph->num_thread, param->block_size); |
78 | 113 | if (ret != 0)
|
79 | 114 | return -1;
|
80 | 115 |
|
|
0 commit comments