#include <ccv.h>
#include <ccv_internal.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <nnc/ccv_nnc_internal.h>
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif
/**
 * CPU reference forward pass for ReLU.
 *
 * Reads inputs[0] and writes ccv_max(x, 0) for each of its elements into the
 * same position of outputs[0]. Both tensors are accessed through their f32
 * data pointers.
 *
 * Preconditions (enforced with assert only):
 *  - exactly one input and one output,
 *  - neither tensor is a tensor view,
 *  - the output has the same extent as the input on every leading dimension
 *    of the input that is greater than zero.
 *
 * cmd, hint, flags and stream_context are not used by this implementation.
 *
 * @return CCV_NNC_EXEC_SUCCESS unconditionally.
 */
static int _ccv_nnc_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 1);
	const ccv_nnc_tensor_t* const source = inputs[0];
	assert(!CCV_IS_TENSOR_VIEW(source));
	assert(output_size == 1);
	ccv_nnc_tensor_t* const target = outputs[0];
	assert(!CCV_IS_TENSOR_VIEW(target));
	const int total = ccv_nnc_tensor_count(source->info);
	// Shape check: walk the input's dimensions until the first non-positive
	// entry (or the end of the dim array) and require the output to agree.
	int axis = 0;
	while (axis < CCV_NNC_MAX_DIM_ALLOC && source->info.dim[axis] > 0)
	{
		assert(source->info.dim[axis] == target->info.dim[axis]);
		axis++;
	}
	float* const src = source->data.f32;
	float* const dst = target->data.f32;
	// Each element is read and then written at the same index, so the loop is
	// also well-defined when src and dst point at the same buffer.
	int idx;
	for (idx = 0; idx < total; idx++)
	{
		dst[idx] = ccv_max(src[idx], 0);
	}
	return CCV_NNC_EXEC_SUCCESS;
}
/**
 * CPU reference backward pass for ReLU.
 *
 * inputs[0] is the incoming gradient g, inputs[1] is the tensor b used to
 * build the mask, and outputs[0] receives the propagated gradient h:
 *
 *     h[i] = b[i] > 0 ? g[i] : 0
 *
 * All three tensors are accessed through their f32 data pointers.
 *
 * Preconditions (enforced with assert only):
 *  - exactly two inputs and one output,
 *  - none of the tensors is a tensor view,
 *  - b and h have the same extent as g on every leading dimension of g that
 *    is greater than zero.
 *
 * cmd, hint, flags and stream_context are not used by this implementation.
 *
 * NOTE(review): b is presumably the activation produced by the forward pass
 * (it carries the same name as the forward output) -- confirm against the
 * command definition.
 *
 * @return CCV_NNC_EXEC_SUCCESS unconditionally.
 */
static int _ccv_nnc_relu_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 2);
	const ccv_nnc_tensor_t* g = inputs[0]; // gradient
	assert(!CCV_IS_TENSOR_VIEW(g));
	const ccv_nnc_tensor_t* b = inputs[1]; // tensor the mask is derived from
	assert(!CCV_IS_TENSOR_VIEW(b));
	assert(output_size == 1);
	ccv_nnc_tensor_t* h = outputs[0];
	assert(!CCV_IS_TENSOR_VIEW(h));
	int i, count = ccv_nnc_tensor_count(g->info);
	// Shape check over the leading positive dimensions of the gradient.
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && g->info.dim[i] > 0; i++)
	{
		assert(b->info.dim[i] == g->info.dim[i]);
		assert(g->info.dim[i] == h->info.dim[i]);
	}
	const float* bp = b->data.f32;
	const float* gp = g->data.f32;
	float* hp = h->data.f32;
	// The comparison must be strict. The forward kernel writes max(a, 0),
	// which is never negative, so with ">= 0" the mask would always pass and
	// the gradient would leak through units that were clamped to zero. A
	// strict test blocks the gradient wherever b is exactly 0.
	for (i = 0; i < count; i++)
		hp[i] = (bp[i] > 0) ? gp[i] : 0;
	return CCV_NNC_EXEC_SUCCESS;
}
/*
 * Backend registration for CCV_NNC_RELU_FORWARD on CCV_NNC_BACKEND_CPU_REF.
 * Fills in the registry entry: the function to execute plus the tensor
 * format, datatype and memory type this backend declares.
 */
REGISTER_COMMAND_BACKEND(CCV_NNC_RELU_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Entry point invoked for this command/backend pair.
	registry->exec = _ccv_nnc_relu_forw;
	// Presumably the number of algorithm variants offered -- TODO confirm.
	registry->algorithms = 1;
	// Declared tensor constraints: CPU memory, 32-bit float, NHWC format.
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
}
/*
 * Backend registration for CCV_NNC_RELU_BACKWARD on CCV_NNC_BACKEND_CPU_REF.
 * Fills in the registry entry: the function to execute plus the tensor
 * format, datatype and memory type this backend declares.
 */
REGISTER_COMMAND_BACKEND(CCV_NNC_RELU_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Entry point invoked for this command/backend pair.
	registry->exec = _ccv_nnc_relu_back;
	// Presumably the number of algorithm variants offered -- TODO confirm.
	registry->algorithms = 1;
	// Declared tensor constraints: CPU memory, 32-bit float, NHWC format.
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
}