The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
#include "ccv.h"
#include "ccv_internal.h"
#if defined(HAVE_SSE2)
#include <xmmintrin.h>
#elif defined(HAVE_NEON)
#include <arm_neon.h>
#endif
#ifdef HAVE_GSL
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#endif
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif
#include "3rdparty/sqlite3/sqlite3.h"

const ccv_scd_param_t ccv_scd_default_params = {
	.interval = 5,
	.min_neighbors = 1,
	.step_through = 4,
	.size = {
		.width = 48,
		.height = 48,
	},
};

#define CCV_SCD_CHANNEL (11)

// this uses a look up table for cubic root computation because rgb to luv only requires data within range of 0~1
static inline float fast_cube_root(const float d)
{
	static const float cube_root[2048] = {
		0.000000e+00, 7.875788e-02, 9.922871e-02, 1.135885e-01, 1.250203e-01, 1.346741e-01, 1.431126e-01, 1.506584e-01,
		1.575158e-01, 1.638230e-01, 1.696787e-01, 1.751560e-01, 1.803105e-01, 1.851861e-01, 1.898177e-01, 1.942336e-01,
		1.984574e-01, 2.025087e-01, 2.064040e-01, 2.101577e-01, 2.137818e-01, 2.172870e-01, 2.206827e-01, 2.239769e-01,
		2.271770e-01, 2.302894e-01, 2.333199e-01, 2.362736e-01, 2.391553e-01, 2.419692e-01, 2.447191e-01, 2.474085e-01,
		2.500407e-01, 2.526186e-01, 2.551450e-01, 2.576222e-01, 2.600528e-01, 2.624387e-01, 2.647821e-01, 2.670846e-01,
		2.693482e-01, 2.715743e-01, 2.737645e-01, 2.759202e-01, 2.780428e-01, 2.801334e-01, 2.821933e-01, 2.842235e-01,
		2.862251e-01, 2.881992e-01, 2.901465e-01, 2.920681e-01, 2.939647e-01, 2.958371e-01, 2.976862e-01, 2.995125e-01,
		3.013168e-01, 3.030998e-01, 3.048621e-01, 3.066041e-01, 3.083267e-01, 3.100302e-01, 3.117152e-01, 3.133821e-01,
		3.150315e-01, 3.166639e-01, 3.182795e-01, 3.198789e-01, 3.214625e-01, 3.230307e-01, 3.245837e-01, 3.261220e-01,
		3.276460e-01, 3.291559e-01, 3.306521e-01, 3.321348e-01, 3.336045e-01, 3.350613e-01, 3.365056e-01, 3.379375e-01,
		3.393574e-01, 3.407656e-01, 3.421622e-01, 3.435475e-01, 3.449216e-01, 3.462850e-01, 3.476377e-01, 3.489799e-01,
		3.503119e-01, 3.516339e-01, 3.529460e-01, 3.542483e-01, 3.555412e-01, 3.568248e-01, 3.580992e-01, 3.593646e-01,
		3.606211e-01, 3.618689e-01, 3.631082e-01, 3.643391e-01, 3.655617e-01, 3.667762e-01, 3.679827e-01, 3.691814e-01,
		3.703723e-01, 3.715556e-01, 3.727314e-01, 3.738999e-01, 3.750610e-01, 3.762151e-01, 3.773621e-01, 3.785022e-01,
		3.796354e-01, 3.807619e-01, 3.818818e-01, 3.829952e-01, 3.841021e-01, 3.852027e-01, 3.862970e-01, 3.873852e-01,
		3.884673e-01, 3.895434e-01, 3.906136e-01, 3.916779e-01, 3.927365e-01, 3.937894e-01, 3.948367e-01, 3.958785e-01,
		3.969149e-01, 3.979458e-01, 3.989714e-01, 3.999918e-01, 4.010071e-01, 4.020171e-01, 4.030222e-01, 4.040223e-01,
		4.050174e-01, 4.060076e-01, 4.069931e-01, 4.079738e-01, 4.089499e-01, 4.099212e-01, 4.108880e-01, 4.118503e-01,
		4.128081e-01, 4.137615e-01, 4.147105e-01, 4.156551e-01, 4.165955e-01, 4.175317e-01, 4.184637e-01, 4.193916e-01,
		4.203153e-01, 4.212351e-01, 4.221508e-01, 4.230626e-01, 4.239704e-01, 4.248744e-01, 4.257746e-01, 4.266710e-01,
		4.275636e-01, 4.284525e-01, 4.293377e-01, 4.302193e-01, 4.310973e-01, 4.319718e-01, 4.328427e-01, 4.337101e-01,
		4.345741e-01, 4.354346e-01, 4.362918e-01, 4.371456e-01, 4.379961e-01, 4.388433e-01, 4.396872e-01, 4.405279e-01,
		4.413654e-01, 4.421997e-01, 4.430309e-01, 4.438590e-01, 4.446840e-01, 4.455060e-01, 4.463249e-01, 4.471409e-01,
		4.479539e-01, 4.487639e-01, 4.495711e-01, 4.503753e-01, 4.511767e-01, 4.519752e-01, 4.527710e-01, 4.535639e-01,
		4.543541e-01, 4.551415e-01, 4.559263e-01, 4.567083e-01, 4.574877e-01, 4.582644e-01, 4.590385e-01, 4.598100e-01,
		4.605789e-01, 4.613453e-01, 4.621091e-01, 4.628704e-01, 4.636292e-01, 4.643855e-01, 4.651394e-01, 4.658908e-01,
		4.666398e-01, 4.673865e-01, 4.681307e-01, 4.688726e-01, 4.696122e-01, 4.703494e-01, 4.710843e-01, 4.718169e-01,
		4.725473e-01, 4.732754e-01, 4.740013e-01, 4.747250e-01, 4.754464e-01, 4.761657e-01, 4.768828e-01, 4.775978e-01,
		4.783106e-01, 4.790214e-01, 4.797300e-01, 4.804365e-01, 4.811410e-01, 4.818434e-01, 4.825437e-01, 4.832420e-01,
		4.839384e-01, 4.846327e-01, 4.853250e-01, 4.860154e-01, 4.867038e-01, 4.873902e-01, 4.880748e-01, 4.887574e-01,
		4.894381e-01, 4.901170e-01, 4.907939e-01, 4.914690e-01, 4.921423e-01, 4.928137e-01, 4.934832e-01, 4.941510e-01,
		4.948170e-01, 4.954812e-01, 4.961436e-01, 4.968042e-01, 4.974631e-01, 4.981203e-01, 4.987757e-01, 4.994294e-01,
		5.000814e-01, 5.007317e-01, 5.013803e-01, 5.020273e-01, 5.026726e-01, 5.033162e-01, 5.039582e-01, 5.045985e-01,
		5.052372e-01, 5.058743e-01, 5.065099e-01, 5.071438e-01, 5.077761e-01, 5.084069e-01, 5.090362e-01, 5.096638e-01,
		5.102900e-01, 5.109145e-01, 5.115376e-01, 5.121592e-01, 5.127792e-01, 5.133978e-01, 5.140148e-01, 5.146304e-01,
		5.152445e-01, 5.158572e-01, 5.164684e-01, 5.170782e-01, 5.176865e-01, 5.182934e-01, 5.188988e-01, 5.195029e-01,
		5.201056e-01, 5.207069e-01, 5.213068e-01, 5.219053e-01, 5.225024e-01, 5.230982e-01, 5.236927e-01, 5.242857e-01,
		5.248775e-01, 5.254679e-01, 5.260570e-01, 5.266448e-01, 5.272312e-01, 5.278164e-01, 5.284002e-01, 5.289828e-01,
		5.295641e-01, 5.301442e-01, 5.307229e-01, 5.313004e-01, 5.318767e-01, 5.324517e-01, 5.330254e-01, 5.335979e-01,
		5.341693e-01, 5.347394e-01, 5.353082e-01, 5.358759e-01, 5.364423e-01, 5.370076e-01, 5.375717e-01, 5.381346e-01,
		5.386963e-01, 5.392569e-01, 5.398163e-01, 5.403746e-01, 5.409316e-01, 5.414876e-01, 5.420424e-01, 5.425960e-01,
		5.431486e-01, 5.437000e-01, 5.442503e-01, 5.447995e-01, 5.453476e-01, 5.458946e-01, 5.464405e-01, 5.469853e-01,
		5.475290e-01, 5.480717e-01, 5.486133e-01, 5.491537e-01, 5.496932e-01, 5.502316e-01, 5.507689e-01, 5.513052e-01,
		5.518404e-01, 5.523747e-01, 5.529078e-01, 5.534400e-01, 5.539711e-01, 5.545012e-01, 5.550303e-01, 5.555584e-01,
		5.560855e-01, 5.566117e-01, 5.571368e-01, 5.576609e-01, 5.581840e-01, 5.587062e-01, 5.592273e-01, 5.597475e-01,
		5.602668e-01, 5.607851e-01, 5.613024e-01, 5.618188e-01, 5.623342e-01, 5.628487e-01, 5.633622e-01, 5.638748e-01,
		5.643865e-01, 5.648973e-01, 5.654072e-01, 5.659161e-01, 5.664241e-01, 5.669311e-01, 5.674374e-01, 5.679426e-01,
		5.684470e-01, 5.689505e-01, 5.694531e-01, 5.699549e-01, 5.704557e-01, 5.709556e-01, 5.714548e-01, 5.719529e-01,
		5.724503e-01, 5.729468e-01, 5.734424e-01, 5.739372e-01, 5.744311e-01, 5.749242e-01, 5.754164e-01, 5.759078e-01,
		5.763984e-01, 5.768881e-01, 5.773770e-01, 5.778650e-01, 5.783523e-01, 5.788387e-01, 5.793243e-01, 5.798091e-01,
		5.802931e-01, 5.807762e-01, 5.812586e-01, 5.817402e-01, 5.822210e-01, 5.827010e-01, 5.831801e-01, 5.836585e-01,
		5.841362e-01, 5.846130e-01, 5.850891e-01, 5.855644e-01, 5.860389e-01, 5.865127e-01, 5.869856e-01, 5.874579e-01,
		5.879294e-01, 5.884001e-01, 5.888700e-01, 5.893393e-01, 5.898077e-01, 5.902755e-01, 5.907425e-01, 5.912087e-01,
		5.916742e-01, 5.921390e-01, 5.926031e-01, 5.930664e-01, 5.935290e-01, 5.939909e-01, 5.944521e-01, 5.949125e-01,
		5.953723e-01, 5.958313e-01, 5.962896e-01, 5.967473e-01, 5.972042e-01, 5.976604e-01, 5.981160e-01, 5.985708e-01,
		5.990250e-01, 5.994784e-01, 5.999312e-01, 6.003833e-01, 6.008347e-01, 6.012855e-01, 6.017355e-01, 6.021850e-01,
		6.026337e-01, 6.030817e-01, 6.035291e-01, 6.039758e-01, 6.044219e-01, 6.048673e-01, 6.053120e-01, 6.057562e-01,
		6.061996e-01, 6.066424e-01, 6.070846e-01, 6.075261e-01, 6.079670e-01, 6.084072e-01, 6.088468e-01, 6.092858e-01,
		6.097241e-01, 6.101618e-01, 6.105989e-01, 6.110353e-01, 6.114712e-01, 6.119064e-01, 6.123410e-01, 6.127750e-01,
		6.132084e-01, 6.136411e-01, 6.140732e-01, 6.145048e-01, 6.149357e-01, 6.153660e-01, 6.157957e-01, 6.162249e-01,
		6.166534e-01, 6.170813e-01, 6.175086e-01, 6.179354e-01, 6.183616e-01, 6.187872e-01, 6.192122e-01, 6.196365e-01,
		6.200604e-01, 6.204836e-01, 6.209063e-01, 6.213284e-01, 6.217499e-01, 6.221709e-01, 6.225913e-01, 6.230111e-01,
		6.234304e-01, 6.238490e-01, 6.242672e-01, 6.246848e-01, 6.251017e-01, 6.255182e-01, 6.259341e-01, 6.263494e-01,
		6.267643e-01, 6.271785e-01, 6.275922e-01, 6.280054e-01, 6.284180e-01, 6.288301e-01, 6.292416e-01, 6.296526e-01,
		6.300631e-01, 6.304730e-01, 6.308824e-01, 6.312913e-01, 6.316996e-01, 6.321074e-01, 6.325147e-01, 6.329215e-01,
		6.333277e-01, 6.337335e-01, 6.341386e-01, 6.345433e-01, 6.349475e-01, 6.353511e-01, 6.357543e-01, 6.361569e-01,
		6.365590e-01, 6.369606e-01, 6.373618e-01, 6.377624e-01, 6.381625e-01, 6.385621e-01, 6.389612e-01, 6.393598e-01,
		6.397579e-01, 6.401555e-01, 6.405526e-01, 6.409492e-01, 6.413454e-01, 6.417410e-01, 6.421362e-01, 6.425309e-01,
		6.429250e-01, 6.433188e-01, 6.437120e-01, 6.441047e-01, 6.444970e-01, 6.448888e-01, 6.452801e-01, 6.456710e-01,
		6.460613e-01, 6.464512e-01, 6.468406e-01, 6.472296e-01, 6.476181e-01, 6.480061e-01, 6.483937e-01, 6.487808e-01,
		6.491674e-01, 6.495536e-01, 6.499393e-01, 6.503246e-01, 6.507094e-01, 6.510937e-01, 6.514776e-01, 6.518611e-01,
		6.522441e-01, 6.526266e-01, 6.530087e-01, 6.533904e-01, 6.537716e-01, 6.541524e-01, 6.545327e-01, 6.549126e-01,
		6.552920e-01, 6.556710e-01, 6.560495e-01, 6.564277e-01, 6.568054e-01, 6.571826e-01, 6.575595e-01, 6.579359e-01,
		6.583118e-01, 6.586874e-01, 6.590625e-01, 6.594372e-01, 6.598114e-01, 6.601852e-01, 6.605586e-01, 6.609316e-01,
		6.613042e-01, 6.616763e-01, 6.620481e-01, 6.624194e-01, 6.627903e-01, 6.631607e-01, 6.635308e-01, 6.639005e-01,
		6.642697e-01, 6.646385e-01, 6.650070e-01, 6.653750e-01, 6.657426e-01, 6.661098e-01, 6.664766e-01, 6.668430e-01,
		6.672090e-01, 6.675746e-01, 6.679398e-01, 6.683046e-01, 6.686690e-01, 6.690330e-01, 6.693966e-01, 6.697598e-01,
		6.701226e-01, 6.704850e-01, 6.708471e-01, 6.712087e-01, 6.715700e-01, 6.719308e-01, 6.722913e-01, 6.726514e-01,
		6.730111e-01, 6.733705e-01, 6.737294e-01, 6.740879e-01, 6.744461e-01, 6.748039e-01, 6.751614e-01, 6.755184e-01,
		6.758750e-01, 6.762313e-01, 6.765872e-01, 6.769428e-01, 6.772979e-01, 6.776527e-01, 6.780071e-01, 6.783612e-01,
		6.787149e-01, 6.790682e-01, 6.794212e-01, 6.797737e-01, 6.801260e-01, 6.804778e-01, 6.808293e-01, 6.811804e-01,
		6.815312e-01, 6.818815e-01, 6.822316e-01, 6.825813e-01, 6.829306e-01, 6.832796e-01, 6.836282e-01, 6.839765e-01,
		6.843244e-01, 6.846719e-01, 6.850191e-01, 6.853660e-01, 6.857125e-01, 6.860586e-01, 6.864043e-01, 6.867498e-01,
		6.870949e-01, 6.874397e-01, 6.877841e-01, 6.881282e-01, 6.884719e-01, 6.888152e-01, 6.891583e-01, 6.895010e-01,
		6.898433e-01, 6.901854e-01, 6.905270e-01, 6.908684e-01, 6.912094e-01, 6.915500e-01, 6.918904e-01, 6.922303e-01,
		6.925700e-01, 6.929094e-01, 6.932484e-01, 6.935870e-01, 6.939254e-01, 6.942633e-01, 6.946011e-01, 6.949384e-01,
		6.952754e-01, 6.956121e-01, 6.959485e-01, 6.962845e-01, 6.966202e-01, 6.969556e-01, 6.972907e-01, 6.976255e-01,
		6.979599e-01, 6.982940e-01, 6.986278e-01, 6.989613e-01, 6.992944e-01, 6.996273e-01, 6.999598e-01, 7.002920e-01,
		7.006239e-01, 7.009555e-01, 7.012867e-01, 7.016177e-01, 7.019483e-01, 7.022786e-01, 7.026086e-01, 7.029384e-01,
		7.032678e-01, 7.035969e-01, 7.039256e-01, 7.042542e-01, 7.045823e-01, 7.049102e-01, 7.052377e-01, 7.055650e-01,
		7.058919e-01, 7.062186e-01, 7.065449e-01, 7.068710e-01, 7.071967e-01, 7.075222e-01, 7.078474e-01, 7.081722e-01,
		7.084967e-01, 7.088210e-01, 7.091449e-01, 7.094686e-01, 7.097920e-01, 7.101150e-01, 7.104378e-01, 7.107603e-01,
		7.110825e-01, 7.114044e-01, 7.117260e-01, 7.120473e-01, 7.123684e-01, 7.126891e-01, 7.130095e-01, 7.133297e-01,
		7.136496e-01, 7.139692e-01, 7.142885e-01, 7.146075e-01, 7.149262e-01, 7.152447e-01, 7.155629e-01, 7.158808e-01,
		7.161984e-01, 7.165157e-01, 7.168328e-01, 7.171495e-01, 7.174660e-01, 7.177821e-01, 7.180981e-01, 7.184138e-01,
		7.187291e-01, 7.190442e-01, 7.193590e-01, 7.196736e-01, 7.199879e-01, 7.203019e-01, 7.206156e-01, 7.209290e-01,
		7.212422e-01, 7.215551e-01, 7.218677e-01, 7.221801e-01, 7.224922e-01, 7.228040e-01, 7.231156e-01, 7.234268e-01,
		7.237378e-01, 7.240486e-01, 7.243591e-01, 7.246693e-01, 7.249793e-01, 7.252890e-01, 7.255983e-01, 7.259076e-01,
		7.262164e-01, 7.265251e-01, 7.268335e-01, 7.271415e-01, 7.274494e-01, 7.277570e-01, 7.280643e-01, 7.283714e-01,
		7.286782e-01, 7.289847e-01, 7.292911e-01, 7.295971e-01, 7.299029e-01, 7.302084e-01, 7.305137e-01, 7.308187e-01,
		7.311234e-01, 7.314279e-01, 7.317322e-01, 7.320362e-01, 7.323400e-01, 7.326434e-01, 7.329467e-01, 7.332497e-01,
		7.335525e-01, 7.338549e-01, 7.341572e-01, 7.344592e-01, 7.347609e-01, 7.350624e-01, 7.353637e-01, 7.356647e-01,
		7.359655e-01, 7.362660e-01, 7.365662e-01, 7.368662e-01, 7.371660e-01, 7.374656e-01, 7.377649e-01, 7.380639e-01,
		7.383628e-01, 7.386613e-01, 7.389597e-01, 7.392578e-01, 7.395556e-01, 7.398532e-01, 7.401506e-01, 7.404477e-01,
		7.407446e-01, 7.410412e-01, 7.413377e-01, 7.416338e-01, 7.419298e-01, 7.422255e-01, 7.425209e-01, 7.428162e-01,
		7.431112e-01, 7.434059e-01, 7.437005e-01, 7.439948e-01, 7.442889e-01, 7.445827e-01, 7.448763e-01, 7.451697e-01,
		7.454628e-01, 7.457558e-01, 7.460485e-01, 7.463409e-01, 7.466331e-01, 7.469251e-01, 7.472169e-01, 7.475084e-01,
		7.477998e-01, 7.480908e-01, 7.483817e-01, 7.486723e-01, 7.489627e-01, 7.492529e-01, 7.495428e-01, 7.498326e-01,
		7.501221e-01, 7.504114e-01, 7.507005e-01, 7.509893e-01, 7.512779e-01, 7.515663e-01, 7.518545e-01, 7.521424e-01,
		7.524302e-01, 7.527177e-01, 7.530050e-01, 7.532921e-01, 7.535789e-01, 7.538656e-01, 7.541520e-01, 7.544382e-01,
		7.547241e-01, 7.550099e-01, 7.552955e-01, 7.555808e-01, 7.558660e-01, 7.561509e-01, 7.564356e-01, 7.567201e-01,
		7.570043e-01, 7.572884e-01, 7.575722e-01, 7.578558e-01, 7.581393e-01, 7.584225e-01, 7.587055e-01, 7.589883e-01,
		7.592708e-01, 7.595532e-01, 7.598354e-01, 7.601173e-01, 7.603990e-01, 7.606806e-01, 7.609619e-01, 7.612430e-01,
		7.615239e-01, 7.618046e-01, 7.620851e-01, 7.623653e-01, 7.626454e-01, 7.629253e-01, 7.632049e-01, 7.634844e-01,
		7.637637e-01, 7.640427e-01, 7.643216e-01, 7.646002e-01, 7.648786e-01, 7.651569e-01, 7.654349e-01, 7.657127e-01,
		7.659904e-01, 7.662678e-01, 7.665451e-01, 7.668221e-01, 7.670989e-01, 7.673756e-01, 7.676520e-01, 7.679282e-01,
		7.682042e-01, 7.684801e-01, 7.687557e-01, 7.690312e-01, 7.693064e-01, 7.695814e-01, 7.698563e-01, 7.701310e-01,
		7.704054e-01, 7.706797e-01, 7.709538e-01, 7.712276e-01, 7.715013e-01, 7.717748e-01, 7.720481e-01, 7.723212e-01,
		7.725941e-01, 7.728668e-01, 7.731394e-01, 7.734116e-01, 7.736838e-01, 7.739558e-01, 7.742275e-01, 7.744991e-01,
		7.747704e-01, 7.750416e-01, 7.753126e-01, 7.755834e-01, 7.758540e-01, 7.761245e-01, 7.763947e-01, 7.766647e-01,
		7.769346e-01, 7.772043e-01, 7.774737e-01, 7.777431e-01, 7.780122e-01, 7.782811e-01, 7.785498e-01, 7.788184e-01,
		7.790868e-01, 7.793550e-01, 7.796230e-01, 7.798908e-01, 7.801584e-01, 7.804259e-01, 7.806932e-01, 7.809603e-01,
		7.812271e-01, 7.814939e-01, 7.817604e-01, 7.820268e-01, 7.822930e-01, 7.825589e-01, 7.828248e-01, 7.830904e-01,
		7.833558e-01, 7.836211e-01, 7.838862e-01, 7.841511e-01, 7.844158e-01, 7.846804e-01, 7.849448e-01, 7.852090e-01,
		7.854730e-01, 7.857369e-01, 7.860005e-01, 7.862641e-01, 7.865273e-01, 7.867905e-01, 7.870535e-01, 7.873163e-01,
		7.875788e-01, 7.878413e-01, 7.881036e-01, 7.883657e-01, 7.886276e-01, 7.888893e-01, 7.891509e-01, 7.894123e-01,
		7.896735e-01, 7.899345e-01, 7.901954e-01, 7.904561e-01, 7.907166e-01, 7.909770e-01, 7.912372e-01, 7.914972e-01,
		7.917571e-01, 7.920167e-01, 7.922763e-01, 7.925356e-01, 7.927948e-01, 7.930537e-01, 7.933126e-01, 7.935712e-01,
		7.938297e-01, 7.940881e-01, 7.943462e-01, 7.946042e-01, 7.948620e-01, 7.951197e-01, 7.953772e-01, 7.956345e-01,
		7.958916e-01, 7.961487e-01, 7.964054e-01, 7.966621e-01, 7.969186e-01, 7.971749e-01, 7.974311e-01, 7.976871e-01,
		7.979429e-01, 7.981986e-01, 7.984541e-01, 7.987095e-01, 7.989646e-01, 7.992196e-01, 7.994745e-01, 7.997292e-01,
		7.999837e-01, 8.002381e-01, 8.004923e-01, 8.007463e-01, 8.010002e-01, 8.012539e-01, 8.015075e-01, 8.017609e-01,
		8.020141e-01, 8.022672e-01, 8.025202e-01, 8.027729e-01, 8.030255e-01, 8.032780e-01, 8.035302e-01, 8.037823e-01,
		8.040344e-01, 8.042861e-01, 8.045378e-01, 8.047893e-01, 8.050406e-01, 8.052918e-01, 8.055428e-01, 8.057937e-01,
		8.060444e-01, 8.062950e-01, 8.065454e-01, 8.067956e-01, 8.070457e-01, 8.072957e-01, 8.075454e-01, 8.077950e-01,
		8.080446e-01, 8.082938e-01, 8.085430e-01, 8.087921e-01, 8.090409e-01, 8.092896e-01, 8.095381e-01, 8.097866e-01,
		8.100348e-01, 8.102829e-01, 8.105308e-01, 8.107786e-01, 8.110263e-01, 8.112738e-01, 8.115211e-01, 8.117683e-01,
		8.120154e-01, 8.122622e-01, 8.125089e-01, 8.127556e-01, 8.130020e-01, 8.132483e-01, 8.134944e-01, 8.137404e-01,
		8.139862e-01, 8.142319e-01, 8.144775e-01, 8.147229e-01, 8.149682e-01, 8.152133e-01, 8.154582e-01, 8.157030e-01,
		8.159477e-01, 8.161922e-01, 8.164365e-01, 8.166808e-01, 8.169249e-01, 8.171688e-01, 8.174126e-01, 8.176562e-01,
		8.178997e-01, 8.181431e-01, 8.183863e-01, 8.186293e-01, 8.188722e-01, 8.191150e-01, 8.193576e-01, 8.196001e-01,
		8.198425e-01, 8.200847e-01, 8.203267e-01, 8.205686e-01, 8.208104e-01, 8.210521e-01, 8.212935e-01, 8.215349e-01,
		8.217760e-01, 8.220171e-01, 8.222581e-01, 8.224988e-01, 8.227395e-01, 8.229799e-01, 8.232203e-01, 8.234605e-01,
		8.237006e-01, 8.239405e-01, 8.241804e-01, 8.244200e-01, 8.246595e-01, 8.248989e-01, 8.251381e-01, 8.253772e-01,
		8.256162e-01, 8.258550e-01, 8.260937e-01, 8.263323e-01, 8.265706e-01, 8.268089e-01, 8.270471e-01, 8.272851e-01,
		8.275229e-01, 8.277607e-01, 8.279983e-01, 8.282357e-01, 8.284730e-01, 8.287102e-01, 8.289472e-01, 8.291842e-01,
		8.294209e-01, 8.296576e-01, 8.298941e-01, 8.301305e-01, 8.303667e-01, 8.306028e-01, 8.308387e-01, 8.310746e-01,
		8.313103e-01, 8.315458e-01, 8.317813e-01, 8.320166e-01, 8.322517e-01, 8.324867e-01, 8.327217e-01, 8.329564e-01,
		8.331911e-01, 8.334256e-01, 8.336599e-01, 8.338942e-01, 8.341283e-01, 8.343623e-01, 8.345962e-01, 8.348299e-01,
		8.350635e-01, 8.352969e-01, 8.355302e-01, 8.357634e-01, 8.359964e-01, 8.362294e-01, 8.364622e-01, 8.366948e-01,
		8.369274e-01, 8.371598e-01, 8.373921e-01, 8.376243e-01, 8.378563e-01, 8.380882e-01, 8.383200e-01, 8.385516e-01,
		8.387831e-01, 8.390145e-01, 8.392458e-01, 8.394769e-01, 8.397079e-01, 8.399388e-01, 8.401695e-01, 8.404002e-01,
		8.406307e-01, 8.408611e-01, 8.410913e-01, 8.413214e-01, 8.415514e-01, 8.417813e-01, 8.420110e-01, 8.422406e-01,
		8.424702e-01, 8.426995e-01, 8.429288e-01, 8.431579e-01, 8.433869e-01, 8.436158e-01, 8.438445e-01, 8.440731e-01,
		8.443016e-01, 8.445300e-01, 8.447582e-01, 8.449863e-01, 8.452144e-01, 8.454422e-01, 8.456700e-01, 8.458977e-01,
		8.461251e-01, 8.463526e-01, 8.465798e-01, 8.468069e-01, 8.470340e-01, 8.472609e-01, 8.474877e-01, 8.477143e-01,
		8.479409e-01, 8.481673e-01, 8.483936e-01, 8.486198e-01, 8.488458e-01, 8.490717e-01, 8.492976e-01, 8.495233e-01,
		8.497488e-01, 8.499743e-01, 8.501996e-01, 8.504249e-01, 8.506500e-01, 8.508750e-01, 8.510998e-01, 8.513246e-01,
		8.515491e-01, 8.517737e-01, 8.519981e-01, 8.522223e-01, 8.524465e-01, 8.526706e-01, 8.528944e-01, 8.531182e-01,
		8.533419e-01, 8.535655e-01, 8.537889e-01, 8.540123e-01, 8.542355e-01, 8.544586e-01, 8.546816e-01, 8.549044e-01,
		8.551272e-01, 8.553498e-01, 8.555723e-01, 8.557947e-01, 8.560170e-01, 8.562392e-01, 8.564612e-01, 8.566832e-01,
		8.569050e-01, 8.571267e-01, 8.573483e-01, 8.575698e-01, 8.577912e-01, 8.580124e-01, 8.582336e-01, 8.584546e-01,
		8.586755e-01, 8.588963e-01, 8.591169e-01, 8.593375e-01, 8.595580e-01, 8.597783e-01, 8.599985e-01, 8.602186e-01,
		8.604387e-01, 8.606585e-01, 8.608783e-01, 8.610980e-01, 8.613176e-01, 8.615370e-01, 8.617563e-01, 8.619756e-01,
		8.621947e-01, 8.624136e-01, 8.626326e-01, 8.628513e-01, 8.630700e-01, 8.632885e-01, 8.635070e-01, 8.637253e-01,
		8.639436e-01, 8.641617e-01, 8.643796e-01, 8.645976e-01, 8.648154e-01, 8.650330e-01, 8.652506e-01, 8.654680e-01,
		8.656853e-01, 8.659026e-01, 8.661197e-01, 8.663368e-01, 8.665537e-01, 8.667705e-01, 8.669872e-01, 8.672037e-01,
		8.674202e-01, 8.676366e-01, 8.678529e-01, 8.680690e-01, 8.682851e-01, 8.685010e-01, 8.687168e-01, 8.689325e-01,
		8.691481e-01, 8.693637e-01, 8.695791e-01, 8.697944e-01, 8.700095e-01, 8.702246e-01, 8.704396e-01, 8.706545e-01,
		8.708693e-01, 8.710839e-01, 8.712984e-01, 8.715129e-01, 8.717272e-01, 8.719414e-01, 8.721556e-01, 8.723696e-01,
		8.725836e-01, 8.727974e-01, 8.730111e-01, 8.732247e-01, 8.734382e-01, 8.736516e-01, 8.738649e-01, 8.740780e-01,
		8.742912e-01, 8.745041e-01, 8.747170e-01, 8.749298e-01, 8.751425e-01, 8.753550e-01, 8.755675e-01, 8.757799e-01,
		8.759921e-01, 8.762043e-01, 8.764163e-01, 8.766283e-01, 8.768401e-01, 8.770519e-01, 8.772635e-01, 8.774751e-01,
		8.776865e-01, 8.778979e-01, 8.781091e-01, 8.783202e-01, 8.785312e-01, 8.787422e-01, 8.789530e-01, 8.791637e-01,
		8.793744e-01, 8.795849e-01, 8.797953e-01, 8.800057e-01, 8.802159e-01, 8.804260e-01, 8.806360e-01, 8.808460e-01,
		8.810558e-01, 8.812655e-01, 8.814751e-01, 8.816847e-01, 8.818941e-01, 8.821034e-01, 8.823127e-01, 8.825217e-01,
		8.827308e-01, 8.829397e-01, 8.831486e-01, 8.833573e-01, 8.835659e-01, 8.837745e-01, 8.839829e-01, 8.841912e-01,
		8.843995e-01, 8.846076e-01, 8.848156e-01, 8.850236e-01, 8.852314e-01, 8.854392e-01, 8.856469e-01, 8.858544e-01,
		8.860618e-01, 8.862692e-01, 8.864765e-01, 8.866837e-01, 8.868908e-01, 8.870977e-01, 8.873046e-01, 8.875114e-01,
		8.877181e-01, 8.879247e-01, 8.881311e-01, 8.883376e-01, 8.885438e-01, 8.887501e-01, 8.889562e-01, 8.891622e-01,
		8.893681e-01, 8.895739e-01, 8.897797e-01, 8.899853e-01, 8.901908e-01, 8.903963e-01, 8.906016e-01, 8.908069e-01,
		8.910121e-01, 8.912171e-01, 8.914221e-01, 8.916270e-01, 8.918318e-01, 8.920364e-01, 8.922410e-01, 8.924455e-01,
		8.926499e-01, 8.928543e-01, 8.930585e-01, 8.932626e-01, 8.934667e-01, 8.936706e-01, 8.938744e-01, 8.940782e-01,
		8.942819e-01, 8.944854e-01, 8.946889e-01, 8.948923e-01, 8.950956e-01, 8.952988e-01, 8.955019e-01, 8.957049e-01,
		8.959078e-01, 8.961107e-01, 8.963134e-01, 8.965160e-01, 8.967186e-01, 8.969210e-01, 8.971235e-01, 8.973257e-01,
		8.975279e-01, 8.977300e-01, 8.979320e-01, 8.981339e-01, 8.983358e-01, 8.985375e-01, 8.987392e-01, 8.989407e-01,
		8.991421e-01, 8.993436e-01, 8.995448e-01, 8.997460e-01, 8.999471e-01, 9.001482e-01, 9.003491e-01, 9.005499e-01,
		9.007506e-01, 9.009513e-01, 9.011519e-01, 9.013523e-01, 9.015527e-01, 9.017531e-01, 9.019532e-01, 9.021534e-01,
		9.023534e-01, 9.025534e-01, 9.027532e-01, 9.029530e-01, 9.031526e-01, 9.033523e-01, 9.035518e-01, 9.037512e-01,
		9.039505e-01, 9.041498e-01, 9.043489e-01, 9.045479e-01, 9.047469e-01, 9.049459e-01, 9.051446e-01, 9.053434e-01,
		9.055420e-01, 9.057405e-01, 9.059390e-01, 9.061373e-01, 9.063356e-01, 9.065338e-01, 9.067319e-01, 9.069299e-01,
		9.071279e-01, 9.073257e-01, 9.075235e-01, 9.077212e-01, 9.079187e-01, 9.081162e-01, 9.083136e-01, 9.085110e-01,
		9.087082e-01, 9.089054e-01, 9.091024e-01, 9.092994e-01, 9.094964e-01, 9.096932e-01, 9.098899e-01, 9.100866e-01,
		9.102831e-01, 9.104796e-01, 9.106760e-01, 9.108723e-01, 9.110685e-01, 9.112647e-01, 9.114607e-01, 9.116567e-01,
		9.118526e-01, 9.120483e-01, 9.122441e-01, 9.124397e-01, 9.126353e-01, 9.128307e-01, 9.130261e-01, 9.132214e-01,
		9.134166e-01, 9.136118e-01, 9.138068e-01, 9.140018e-01, 9.141967e-01, 9.143915e-01, 9.145862e-01, 9.147808e-01,
		9.149753e-01, 9.151698e-01, 9.153642e-01, 9.155585e-01, 9.157528e-01, 9.159469e-01, 9.161409e-01, 9.163349e-01,
		9.165288e-01, 9.167226e-01, 9.169164e-01, 9.171100e-01, 9.173036e-01, 9.174970e-01, 9.176905e-01, 9.178838e-01,
		9.180770e-01, 9.182702e-01, 9.184632e-01, 9.186562e-01, 9.188492e-01, 9.190420e-01, 9.192348e-01, 9.194274e-01,
		9.196200e-01, 9.198125e-01, 9.200049e-01, 9.201973e-01, 9.203895e-01, 9.205818e-01, 9.207739e-01, 9.209659e-01,
		9.211578e-01, 9.213497e-01, 9.215415e-01, 9.217332e-01, 9.219248e-01, 9.221163e-01, 9.223078e-01, 9.224992e-01,
		9.226905e-01, 9.228818e-01, 9.230729e-01, 9.232640e-01, 9.234550e-01, 9.236459e-01, 9.238367e-01, 9.240275e-01,
		9.242182e-01, 9.244088e-01, 9.245993e-01, 9.247897e-01, 9.249801e-01, 9.251704e-01, 9.253606e-01, 9.255507e-01,
		9.257408e-01, 9.259307e-01, 9.261206e-01, 9.263105e-01, 9.265002e-01, 9.266899e-01, 9.268795e-01, 9.270689e-01,
		9.272584e-01, 9.274477e-01, 9.276370e-01, 9.278262e-01, 9.280154e-01, 9.282044e-01, 9.283934e-01, 9.285822e-01,
		9.287710e-01, 9.289598e-01, 9.291484e-01, 9.293370e-01, 9.295255e-01, 9.297140e-01, 9.299023e-01, 9.300906e-01,
		9.302788e-01, 9.304669e-01, 9.306549e-01, 9.308429e-01, 9.310308e-01, 9.312186e-01, 9.314064e-01, 9.315941e-01,
		9.317816e-01, 9.319692e-01, 9.321566e-01, 9.323440e-01, 9.325313e-01, 9.327185e-01, 9.329057e-01, 9.330927e-01,
		9.332797e-01, 9.334666e-01, 9.336535e-01, 9.338402e-01, 9.340270e-01, 9.342135e-01, 9.344001e-01, 9.345866e-01,
		9.347730e-01, 9.349593e-01, 9.351455e-01, 9.353317e-01, 9.355178e-01, 9.357038e-01, 9.358898e-01, 9.360756e-01,
		9.362615e-01, 9.364472e-01, 9.366328e-01, 9.368184e-01, 9.370039e-01, 9.371893e-01, 9.373747e-01, 9.375600e-01,
		9.377452e-01, 9.379303e-01, 9.381154e-01, 9.383004e-01, 9.384854e-01, 9.386702e-01, 9.388550e-01, 9.390397e-01,
		9.392243e-01, 9.394089e-01, 9.395934e-01, 9.397778e-01, 9.399621e-01, 9.401464e-01, 9.403306e-01, 9.405147e-01,
		9.406988e-01, 9.408827e-01, 9.410667e-01, 9.412505e-01, 9.414343e-01, 9.416180e-01, 9.418016e-01, 9.419851e-01,
		9.421686e-01, 9.423520e-01, 9.425353e-01, 9.427186e-01, 9.429018e-01, 9.430850e-01, 9.432680e-01, 9.434510e-01,
		9.436339e-01, 9.438167e-01, 9.439995e-01, 9.441822e-01, 9.443648e-01, 9.445474e-01, 9.447299e-01, 9.449123e-01,
		9.450946e-01, 9.452769e-01, 9.454591e-01, 9.456412e-01, 9.458233e-01, 9.460053e-01, 9.461872e-01, 9.463691e-01,
		9.465508e-01, 9.467326e-01, 9.469142e-01, 9.470958e-01, 9.472773e-01, 9.474587e-01, 9.476401e-01, 9.478214e-01,
		9.480026e-01, 9.481838e-01, 9.483649e-01, 9.485459e-01, 9.487268e-01, 9.489077e-01, 9.490886e-01, 9.492693e-01,
		9.494500e-01, 9.496306e-01, 9.498111e-01, 9.499916e-01, 9.501719e-01, 9.503523e-01, 9.505326e-01, 9.507128e-01,
		9.508929e-01, 9.510729e-01, 9.512529e-01, 9.514329e-01, 9.516127e-01, 9.517925e-01, 9.519722e-01, 9.521519e-01,
		9.523315e-01, 9.525110e-01, 9.526904e-01, 9.528698e-01, 9.530491e-01, 9.532284e-01, 9.534075e-01, 9.535866e-01,
		9.537657e-01, 9.539447e-01, 9.541236e-01, 9.543024e-01, 9.544812e-01, 9.546599e-01, 9.548386e-01, 9.550171e-01,
		9.551957e-01, 9.553741e-01, 9.555525e-01, 9.557307e-01, 9.559090e-01, 9.560872e-01, 9.562653e-01, 9.564433e-01,
		9.566213e-01, 9.567992e-01, 9.569771e-01, 9.571549e-01, 9.573326e-01, 9.575102e-01, 9.576878e-01, 9.578653e-01,
		9.580427e-01, 9.582201e-01, 9.583974e-01, 9.585747e-01, 9.587519e-01, 9.589290e-01, 9.591061e-01, 9.592831e-01,
		9.594600e-01, 9.596368e-01, 9.598137e-01, 9.599904e-01, 9.601671e-01, 9.603436e-01, 9.605201e-01, 9.606966e-01,
		9.608730e-01, 9.610494e-01, 9.612256e-01, 9.614019e-01, 9.615780e-01, 9.617541e-01, 9.619301e-01, 9.621060e-01,
		9.622819e-01, 9.624578e-01, 9.626336e-01, 9.628092e-01, 9.629849e-01, 9.631604e-01, 9.633359e-01, 9.635113e-01,
		9.636867e-01, 9.638621e-01, 9.640373e-01, 9.642125e-01, 9.643876e-01, 9.645627e-01, 9.647377e-01, 9.649126e-01,
		9.650874e-01, 9.652622e-01, 9.654370e-01, 9.656116e-01, 9.657863e-01, 9.659608e-01, 9.661353e-01, 9.663097e-01,
		9.664841e-01, 9.666584e-01, 9.668326e-01, 9.670068e-01, 9.671809e-01, 9.673550e-01, 9.675289e-01, 9.677029e-01,
		9.678767e-01, 9.680505e-01, 9.682242e-01, 9.683979e-01, 9.685715e-01, 9.687451e-01, 9.689186e-01, 9.690920e-01,
		9.692653e-01, 9.694387e-01, 9.696119e-01, 9.697851e-01, 9.699582e-01, 9.701312e-01, 9.703043e-01, 9.704772e-01,
		9.706500e-01, 9.708228e-01, 9.709955e-01, 9.711683e-01, 9.713409e-01, 9.715135e-01, 9.716859e-01, 9.718584e-01,
		9.720308e-01, 9.722031e-01, 9.723753e-01, 9.725475e-01, 9.727197e-01, 9.728917e-01, 9.730637e-01, 9.732357e-01,
		9.734076e-01, 9.735794e-01, 9.737512e-01, 9.739228e-01, 9.740945e-01, 9.742661e-01, 9.744377e-01, 9.746091e-01,
		9.747805e-01, 9.749519e-01, 9.751231e-01, 9.752944e-01, 9.754655e-01, 9.756366e-01, 9.758077e-01, 9.759787e-01,
		9.761496e-01, 9.763204e-01, 9.764913e-01, 9.766620e-01, 9.768327e-01, 9.770033e-01, 9.771739e-01, 9.773444e-01,
		9.775148e-01, 9.776852e-01, 9.778556e-01, 9.780258e-01, 9.781960e-01, 9.783661e-01, 9.785362e-01, 9.787063e-01,
		9.788762e-01, 9.790462e-01, 9.792160e-01, 9.793859e-01, 9.795555e-01, 9.797252e-01, 9.798949e-01, 9.800645e-01,
		9.802339e-01, 9.804034e-01, 9.805728e-01, 9.807421e-01, 9.809114e-01, 9.810806e-01, 9.812497e-01, 9.814188e-01,
		9.815878e-01, 9.817568e-01, 9.819257e-01, 9.820946e-01, 9.822634e-01, 9.824321e-01, 9.826008e-01, 9.827695e-01,
		9.829381e-01, 9.831066e-01, 9.832750e-01, 9.834434e-01, 9.836118e-01, 9.837800e-01, 9.839482e-01, 9.841164e-01,
		9.842845e-01, 9.844526e-01, 9.846206e-01, 9.847885e-01, 9.849564e-01, 9.851242e-01, 9.852920e-01, 9.854597e-01,
		9.856274e-01, 9.857950e-01, 9.859625e-01, 9.861299e-01, 9.862974e-01, 9.864647e-01, 9.866320e-01, 9.867993e-01,
		9.869665e-01, 9.871337e-01, 9.873008e-01, 9.874678e-01, 9.876347e-01, 9.878017e-01, 9.879685e-01, 9.881353e-01,
		9.883021e-01, 9.884688e-01, 9.886354e-01, 9.888020e-01, 9.889685e-01, 9.891350e-01, 9.893014e-01, 9.894677e-01,
		9.896340e-01, 9.898003e-01, 9.899665e-01, 9.901326e-01, 9.902986e-01, 9.904646e-01, 9.906306e-01, 9.907965e-01,
		9.909624e-01, 9.911281e-01, 9.912939e-01, 9.914596e-01, 9.916252e-01, 9.917908e-01, 9.919563e-01, 9.921218e-01,
		9.922872e-01, 9.924526e-01, 9.926178e-01, 9.927831e-01, 9.929483e-01, 9.931134e-01, 9.932785e-01, 9.934435e-01,
		9.936085e-01, 9.937734e-01, 9.939383e-01, 9.941031e-01, 9.942678e-01, 9.944325e-01, 9.945971e-01, 9.947617e-01,
		9.949263e-01, 9.950907e-01, 9.952552e-01, 9.954196e-01, 9.955838e-01, 9.957481e-01, 9.959123e-01, 9.960765e-01,
		9.962406e-01, 9.964046e-01, 9.965686e-01, 9.967325e-01, 9.968964e-01, 9.970602e-01, 9.972240e-01, 9.973878e-01,
		9.975514e-01, 9.977150e-01, 9.978786e-01, 9.980421e-01, 9.982055e-01, 9.983689e-01, 9.985323e-01, 9.986956e-01,
		9.988588e-01, 9.990220e-01, 9.991851e-01, 9.993482e-01, 9.995112e-01, 9.996742e-01, 9.998372e-01, 1.000000e+00,
	};
	int i = (int)(d * 2047);
	assert(i >= 0 && i < 2048);
	return cube_root[i];
}

static inline void _ccv_rgb_to_luv(const float r, const float g, const float b, float* pl, float* pu, float* pv)
{
	const float x = 0.412453f * r + 0.35758f * g + 0.180423f * b;
	const float y = 0.212671f * r + 0.71516f * g + 0.072169f * b;
	const float z = 0.019334f * r + 0.119193f * g + 0.950227f * b;

	const float x_n = 0.312713f, y_n = 0.329016f;
	const float uv_n_divisor = -2.f * x_n + 12.f * y_n + 3.f;
	const float u_n = 4.f * x_n / uv_n_divisor;
	const float v_n = 9.f * y_n / uv_n_divisor;

    const float uv_divisor = ccv_max((x + 15.f * y + 3.f * z), FLT_EPSILON);
	const float u = 4.f * x / uv_divisor;
	const float v = 9.f * y / uv_divisor;

	const float y_cube_root = fast_cube_root(y);

	const float l_value = ccv_max(0.f, ((116.f * y_cube_root) - 16.f));
	const float u_value = 13.f * l_value * (u - u_n);
	const float v_value = 13.f * l_value * (v - v_n);

	// L in [0, 100], U in [-134, 220], V in [-140, 122]
	*pl = l_value * (255.f / 100.f);
	*pu = (u_value + 134.f) * (255.f / (220.f + 134.f));
	*pv = (v_value + 140.f) * (255.f / (122.f + 140.f));
}

void ccv_scd(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type)
{
	int ch = CCV_GET_CHANNEL(a->type);
	assert(ch == 1 || ch == 3);
	ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_literal("ccv_scd"), a->sig, CCV_EOF_SIGN);
	// diagonal u, v, x, y and LUV color, therefore 11 channels
	ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_32F | 11, CCV_32F | 11, sig);
	ccv_object_return_if_cached(, db);
	ccv_dense_matrix_t* blur = 0;
	ccv_blur(a, &blur, 0, 0.5); // do a modest blur, which suppresses noise
	ccv_dense_matrix_t* dx = 0;
	ccv_sobel(blur, &dx, 0, 1, 0);
	ccv_dense_matrix_t* dy = 0;
	ccv_sobel(blur, &dy, 0, 0, 1);
	ccv_dense_matrix_t* du = 0;
	ccv_sobel(blur, &du, 0, 1, 1);
	ccv_dense_matrix_t* dv = 0;
	ccv_sobel(blur, &dv, 0, -1, 1);
	ccv_matrix_free(blur);
	assert(CCV_GET_DATA_TYPE(dx->type) == CCV_GET_DATA_TYPE(dy->type));
	assert(CCV_GET_DATA_TYPE(dy->type) == CCV_GET_DATA_TYPE(du->type));
	assert(CCV_GET_DATA_TYPE(du->type) == CCV_GET_DATA_TYPE(dv->type));
	assert(CCV_GET_CHANNEL(dx->type) == CCV_GET_CHANNEL(dy->type));
	assert(CCV_GET_CHANNEL(dy->type) == CCV_GET_CHANNEL(du->type));
	assert(CCV_GET_CHANNEL(du->type) == CCV_GET_CHANNEL(dv->type));
	// this is a naive unoptimized implementation yet
	int i, j, k;
	unsigned char* a_ptr = a->data.u8;
	unsigned char* dx_ptr = dx->data.u8;
	unsigned char* dy_ptr = dy->data.u8;
	unsigned char* du_ptr = du->data.u8;
	unsigned char* dv_ptr = dv->data.u8;
	float* dbp = db->data.f32;
	if (ch == 1)
	{
#define for_block(_for_get_d, _for_get_a) \
		for (i = 0; i < a->rows; i++) \
		{ \
			for (j = 0; j < a->cols; j++) \
			{ \
				float fdx = _for_get_d(dx_ptr, j, 0), fdy = _for_get_d(dy_ptr, j, 0); \
				float fdu = _for_get_d(du_ptr, j, 0), fdv = _for_get_d(dv_ptr, j, 0); \
				float adx = fabsf(fdx), ady = fabsf(fdy); \
				float adu = fabsf(fdu), adv = fabsf(fdv); \
				dbp[0] = fdx, dbp[1] = fdy; \
				dbp[2] = fdu, dbp[3] = fdv; \
				dbp[4] = adx, dbp[5] = ady; \
				dbp[6] = adu, dbp[7] = adv; \
				dbp[8] = _for_get_a(a_ptr, j, 0) / 255.0; \
				dbp[9] = dbp[10] = 0; \
				dbp += 11; \
			} \
			a_ptr += a->step; \
			dx_ptr += dx->step; \
			dy_ptr += dy->step; \
			du_ptr += du->step; \
			dv_ptr += dv->step; \
		}
		ccv_matrix_getter(dx->type, ccv_matrix_getter_a, a->type, for_block);
#undef for_block
	} else {
#define for_block(_for_get_d, _for_get_a) \
		for (i = 0; i < a->rows; i++) \
		{ \
			for (j = 0; j < a->cols; j++) \
			{ \
				float fdx = _for_get_d(dx_ptr, j * ch, 0), fdy = _for_get_d(dy_ptr, j * ch, 0); \
				float fdu = _for_get_d(du_ptr, j * ch, 0), fdv = _for_get_d(dv_ptr, j * ch, 0); \
				float adx = fabsf(fdx), ady = fabsf(fdy); \
				float adu = fabsf(fdu), adv = fabsf(fdv); \
				/* select the strongest ones from all the channels */ \
				for (k = 1; k < ch; k++) \
				{ \
					if (fabsf((float)_for_get_d(dx_ptr, j * ch + k, 0)) > adx) \
					{ \
						fdx = _for_get_d(dx_ptr, j * ch + k, 0); \
						adx = fabsf(fdx); \
					} \
					if (fabsf((float)_for_get_d(dy_ptr, j * ch + k, 0)) > ady) \
					{ \
						fdy = _for_get_d(dy_ptr, j * ch + k, 0); \
						ady = fabsf(fdy); \
					} \
					if (fabsf((float)_for_get_d(du_ptr, j * ch + k, 0)) > adu) \
					{ \
						fdu = _for_get_d(du_ptr, j * ch + k, 0); \
						adu = fabsf(fdu); \
					} \
					if (fabsf((float)_for_get_d(dv_ptr, j * ch + k, 0)) > adv) \
					{ \
						fdv = _for_get_d(dv_ptr, j * ch + k, 0); \
						adv = fabsf(fdv); \
					} \
				} \
				dbp[0] = fdx, dbp[1] = fdy; \
				dbp[2] = fdu, dbp[3] = fdv; \
				dbp[4] = adx, dbp[5] = ady; \
				dbp[6] = adu, dbp[7] = adv; \
				_ccv_rgb_to_luv(_for_get_a(a_ptr, j * ch, 0) / 255.0, \
								_for_get_a(a_ptr, j * ch + 1, 0) / 255.0, \
								_for_get_a(a_ptr, j * ch + 2, 0) / 255.0, \
								dbp + 8, dbp + 9, dbp + 10); \
				dbp += 11; \
			} \
			a_ptr += a->step; \
			dx_ptr += dx->step; \
			dy_ptr += dy->step; \
			du_ptr += du->step; \
			dv_ptr += dv->step; \
		}
		ccv_matrix_getter(dx->type, ccv_matrix_getter_a, a->type, for_block);
#undef for_block
	}
	ccv_matrix_free(dx);
	ccv_matrix_free(dy);
	ccv_matrix_free(du);
	ccv_matrix_free(dv);
}

#if defined(HAVE_SSE2)
static inline void _ccv_scd_run_feature_at_sse2(float* at, int cols, ccv_scd_stump_feature_t* feature, __m128 surf[8])
{
	int i;
	// extract feature
	for (i = 0; i < 4; i++)
	{
		__m128 d0 = _mm_loadu_ps(at + (cols * feature->sy[i] + feature->sx[i]) * CCV_SCD_CHANNEL);
		__m128 d1 = _mm_loadu_ps(at + 4 + (cols * feature->sy[i] + feature->sx[i]) * CCV_SCD_CHANNEL);
		__m128 du0 = _mm_loadu_ps(at + (cols * feature->dy[i] + feature->sx[i]) * CCV_SCD_CHANNEL);
		__m128 du1 = _mm_loadu_ps(at + 4 + (cols * feature->dy[i] + feature->sx[i]) * CCV_SCD_CHANNEL);
		__m128 dv0 = _mm_loadu_ps(at + (cols * feature->sy[i] + feature->dx[i]) * CCV_SCD_CHANNEL);
		__m128 dv1 = _mm_loadu_ps(at + 4 + (cols * feature->sy[i] + feature->dx[i]) * CCV_SCD_CHANNEL);
		__m128 duv0 = _mm_loadu_ps(at + (cols * feature->dy[i] + feature->dx[i]) * CCV_SCD_CHANNEL);
		__m128 duv1 = _mm_loadu_ps(at + 4 + (cols * feature->dy[i] + feature->dx[i]) * CCV_SCD_CHANNEL);
		surf[i * 2] = _mm_sub_ps(_mm_add_ps(duv0, d0), _mm_add_ps(du0, dv0));
		surf[i * 2 + 1] = _mm_sub_ps(_mm_add_ps(duv1, d1), _mm_add_ps(du1, dv1));
	}
	// L2Hys normalization
	__m128 v0 = _mm_add_ps(_mm_mul_ps(surf[0], surf[0]), _mm_mul_ps(surf[1], surf[1]));
	__m128 v1 = _mm_add_ps(_mm_mul_ps(surf[2], surf[2]), _mm_mul_ps(surf[3], surf[3]));
	__m128 v2 = _mm_add_ps(_mm_mul_ps(surf[4], surf[4]), _mm_mul_ps(surf[5], surf[5]));
	__m128 v3 = _mm_add_ps(_mm_mul_ps(surf[6], surf[6]), _mm_mul_ps(surf[7], surf[7]));
	v0 = _mm_add_ps(v0, v1);
	v2 = _mm_add_ps(v2, v3);
	union {
		float f[4];
		__m128 p;
	} vx;
	vx.p = _mm_add_ps(v0, v2);
	v0 = _mm_set1_ps(1.0 / (sqrtf(vx.f[0] + vx.f[1] + vx.f[2] + vx.f[3]) + 1e-6));
	static float thlf = -2.0 / 5.65685424949; // -sqrtf(32)
	static float thuf = 2.0 / 5.65685424949; // sqrtf(32)
	const __m128 thl = _mm_set1_ps(thlf);
	const __m128 thu = _mm_set1_ps(thuf);
	for (i = 0; i < 8; i++)
	{
		surf[i] = _mm_mul_ps(surf[i], v0);
		surf[i] = _mm_min_ps(surf[i], thu);
		surf[i] = _mm_max_ps(surf[i], thl);
	}
	__m128 u0 = _mm_add_ps(_mm_mul_ps(surf[0], surf[0]), _mm_mul_ps(surf[1], surf[1]));
	__m128 u1 = _mm_add_ps(_mm_mul_ps(surf[2], surf[2]), _mm_mul_ps(surf[3], surf[3]));
	__m128 u2 = _mm_add_ps(_mm_mul_ps(surf[4], surf[4]), _mm_mul_ps(surf[5], surf[5]));
	__m128 u3 = _mm_add_ps(_mm_mul_ps(surf[6], surf[6]), _mm_mul_ps(surf[7], surf[7]));
	u0 = _mm_add_ps(u0, u1);
	u2 = _mm_add_ps(u2, u3);
	union {
		float f[4];
		__m128 p;
	} ux;
	ux.p = _mm_add_ps(u0, u2);
	u0 = _mm_set1_ps(1.0 / (sqrtf(ux.f[0] + ux.f[1] + ux.f[2] + ux.f[3]) + 1e-6));
	for (i = 0; i < 8; i++)
		surf[i] = _mm_mul_ps(surf[i], u0);
}
#else
static inline void _ccv_scd_run_feature_at(float* at, int cols, ccv_scd_stump_feature_t* feature, float surf[32])
{
	int i, j;
	// extract feature
	for (i = 0; i < 4; i++)
	{
		float* d = at + (cols * feature->sy[i] + feature->sx[i]) * CCV_SCD_CHANNEL;
		float* du = at + (cols * feature->dy[i] + feature->sx[i]) * CCV_SCD_CHANNEL;
		float* dv = at + (cols * feature->sy[i] + feature->dx[i]) * CCV_SCD_CHANNEL;
		float* duv = at + (cols * feature->dy[i] + feature->dx[i]) * CCV_SCD_CHANNEL;
		for (j = 0; j < 8; j++)
			surf[i * 8 + j] = duv[j] - du[j] + d[j] - dv[j];
	}
	// L2Hys normalization
	float v = 0;
	for (i = 0; i < 32; i++)
		v += surf[i] * surf[i];
	v = 1.0 / (sqrtf(v) + 1e-6);
	static float theta = 2.0 / 5.65685424949; // sqrtf(32)
	float u = 0;
	for (i = 0; i < 32; i++)
	{
		surf[i] = surf[i] * v;
		surf[i] = ccv_clamp(surf[i], -theta, theta);
		u += surf[i] * surf[i];
	}
	u = 1.0 / (sqrtf(u) + 1e-6);
	for (i = 0; i < 32; i++)
		surf[i] = surf[i] * u;
}
#endif

#ifdef HAVE_GSL
static ccv_array_t* _ccv_scd_collect_negatives(gsl_rng* rng, ccv_size_t size, ccv_array_t* hard_mine, int total, int grayscale)
{
	ccv_array_t* negatives = ccv_array_new(ccv_compute_dense_matrix_size(size.height, size.width, CCV_8U | (grayscale ? CCV_C1 : CCV_C3)), total, 0);
	int i, j, k;
	for (i = 0; i < total;)
	{
		FLUSH(CCV_CLI_INFO, " - collect negatives %d%% (%d / %d)", (i + 1) * 100 / total, i + 1, total);
		double ratio = (double)(total - i) / hard_mine->rnum;
		for (j = 0; j < hard_mine->rnum && i < total; j++)
		{
			ccv_file_info_t* file_info = (ccv_file_info_t*)ccv_array_get(hard_mine, j);
			ccv_dense_matrix_t* image = 0;
			ccv_read(file_info->filename, &image, CCV_IO_ANY_FILE | (grayscale ? CCV_IO_GRAY : CCV_IO_RGB_COLOR));
			if (image == 0)
			{
				PRINT(CCV_CLI_ERROR, "\n - %s: cannot be open, possibly corrupted\n", file_info->filename);
				continue;
			}
			double max_scale_ratio = ccv_min((double)image->rows / size.height, (double)image->cols / size.width);
			if (max_scale_ratio <= 0.5) // too small to be interesting
				continue;
			for (k = 0; k < ratio; k++)
				if (k < (int)ratio || gsl_rng_uniform(rng) <= ccv_max(0.1, ratio - (int)ratio))
				{
					FLUSH(CCV_CLI_INFO, " - collect negatives %d%% (%d / %d)", (i + 1) * 100 / total, i + 1, total);
					ccv_rect_t rect;
					double scale_ratio = gsl_rng_uniform(rng) * (max_scale_ratio - 0.5) + 0.5;
					rect.width = ccv_min(image->cols, (int)(size.width * scale_ratio + 0.5));
					rect.height = ccv_min(image->rows, (int)(size.height * scale_ratio + 0.5));
					rect.x = gsl_rng_uniform_int(rng, ccv_max(image->cols - rect.width + 1, 1));
					rect.y = gsl_rng_uniform_int(rng, ccv_max(image->rows - rect.height + 1, 1));
					ccv_dense_matrix_t* sliced = 0;
					ccv_slice(image, (ccv_matrix_t**)&sliced, 0, rect.y, rect.x, rect.height, rect.width);
					ccv_dense_matrix_t* b = 0;
					if (size.width > rect.width)
						ccv_resample(sliced, &b, 0, size.height, size.width, CCV_INTER_CUBIC);
					else
						ccv_resample(sliced, &b, 0, size.height, size.width, CCV_INTER_AREA);
					ccv_matrix_free(sliced);
					b->sig = 0;
					// this leveraged the fact that because I know the ccv_dense_matrix_t is continuous in memory
					ccv_array_push(negatives, b);
					ccv_matrix_free(b);
					++i;
					if (i >= total)
						break;
				}
			ccv_matrix_free(image);
		}
	}
	PRINT(CCV_CLI_INFO, "\n");
	ccv_make_array_immutable(negatives);
	return negatives;
}

static ccv_array_t*_ccv_scd_collect_positives(ccv_size_t size, ccv_array_t* posfiles, int grayscale)
{
	ccv_array_t* positives = ccv_array_new(ccv_compute_dense_matrix_size(size.height, size.width, CCV_8U | (grayscale ? CCV_C1 : CCV_C3)), posfiles->rnum, 0);
	int i;
	for (i = 0; i < posfiles->rnum; i++)
	{
		FLUSH(CCV_CLI_INFO, " - collect positives %d%% (%d / %d)", (i + 1) * 100 / posfiles->rnum, i + 1, posfiles->rnum);
		ccv_file_info_t* file_info = (ccv_file_info_t*)ccv_array_get(posfiles, i);
		ccv_dense_matrix_t* a = 0;
		ccv_read(file_info->filename, &a, CCV_IO_ANY_FILE | (grayscale ? CCV_IO_GRAY : CCV_IO_RGB_COLOR));
		a->sig = 0;
		ccv_array_push(positives, a);
		ccv_matrix_free(a);
	}
	PRINT(CCV_CLI_INFO, "\n");
	ccv_make_array_immutable(positives);
	return positives;
}

static ccv_array_t* _ccv_scd_stump_features(ccv_size_t base, int range_through, int step_through, ccv_size_t size)
{
	ccv_array_t* features = ccv_array_new(sizeof(ccv_scd_stump_feature_t), 64, 0);
	int x, y, w, h;
	for (w = base.width; w <= size.width; w += range_through)
		if (w % 4 == 0) // only allow 4:1
		{
			h = w / 4;
			for (x = 0; x <= size.width - w; x += step_through)
				for (y = 0; y <= size.height - h; y += step_through)
				{
					// 4x1 feature
					ccv_scd_stump_feature_t feature;
					feature.sx[0] = x;
					feature.dx[0] = x + (w / 4);
					feature.sx[1] = x + (w / 4);
					feature.dx[1] = x + 2 * (w / 4);
					feature.sx[2] = x + 2 * (w / 4);
					feature.dx[2] = x + 3 * (w / 4);
					feature.sx[3] = x + 3 * (w / 4);
					feature.dx[3] = x + w;
					feature.sy[0] = feature.sy[1] = feature.sy[2] = feature.sy[3] = y;
					feature.dy[0] = feature.dy[1] = feature.dy[2] = feature.dy[3] = y + h;
					ccv_array_push(features, &feature);
				}
		}
	for (h = base.height; h <= size.height; h += range_through)
		if (h % 4 == 0) // only allow 1:4
		{
			w = h / 4;
			for (x = 0; x <= size.width - w; x += step_through)
				for (y = 0; y <= size.height - h; y += step_through)
				{
					// 1x4 feature
					ccv_scd_stump_feature_t feature;
					feature.sx[0] = feature.sx[1] = feature.sx[2] = feature.sx[3] = x;
					feature.dx[0] = feature.dx[1] = feature.dx[2] = feature.dx[3] = x + w;
					feature.sy[0] = y;
					feature.dy[0] = y + (h / 4);
					feature.sy[1] = y + (h / 4);
					feature.dy[1] = y + 2 * (h / 4);
					feature.sy[2] = y + 2 * (h / 4);
					feature.dy[2] = y + 3 * (h / 4);
					feature.sy[3] = y + 3 * (h / 4);
					feature.dy[3] = y + h;
					ccv_array_push(features, &feature);
				}
		}
	for (w = base.width; w <= size.width; w += range_through)
		for (h = base.height; h <= size.height; h += range_through)
			for (x = 0; x <= size.width - w; x += step_through)
				for (y = 0; y <= size.height - h; y += step_through)
					if (w % 2 == 0 && h % 2 == 0 &&
						(w == h || w == h * 2 || w * 2 == h || w * 2 == h * 3 || w * 3 == h * 2)) // allow 1:1, 1:2, 2:1, 2:3, 3:2
					{
						// 2x2 feature
						ccv_scd_stump_feature_t feature;
						feature.sx[0] = feature.sx[1] = x;
						feature.dx[0] = feature.dx[1] = x + (w / 2);
						feature.sy[0] = feature.sy[2] = y;
						feature.dy[0] = feature.dy[2] = y + (h / 2);
						feature.sx[2] = feature.sx[3] = x + (w / 2);
						feature.dx[2] = feature.dx[3] = x + w;
						feature.sy[1] = feature.sy[3] = y + (h / 2);
						feature.dy[1] = feature.dy[3] = y + h;
						ccv_array_push(features, &feature);
					}
	return features;
}

typedef struct {
	double value;
	int index;
} ccv_scd_value_index_t;

#define more_than(s1, s2, aux) ((s1).value >= (s2).value)
static CCV_IMPLEMENT_QSORT(_ccv_scd_value_index_sortby_value, ccv_scd_value_index_t, more_than)
#undef more_than
#define less_than(s1, s2, aux) ((s1).index < (s2).index)
static CCV_IMPLEMENT_QSORT(_ccv_scd_value_index_sortby_index, ccv_scd_value_index_t, less_than)
#undef less_than

static float* _ccv_scd_get_surf_at(float* fv, int feature_no, int example_no, int positive_count, int negative_count)
{
	return fv + ((off_t)example_no + feature_no * (positive_count + negative_count)) * 32;
}

static void _ccv_scd_precompute_feature_vectors(const ccv_array_t* features, const ccv_array_t* positives, const ccv_array_t* negatives, float* fv)
{
	parallel_for(i, positives->rnum) {
		int j;
		if ((i + 1) % 4031 == 1)
			FLUSH(CCV_CLI_INFO, " - precompute feature vectors of example %d / %d over %d features", (int)(i + 1), positives->rnum + negatives->rnum, features->rnum);
		ccv_dense_matrix_t* a = (ccv_dense_matrix_t*)ccv_array_get(positives, i);
		a->data.u8 = (unsigned char*)(a + 1);
		ccv_dense_matrix_t* b = 0;
		ccv_scd(a, &b, 0);
		ccv_dense_matrix_t* sat = 0;
		ccv_sat(b, &sat, 0, CCV_PADDING_ZERO);
		ccv_matrix_free(b);
		for (j = 0; j < features->rnum; j++)
		{
			ccv_scd_stump_feature_t* feature = (ccv_scd_stump_feature_t*)ccv_array_get(features, j);
			// save to fv
#if defined(HAVE_SSE2)
			_ccv_scd_run_feature_at_sse2(sat->data.f32, sat->cols, feature, (__m128*)_ccv_scd_get_surf_at(fv, j, i, positives->rnum, negatives->rnum));
#else
			_ccv_scd_run_feature_at(sat->data.f32, sat->cols, feature, _ccv_scd_get_surf_at(fv, j, i, positives->rnum, negatives->rnum));
#endif
		}
		ccv_matrix_free(sat);
	} parallel_endfor
	parallel_for(i, negatives->rnum) {
		int j;
		if ((i + 1) % 731 == 1 || (i + 1) == negatives->rnum)
			FLUSH(CCV_CLI_INFO, " - precompute feature vectors of example %d / %d over %d features", (int)(i + positives->rnum + 1), positives->rnum + negatives->rnum, features->rnum);
		ccv_dense_matrix_t* a = (ccv_dense_matrix_t*)ccv_array_get(negatives, i);
		a->data.u8 = (unsigned char*)(a + 1);
		ccv_dense_matrix_t* b = 0;
		ccv_scd(a, &b, 0);
		ccv_dense_matrix_t* sat = 0;
		ccv_sat(b, &sat, 0, CCV_PADDING_ZERO);
		ccv_matrix_free(b);
		for (j = 0; j < features->rnum; j++)
		{
			ccv_scd_stump_feature_t* feature = (ccv_scd_stump_feature_t*)ccv_array_get(features, j);
			// save to fv
#if defined(HAVE_SSE2)
			_ccv_scd_run_feature_at_sse2(sat->data.f32, sat->cols, feature, (__m128*)_ccv_scd_get_surf_at(fv, j, i + positives->rnum, positives->rnum, negatives->rnum));
#else
			_ccv_scd_run_feature_at(sat->data.f32, sat->cols, feature, _ccv_scd_get_surf_at(fv, j, i + positives->rnum, positives->rnum, negatives->rnum));
#endif
		}
		ccv_matrix_free(sat);
	} parallel_endfor
}

typedef struct {
	int feature_no;
	double C;
	ccv_scd_value_index_t* pwidx;
	ccv_scd_value_index_t* nwidx;
	int positive_count;
	int negative_count;
	int active_positive_count;
	int active_negative_count;
	float* fv;
} ccv_loss_minimize_context_t;

static int _ccv_scd_stump_feature_gentle_adaboost_loss(const ccv_dense_matrix_t* x, double* f, ccv_dense_matrix_t* df, void* data)
{
	ccv_loss_minimize_context_t* context = (ccv_loss_minimize_context_t*)data;
	int i, j;
	float loss = 0;
	float* d = df->data.f32;
	for (i = 0; i < 32; i++)
	{
		loss += context->C * fabs(x->data.f32[i]);
		d[i] = x->data.f32[i] > 0 ? context->C : -context->C;
	}
	d[32] = 0;
	float* surf = _ccv_scd_get_surf_at(context->fv, context->feature_no, 0, context->positive_count, context->negative_count);
	for (i = 0; i < context->active_positive_count; i++)
	{
		float* cur_surf = surf + (off_t)(context->pwidx[i].index) * 32;
		float v = x->data.f32[32];
		for (j = 0; j < 32; j++)
			v += cur_surf[j] * x->data.f32[j];
		v = expf(v);
		float tanh = (v - 1) / (v + 1);
		loss += context->pwidx[i].value * (1.0 - tanh) * (1.0 - tanh);
		float dv = -8.0 * context->pwidx[i].value * v / ((1.0 + v) * (1.0 + v) * (1.0 + v));
		for (j = 0; j < 32; j++)
			d[j] += dv * cur_surf[j];
		d[32] += dv;
	}
	for (i = 0; i < context->active_negative_count; i++)
	{
		float* cur_surf = surf + (off_t)(context->nwidx[i].index + context->positive_count) * 32;
		float v = x->data.f32[32];
		for (j = 0; j < 32; j++)
			v += cur_surf[j] * x->data.f32[j];
		v = expf(v);
		float tanh = (v - 1) / (v + 1);
		loss += context->nwidx[i].value * (-1.0 - tanh) * (-1.0 - tanh);
		float dv = 8.0 * context->nwidx[i].value * v * v / ((1.0 + v) * (1.0 + v) * (1.0 + v));
		for (j = 0; j < 32; j++)
			d[j] += dv * cur_surf[j];
		d[32] += dv;
	}
	f[0] = loss;
	return 0;
}

static int _ccv_scd_weight_trimming(ccv_scd_value_index_t* idx, int count, double weight_trimming)
{
	int active_count = count;
	int i;
	double w = 0;
	for (i = 0; i < count; i++)
	{
		w += idx[i].value;
		if (w >= weight_trimming)
		{
			active_count = i + 1;
			break;
		}
	}
	assert(active_count > 0);
	for (i = active_count; i < count; i++)
		if (idx[i - 1].value == idx[i].value) // for exactly the same weight, we skip
			active_count = i + 1;
		else
			break;
	return active_count;
}

static void _ccv_scd_stump_feature_supervised_train(gsl_rng* rng, ccv_array_t* features, int positive_count, int negative_count, double* pw, double* nw, float* fv, double C, double weight_trimming)
{
	int i;
	ccv_scd_value_index_t* pwidx = (ccv_scd_value_index_t*)ccmalloc(sizeof(ccv_scd_value_index_t) * positive_count);
	ccv_scd_value_index_t* nwidx = (ccv_scd_value_index_t*)ccmalloc(sizeof(ccv_scd_value_index_t) * negative_count);
	for (i = 0; i < positive_count; i++)
		pwidx[i].value = pw[i], pwidx[i].index = i;
	for (i = 0; i < negative_count; i++)
		nwidx[i].value = nw[i], nwidx[i].index = i;
	_ccv_scd_value_index_sortby_value(pwidx, positive_count, 0);
	_ccv_scd_value_index_sortby_value(nwidx, negative_count, 0);
	int active_positive_count = _ccv_scd_weight_trimming(pwidx, positive_count, weight_trimming * 0.5); // the sum of positive weights is 0.5
	int active_negative_count = _ccv_scd_weight_trimming(nwidx, negative_count, weight_trimming * 0.5); // the sum of negative weights is 0.5
	_ccv_scd_value_index_sortby_index(pwidx, active_positive_count, 0);
	_ccv_scd_value_index_sortby_index(nwidx, active_negative_count, 0);
	parallel_for(i, features->rnum) {
		if ((i + 1) % 31 == 1 || (i + 1) == features->rnum)
			FLUSH(CCV_CLI_INFO, " - supervised train feature %d / %d with logistic regression, active set {%d, %d}", (int)(i + 1), features->rnum, active_positive_count, active_negative_count);
		ccv_scd_stump_feature_t* feature = (ccv_scd_stump_feature_t*)ccv_array_get(features, i);
		ccv_loss_minimize_context_t context = {
			.feature_no = i,
			.C = C,
			.positive_count = positive_count,
			.negative_count = negative_count,
			.active_positive_count = active_positive_count,
			.active_negative_count = active_negative_count,
			.pwidx = pwidx,
			.nwidx = nwidx,
			.fv = fv,
		};
		ccv_dense_matrix_t* x = ccv_dense_matrix_new(1, 33, CCV_32F | CCV_C1, 0, 0);
		int j;
		for (j = 0; j < 33; j++)
			x->data.f32[j] = gsl_rng_uniform_pos(rng) * 2 - 1.0;
		ccv_minimize(x, 10, 1.0, _ccv_scd_stump_feature_gentle_adaboost_loss, ccv_minimize_default_params, &context);
		for (j = 0; j < 32; j++)
			feature->w[j] = x->data.f32[j];
		feature->bias = x->data.f32[32];
		ccv_matrix_free(x);
	} parallel_endfor
	ccfree(pwidx);
	ccfree(nwidx);
}

static double _ccv_scd_auc(double* s, int posnum, int negnum)
{
	ccv_scd_value_index_t* sidx = (ccv_scd_value_index_t*)ccmalloc(sizeof(ccv_scd_value_index_t) * (posnum + negnum));
	int i;
	for (i = 0; i < posnum + negnum; i++)
		sidx[i].value = s[i], sidx[i].index = i;
	_ccv_scd_value_index_sortby_value(sidx, posnum + negnum, 0);
	int fp = 0, tp = 0, fp_prev = 0, tp_prev = 0;
	double a = 0;
	double f_prev = -DBL_MAX;
	for (i = 0; i < posnum + negnum; i++)
	{
		if (sidx[i].value != f_prev)
		{
			a += (double)(fp - fp_prev) * (tp + tp_prev) * 0.5;
			f_prev = sidx[i].value;
			fp_prev = fp;
			tp_prev = tp;
		}
		if (sidx[i].index < posnum)
			++tp;
		else
			++fp;
	}
	ccfree(sidx);
	a += (double)(negnum - fp_prev) * (posnum + tp_prev) * 0.5;
	return a / ((double)posnum * negnum);
}

static int _ccv_scd_find_match_feature(ccv_scd_stump_feature_t* value, ccv_array_t* features)
{
	int i;
	for (i = 0; i < features->rnum; i++)
	{
		ccv_scd_stump_feature_t* feature = (ccv_scd_stump_feature_t*)ccv_array_get(features, i);
		if (feature->sx[0] == value->sx[0] && feature->sy[0] == value->sy[0] &&
			feature->dx[0] == value->dx[0] && feature->dy[0] == value->dy[0] &&
			feature->sx[1] == value->sx[1] && feature->sy[1] == value->sy[1] &&
			feature->dx[1] == value->dx[1] && feature->dy[1] == value->dy[1] &&
			feature->sx[2] == value->sx[2] && feature->sy[2] == value->sy[2] &&
			feature->dx[2] == value->dx[2] && feature->dy[2] == value->dy[2] &&
			feature->sx[3] == value->sx[3] && feature->sy[3] == value->sy[3] &&
			feature->dx[3] == value->dx[3] && feature->dy[3] == value->dy[3])
			return i;
	}
	return -1;
}

static int _ccv_scd_best_feature_gentle_adaboost(double* s, ccv_array_t* features, double* pw, double* nw, int positive_count, int negative_count, float* fv)
{
	int i;
	double* error_rate = (double*)cccalloc(features->rnum, sizeof(double));
	assert(positive_count + negative_count > 0);
	parallel_for(i, features->rnum) {
		int j, k;
		if ((i + 1) % 331 == 1 || (i + 1) == features->rnum)
			FLUSH(CCV_CLI_INFO, " - go through %d / %d (%.1f%%) for adaboost", (int)(i + 1), features->rnum, (float)(i + 1) * 100 / features->rnum);
		ccv_scd_stump_feature_t* feature = (ccv_scd_stump_feature_t*)ccv_array_get(features, i);
		for (j = 0; j < positive_count; j++)
		{
			float* surf = _ccv_scd_get_surf_at(fv, i, j, positive_count, negative_count);
			float v = feature->bias;
			for (k = 0; k < 32; k++)
				v += surf[k] * feature->w[k];
			v = expf(v);
			v = (v - 1) / (v + 1); // probability
			error_rate[i] += pw[j] * (1 - v) * (1 - v);
		}
		for (j = 0; j < negative_count; j++)
		{
			float* surf = _ccv_scd_get_surf_at(fv, i, j + positive_count, positive_count, negative_count);
			float v = feature->bias;
			for (k = 0; k < 32; k++)
				v += surf[k] * feature->w[k];
			v = expf(v);
			v = (v - 1) / (v + 1); // probability
			error_rate[i] += nw[j] * (-1 - v) * (-1 - v);
		}
	} parallel_endfor
	double min_error_rate = error_rate[0];
	int j = 0;
	for (i = 1; i < features->rnum; i++)
		if (error_rate[i] < min_error_rate)
		{
			min_error_rate = error_rate[i];
			j = i;
		}
	ccfree(error_rate);
	return j;
}

static float _ccv_scd_threshold_at_hit_rate(double* s, int posnum, int negnum, float hit_rate, float* tp_out, float* fp_out)
{
	ccv_scd_value_index_t* psidx = (ccv_scd_value_index_t*)ccmalloc(sizeof(ccv_scd_value_index_t) * posnum);
	int i;
	for (i = 0; i < posnum; i++)
		psidx[i].value = s[i], psidx[i].index = i;
	_ccv_scd_value_index_sortby_value(psidx, posnum, 0);
	float threshold = psidx[(int)((posnum - 0.5) * hit_rate - 0.5)].value - 1e-6;
	ccfree(psidx);
	int tp = 0;
	for (i = 0; i < posnum; i++)
		if (s[i] > threshold)
			++tp;
	int fp = 0;
	for (i = 0; i < negnum; i++)
		if (s[i + posnum] > threshold)
			++fp;
	if (tp_out)
		*tp_out = (float)tp / posnum;
	if (fp_out)
		*fp_out = (float)fp / negnum;
	return threshold;
}

static int _ccv_scd_classifier_cascade_pass(ccv_scd_classifier_cascade_t* cascade, ccv_dense_matrix_t* a)
{
#if defined(HAVE_SSE2)
	__m128 surf[8];
#else
	float surf[32];
#endif
	ccv_dense_matrix_t* b = 0;
	ccv_scd(a, &b, 0);
	ccv_dense_matrix_t* sat = 0;
	ccv_sat(b, &sat, 0, CCV_PADDING_ZERO);
	ccv_matrix_free(b);
	int pass = 1;
	int i, j;
	for (i = 0; i < cascade->count; i++)
	{
		ccv_scd_stump_classifier_t* classifier = cascade->classifiers + i;
		float v = 0;
		for (j = 0; j < classifier->count; j++)
		{
			ccv_scd_stump_feature_t* feature = classifier->features + j;
#if defined(HAVE_SSE2)
			_ccv_scd_run_feature_at_sse2(sat->data.f32, sat->cols, feature, surf);
			__m128 u0 = _mm_add_ps(_mm_mul_ps(surf[0], _mm_loadu_ps(feature->w)), _mm_mul_ps(surf[1], _mm_loadu_ps(feature->w + 4)));
			__m128 u1 = _mm_add_ps(_mm_mul_ps(surf[2], _mm_loadu_ps(feature->w + 8)), _mm_mul_ps(surf[3], _mm_loadu_ps(feature->w + 12)));
			__m128 u2 = _mm_add_ps(_mm_mul_ps(surf[4], _mm_loadu_ps(feature->w + 16)), _mm_mul_ps(surf[5], _mm_loadu_ps(feature->w + 20)));
			__m128 u3 = _mm_add_ps(_mm_mul_ps(surf[6], _mm_loadu_ps(feature->w + 24)), _mm_mul_ps(surf[7], _mm_loadu_ps(feature->w + 28)));
			u0 = _mm_add_ps(u0, u1);
			u2 = _mm_add_ps(u2, u3);
			union {
				float f[4];
				__m128 p;
			} ux;
			ux.p = _mm_add_ps(u0, u2);
			float u = expf(feature->bias + ux.f[0] + ux.f[1] + ux.f[2] + ux.f[3]);
#else
			_ccv_scd_run_feature_at(sat->data.f32, sat->cols, feature, surf);
			float u = feature->bias;
			int k;
			for (k = 0; k < 32; k++)
				u += surf[k] * feature->w[k];
			u = expf(u);
#endif
			v += (u - 1) / (u + 1);
		}
		if (v <= classifier->threshold)
		{
			pass = 0;
			break;
		}
	}
	ccv_matrix_free(sat);
	return pass;
}

static ccv_array_t* _ccv_scd_hard_mining(gsl_rng* rng, ccv_scd_classifier_cascade_t* cascade, ccv_array_t* hard_mine, ccv_array_t* negatives, int negative_count, int grayscale, int even_dist)
{
	ccv_array_t* hard_negatives = ccv_array_new(ccv_compute_dense_matrix_size(cascade->size.height, cascade->size.width, CCV_8U | (grayscale ? CCV_C1 : CCV_C3)), negative_count, 0);
	int i, j, t;
	for (i = 0; i < negatives->rnum; i++)
	{
		ccv_dense_matrix_t* a = (ccv_dense_matrix_t*)ccv_array_get(negatives, i);
		a->data.u8 = (unsigned char*)(a + 1);
		if (_ccv_scd_classifier_cascade_pass(cascade, a))
			ccv_array_push(hard_negatives, a);
	}
	int n_per_mine = ccv_max((negative_count - hard_negatives->rnum) / hard_mine->rnum, 10);
	// the hard mining comes in following fashion:
	// 1). original, with n_per_mine set;
	// 2). horizontal flip, with n_per_mine set;
	// 3). vertical flip, with n_per_mine set;
	// 4). 180 rotation, with n_per_mine set;
	// 5~8). repeat above, but with no n_per_mine set;
	// after above, if we still cannot collect enough, so be it.
	for (t = (even_dist ? 0 : 4); t < 8 /* exhausted all variations */ && hard_negatives->rnum < negative_count; t++)
	{
		for (i = 0; i < hard_mine->rnum; i++)
		{
			FLUSH(CCV_CLI_INFO, " - hard mine negatives %d%% with %d-th permutation", 100 * hard_negatives->rnum / negative_count, t + 1);
			ccv_file_info_t* file_info = (ccv_file_info_t*)ccv_array_get(hard_mine, i);
			ccv_dense_matrix_t* image = 0;
			ccv_read(file_info->filename, &image, CCV_IO_ANY_FILE | (grayscale ? CCV_IO_GRAY : CCV_IO_RGB_COLOR));
			if (image == 0)
			{
				PRINT(CCV_CLI_ERROR, "\n - %s: cannot be open, possibly corrupted\n", file_info->filename);
				continue;
			}
			if (t % 2 != 0)
				ccv_flip(image, 0, 0, CCV_FLIP_X);
			if (t % 4 >= 2)
				ccv_flip(image, 0, 0, CCV_FLIP_Y);
			if (t >= 4)
				n_per_mine = negative_count; // no hard limit on n_per_mine anymore for the last pass
			ccv_scd_param_t params = {
				.interval = 3,
				.min_neighbors = 0,
				.step_through = 4,
				.size = cascade->size,
			};
			ccv_array_t* objects = ccv_scd_detect_objects(image, &cascade, 1, params);
			if (objects->rnum > 0)
			{
				gsl_ran_shuffle(rng, objects->data, objects->rnum, objects->rsize);
				for (j = 0; j < ccv_min(objects->rnum, n_per_mine); j++)
				{
					ccv_rect_t* rect = (ccv_rect_t*)ccv_array_get(objects, j);
					if (rect->x < 0 || rect->y < 0 || rect->x + rect->width > image->cols || rect->y + rect->height > image->rows)
						continue;
					ccv_dense_matrix_t* sliced = 0;
					ccv_slice(image, (ccv_matrix_t**)&sliced, 0, rect->y, rect->x, rect->height, rect->width);
					ccv_dense_matrix_t* resized = 0;
					assert(sliced->rows >= cascade->size.height && sliced->cols >= cascade->size.width);
					if (sliced->rows > cascade->size.height || sliced->cols > cascade->size.width)
					{
						ccv_resample(sliced, &resized, 0, cascade->size.height, cascade->size.width, CCV_INTER_CUBIC);
						ccv_matrix_free(sliced);
					} else {
						resized = sliced;
					}
					if (_ccv_scd_classifier_cascade_pass(cascade, resized))
						ccv_array_push(hard_negatives, resized);
					ccv_matrix_free(resized);
					if (hard_negatives->rnum >= negative_count)
						break;
				}
			}
			ccv_matrix_free(image);
			if (hard_negatives->rnum >= negative_count)
				break;
		}
	}
	FLUSH(CCV_CLI_INFO, " - hard mine negatives : %d\n", hard_negatives->rnum);
	ccv_make_array_immutable(hard_negatives);
	return hard_negatives;
}

typedef struct {
	ccv_function_state_reserve_field;
	int t, k;
	uint64_t array_signature;
	ccv_array_t* features;
	ccv_array_t* positives;
	ccv_array_t* negatives;
	double* s;
	double* pw;
	double* nw;
	float* fv; // feature vector for examples * feature
	double auc_prev;
	double accu_true_positive_rate;
	double accu_false_positive_rate;
	ccv_scd_classifier_cascade_t* cascade;
	ccv_scd_train_param_t params;
} ccv_scd_classifier_cascade_new_function_state_t;

static void _ccv_scd_classifier_cascade_new_function_state_read(const char* filename, ccv_scd_classifier_cascade_new_function_state_t* z)
{
	ccv_scd_classifier_cascade_t* cascade = ccv_scd_classifier_cascade_read(filename);
	if (!cascade)
		return;
	if (z->cascade)
		ccv_scd_classifier_cascade_free(z->cascade);
	z->cascade = cascade;
	assert(z->cascade->size.width == z->params.size.width);
	assert(z->cascade->size.height == z->params.size.height);
	sqlite3* db = 0;
	if (SQLITE_OK == sqlite3_open(filename, &db))
	{
		const char negative_data_qs[] =
			"SELECT data, rnum, rsize FROM negative_data WHERE id=0;";
		sqlite3_stmt* negative_data_stmt = 0;
		if (SQLITE_OK == sqlite3_prepare_v2(db, negative_data_qs, sizeof(negative_data_qs), &negative_data_stmt, 0))
		{
			if (sqlite3_step(negative_data_stmt) == SQLITE_ROW)
			{
				int rsize = ccv_compute_dense_matrix_size(z->cascade->size.height, z->cascade->size.width, CCV_8U | (z->params.grayscale ? CCV_C1 : CCV_C3));
				int rnum = sqlite3_column_int(negative_data_stmt, 1);
				assert(sqlite3_column_int(negative_data_stmt, 2) == rsize);
				size_t size = sqlite3_column_bytes(negative_data_stmt, 0);
				assert(size == (size_t)rsize * rnum);
				if (z->negatives)
					ccv_array_clear(z->negatives);
				else
					z->negatives = ccv_array_new(rsize, rnum, 0);
				int i;
				const uint8_t* data = (const uint8_t*)sqlite3_column_blob(negative_data_stmt, 0);
				for (i = 0; i < rnum; i++)
					ccv_array_push(z->negatives, data + (off_t)i * rsize);
				ccv_make_array_immutable(z->negatives);
				z->array_signature = z->negatives->sig;
			}
			sqlite3_finalize(negative_data_stmt);
		}
		const char function_state_qs[] =
			"SELECT t, k, positive_count, auc_prev, " // 4
			"accu_true_positive_rate, accu_false_positive_rate, " // 6
			"line_no, s, pw, nw FROM function_state WHERE fsid = 0;"; // 10
		sqlite3_stmt* function_state_stmt = 0;
		if (SQLITE_OK == sqlite3_prepare_v2(db, function_state_qs, sizeof(function_state_qs), &function_state_stmt, 0))
		{
			if (sqlite3_step(function_state_stmt) == SQLITE_ROW)
			{
				z->t = sqlite3_column_int(function_state_stmt, 0);
				z->k = sqlite3_column_int(function_state_stmt, 1);
				int positive_count = sqlite3_column_int(function_state_stmt, 2);
				assert(positive_count == z->positives->rnum);
				z->auc_prev = sqlite3_column_double(function_state_stmt, 3);
				z->accu_true_positive_rate = sqlite3_column_double(function_state_stmt, 4);
				z->accu_false_positive_rate = sqlite3_column_double(function_state_stmt, 5);
				z->line_no = sqlite3_column_int(function_state_stmt, 6);
				size_t size = sqlite3_column_bytes(function_state_stmt, 7);
				const void* s = sqlite3_column_blob(function_state_stmt, 7);
				memcpy(z->s, s, size);
				size = sqlite3_column_bytes(function_state_stmt, 8);
				const void* pw = sqlite3_column_blob(function_state_stmt, 8);
				memcpy(z->pw, pw, size);
				size = sqlite3_column_bytes(function_state_stmt, 9);
				const void* nw = sqlite3_column_blob(function_state_stmt, 9);
				memcpy(z->nw, nw, size);
			}
			sqlite3_finalize(function_state_stmt);
		}
		_ccv_scd_precompute_feature_vectors(z->features, z->positives, z->negatives, z->fv);
		sqlite3_close(db);
	}
}

static void _ccv_scd_classifier_cascade_new_function_state_write(ccv_scd_classifier_cascade_new_function_state_t* z, const char* filename)
{
	ccv_scd_classifier_cascade_write(z->cascade, filename);
	sqlite3* db = 0;
	if (SQLITE_OK == sqlite3_open(filename, &db))
	{
		const char function_state_create_table_qs[] =
			"CREATE TABLE IF NOT EXISTS function_state "
			"(fsid INTEGER PRIMARY KEY ASC, t INTEGER, k INTEGER, positive_count INTEGER, auc_prev DOUBLE, accu_true_positive_rate DOUBLE, accu_false_positive_rate DOUBLE, line_no INTEGER, s BLOB, pw BLOB, nw BLOB);"
			"CREATE TABLE IF NOT EXISTS negative_data "
			"(id INTEGER PRIMARY KEY ASC, data BLOB, rnum INTEGER, rsize INTEGER);";
		assert(SQLITE_OK == sqlite3_exec(db, function_state_create_table_qs, 0, 0, 0));
		const char function_state_insert_qs[] =
			"REPLACE INTO function_state "
			"(fsid, t, k, positive_count, auc_prev, accu_true_positive_rate, accu_false_positive_rate, line_no, s, pw, nw) VALUES "
			"(0, $t, $k, $positive_count, $auc_prev, $accu_true_positive_rate, $accu_false_positive_rate, $line_no, $s, $pw, $nw);";
		sqlite3_stmt* function_state_insert_stmt = 0;
		assert(SQLITE_OK == sqlite3_prepare_v2(db, function_state_insert_qs, sizeof(function_state_insert_qs), &function_state_insert_stmt, 0));
		sqlite3_bind_int(function_state_insert_stmt, 1, z->t);
		sqlite3_bind_int(function_state_insert_stmt, 2, z->k);
		sqlite3_bind_int(function_state_insert_stmt, 3, z->positives->rnum);
		sqlite3_bind_double(function_state_insert_stmt, 4, z->auc_prev);
		sqlite3_bind_double(function_state_insert_stmt, 5, z->accu_true_positive_rate);
		sqlite3_bind_double(function_state_insert_stmt, 6, z->accu_false_positive_rate);
		sqlite3_bind_int(function_state_insert_stmt, 7, z->line_no);
		sqlite3_bind_blob(function_state_insert_stmt, 8, z->s, sizeof(double) * (z->positives->rnum + z->negatives->rnum), SQLITE_STATIC);
		sqlite3_bind_blob(function_state_insert_stmt, 9, z->pw, sizeof(double) * z->positives->rnum, SQLITE_STATIC);
		sqlite3_bind_blob(function_state_insert_stmt, 10, z->nw, sizeof(double) * z->negatives->rnum, SQLITE_STATIC);
		assert(SQLITE_DONE == sqlite3_step(function_state_insert_stmt));
		sqlite3_finalize(function_state_insert_stmt);
		if (z->array_signature != z->negatives->sig)
		{
			const char negative_data_insert_qs[] =
				"REPLACE INTO negative_data "
				"(id, data, rnum, rsize) VALUES (0, $data, $rnum, $rsize);";
			sqlite3_stmt* negative_data_insert_stmt = 0;
			assert(SQLITE_OK == sqlite3_prepare_v2(db, negative_data_insert_qs, sizeof(negative_data_insert_qs), &negative_data_insert_stmt, 0));
			sqlite3_bind_blob(negative_data_insert_stmt, 1, z->negatives->data, z->negatives->rsize * z->negatives->rnum, SQLITE_STATIC);
			sqlite3_bind_int(negative_data_insert_stmt, 2, z->negatives->rnum);
			sqlite3_bind_int(negative_data_insert_stmt, 3, z->negatives->rsize);
			assert(SQLITE_DONE == sqlite3_step(negative_data_insert_stmt));
			sqlite3_finalize(negative_data_insert_stmt);
			z->array_signature = z->negatives->sig;
		}
		sqlite3_close(db);
	}
}
#endif

ccv_scd_classifier_cascade_t* ccv_scd_classifier_cascade_new(ccv_array_t* posfiles, ccv_array_t* hard_mine, int negative_count, const char* filename, ccv_scd_train_param_t params)
{
#ifdef HAVE_GSL
	assert(posfiles->rnum > 0);
	assert(hard_mine->rnum > 0);
	gsl_rng_env_setup();
	gsl_rng* rng = gsl_rng_alloc(gsl_rng_default);
	ccv_scd_classifier_cascade_new_function_state_t z = {0};
	z.features = _ccv_scd_stump_features(params.feature.base, params.feature.range_through, params.feature.step_through, params.size);
	PRINT(CCV_CLI_INFO, " - using %d features\n", z.features->rnum);
	int i, j, p, q;
	z.positives = _ccv_scd_collect_positives(params.size, posfiles, params.grayscale);
	double* h = (double*)ccmalloc(sizeof(double) * (z.positives->rnum + negative_count));
	z.s = (double*)ccmalloc(sizeof(double) * (z.positives->rnum + negative_count));
	assert(z.s);
	z.pw = (double*)ccmalloc(sizeof(double) * z.positives->rnum);
	assert(z.pw);
	z.nw = (double*)ccmalloc(sizeof(double) * negative_count);
	assert(z.nw);
	ccmemalign((void**)&z.fv, 16, sizeof(float) * (z.positives->rnum + negative_count) * z.features->rnum * 32);
	assert(z.fv);
	z.params = params;
	ccv_function_state_begin(_ccv_scd_classifier_cascade_new_function_state_read, z, filename);
	z.negatives = _ccv_scd_collect_negatives(rng, params.size, hard_mine, negative_count, params.grayscale);
	_ccv_scd_precompute_feature_vectors(z.features, z.positives, z.negatives, z.fv);
	z.cascade = (ccv_scd_classifier_cascade_t*)ccmalloc(sizeof(ccv_scd_classifier_cascade_t));
	z.cascade->margin = ccv_margin(0, 0, 0, 0);
	z.cascade->size = params.size;
	z.cascade->count = 0;
	z.cascade->classifiers = 0;
	z.accu_true_positive_rate = 1;
	z.accu_false_positive_rate = 1;
	ccv_function_state_resume(_ccv_scd_classifier_cascade_new_function_state_write, z, filename);
	for (z.t = 0; z.t < params.boosting; z.t++)
	{
		for (i = 0; i < z.positives->rnum; i++)
			z.pw[i] = 0.5 / z.positives->rnum;
		for (i = 0; i < z.negatives->rnum; i++)
			z.nw[i] = 0.5 / z.negatives->rnum;
		memset(z.s, 0, sizeof(double) * (z.positives->rnum + z.negatives->rnum));
		z.cascade->classifiers = (ccv_scd_stump_classifier_t*)ccrealloc(z.cascade->classifiers, sizeof(ccv_scd_stump_classifier_t) * (z.t + 1));
		z.cascade->count = z.t + 1;
		z.cascade->classifiers[z.t].threshold = 0;
		z.cascade->classifiers[z.t].features = 0;
		z.cascade->classifiers[z.t].count = 0;
		z.auc_prev = 0;
		assert(z.positives->rnum > 0 && z.negatives->rnum > 0);
		// for the first prune stages, we have more restrictive number of features (faster)
		for (z.k = 0; z.k < (z.t < params.stop_criteria.prune_stage ? params.stop_criteria.prune_feature : params.stop_criteria.maximum_feature); z.k++)
		{
			ccv_scd_stump_classifier_t* classifier = z.cascade->classifiers + z.t;
			classifier->features = (ccv_scd_stump_feature_t*)ccrealloc(classifier->features, sizeof(ccv_scd_stump_feature_t) * (z.k + 1));
			_ccv_scd_stump_feature_supervised_train(rng, z.features, z.positives->rnum, z.negatives->rnum, z.pw, z.nw, z.fv, params.C, params.weight_trimming);
			int best_feature_no = _ccv_scd_best_feature_gentle_adaboost(z.s, z.features, z.pw, z.nw, z.positives->rnum, z.negatives->rnum, z.fv);
			ccv_scd_stump_feature_t best_feature = *(ccv_scd_stump_feature_t*)ccv_array_get(z.features, best_feature_no);
			for (i = 0; i < z.positives->rnum + z.negatives->rnum; i++)
			{
				float* surf = _ccv_scd_get_surf_at(z.fv, best_feature_no, i, z.positives->rnum, z.negatives->rnum);
				float v = best_feature.bias;
				for (j = 0; j < 32; j++)
					v += best_feature.w[j] * surf[j];
				v = expf(v);
				h[i] = (v - 1) / (v + 1);
			}
			// compute the total score so far
			for (i = 0; i < z.positives->rnum + z.negatives->rnum; i++)
				z.s[i] += h[i];
			// compute AUC
			double auc = _ccv_scd_auc(z.s, z.positives->rnum, z.negatives->rnum);
			float true_positive_rate = 0;
			float false_positive_rate = 0;
			// compute true positive / false positive rate
			_ccv_scd_threshold_at_hit_rate(z.s, z.positives->rnum, z.negatives->rnum, params.stop_criteria.hit_rate, &true_positive_rate, &false_positive_rate);
			FLUSH(CCV_CLI_INFO, " - at %d-th iteration, auc: %lf, TP rate: %f, FP rate: %f\n", z.k + 1, auc, true_positive_rate, false_positive_rate);
			PRINT(CCV_CLI_INFO, " --- pick feature %s @ (%d, %d, %d, %d)\n", ((best_feature.dy[3] == best_feature.dy[0] ? "4x1" : (best_feature.dx[3] == best_feature.dx[0] ? "1x4" : "2x2"))), best_feature.sx[0], best_feature.sy[0], best_feature.dx[3], best_feature.dy[3]);
			classifier->features[z.k] = best_feature;
			classifier->count = z.k + 1;
			double auc_prev = z.auc_prev;
			z.auc_prev = auc;
			// auc stop to improve, as well as the false positive rate goal reach, at that point, we stop
			if (auc - auc_prev < params.stop_criteria.auc_crit && false_positive_rate < params.stop_criteria.false_positive_rate)
				break;
			// re-weight, with Gentle AdaBoost
			for (i = 0; i < z.positives->rnum; i++)
				z.pw[i] *= exp(-h[i]);
			for (i = 0; i < z.negatives->rnum; i++)
				z.nw[i] *= exp(h[i + z.positives->rnum]);
			// re-normalize
			double w = 0;
			for (i = 0; i < z.positives->rnum; i++)
				w += z.pw[i];
			w = 0.5 / w;
			for (i = 0; i < z.positives->rnum; i++)
				z.pw[i] *= w;
			w = 0;
			for (i = 0; i < z.negatives->rnum; i++)
				w += z.nw[i];
			w = 0.5 / w;
			for (i = 0; i < z.negatives->rnum; i++)
				z.nw[i] *= w;
			ccv_function_state_resume(_ccv_scd_classifier_cascade_new_function_state_write, z, filename);
		}
		// backtrack removal
		while (z.cascade->classifiers[z.t].count > 1)
		{
			double max_auc = 0;
			p = -1;
			for (i = 0; i < z.cascade->classifiers[z.t].count; i++)
			{
				ccv_scd_stump_feature_t* feature = z.cascade->classifiers[z.t].features + i;
				int k = _ccv_scd_find_match_feature(feature, z.features);
				assert(k >= 0);
				for (j = 0; j < z.positives->rnum + z.negatives->rnum; j++)
				{
					float* surf = _ccv_scd_get_surf_at(z.fv, k, j, z.positives->rnum, z.negatives->rnum);
					float v = feature->bias;
					for (q = 0; q < 32; q++)
						v += feature->w[q]* surf[q];
					v = expf(v);
					h[j] = z.s[j] - (v - 1) / (v + 1);
				}
				double auc = _ccv_scd_auc(h, z.positives->rnum, z.negatives->rnum);
				FLUSH(CCV_CLI_INFO, " - attempting without %d-th feature, auc: %lf", i + 1, auc);
				if (auc >= max_auc)
					max_auc = auc, p = i;
			}
			if (max_auc >= z.auc_prev)
			{
				FLUSH(CCV_CLI_INFO, " - remove %d-th feature with new auc %lf\n", p + 1, max_auc);
				ccv_scd_stump_feature_t* feature = z.cascade->classifiers[z.t].features + p;
				int k = _ccv_scd_find_match_feature(feature, z.features);
				assert(k >= 0);
				for (j = 0; j < z.positives->rnum + z.negatives->rnum; j++)
				{
					float* surf = _ccv_scd_get_surf_at(z.fv, k, j, z.positives->rnum, z.negatives->rnum);
					float v = feature->bias;
					for (q = 0; q < 32; q++)
						v += feature->w[q] * surf[q];
					v = expf(v);
					z.s[j] -= (v - 1) / (v + 1);
				}
				z.auc_prev = _ccv_scd_auc(z.s, z.positives->rnum, z.negatives->rnum);
				--z.cascade->classifiers[z.t].count;
				if (p < z.cascade->classifiers[z.t].count)
					memmove(z.cascade->classifiers[z.t].features + p + 1, z.cascade->classifiers[z.t].features + p, sizeof(ccv_scd_stump_feature_t) * (z.cascade->classifiers[z.t].count - p));
			} else
				break;
		}
		float true_positive_rate = 0;
		float false_positive_rate = 0;
		z.cascade->classifiers[z.t].threshold = _ccv_scd_threshold_at_hit_rate(z.s, z.positives->rnum, z.negatives->rnum, params.stop_criteria.hit_rate, &true_positive_rate, &false_positive_rate);
		z.accu_true_positive_rate *= true_positive_rate;
		z.accu_false_positive_rate *= false_positive_rate;
		FLUSH(CCV_CLI_INFO, " - %d-th stage classifier TP rate : %f, FP rate : %f, ATP rate : %lf, AFP rate : %lg, at threshold : %f\n", z.t + 1, true_positive_rate, false_positive_rate, z.accu_true_positive_rate, z.accu_false_positive_rate, z.cascade->classifiers[z.t].threshold);
		if (z.accu_false_positive_rate < params.stop_criteria.accu_false_positive_rate)
			break;
		ccv_function_state_resume(_ccv_scd_classifier_cascade_new_function_state_write, z, filename);
		if (z.t < params.boosting - 1)
		{
			int pass = 0;
			for (i = 0; i < z.positives->rnum; i++)
			{
				ccv_dense_matrix_t* a = (ccv_dense_matrix_t*)ccv_array_get(z.positives, i);
				a->data.u8 = (unsigned char*)(a + 1);
				if (_ccv_scd_classifier_cascade_pass(z.cascade, a))
					++pass;
			}
			PRINT(CCV_CLI_INFO, " - %d-th stage classifier TP rate (with pass) : %f\n", z.t + 1, (float)pass / z.positives->rnum);
			ccv_array_t* hard_negatives = _ccv_scd_hard_mining(rng, z.cascade, hard_mine, z.negatives, negative_count, params.grayscale, z.t < params.stop_criteria.prune_stage /* try to balance even distribution among negatives when we are in prune stage */);
			ccv_array_free(z.negatives);
			z.negatives = hard_negatives;
			_ccv_scd_precompute_feature_vectors(z.features, z.positives, z.negatives, z.fv);
		}
		ccv_function_state_resume(_ccv_scd_classifier_cascade_new_function_state_write, z, filename);
	}
	ccv_array_free(z.negatives);
	ccv_function_state_finish();
	ccv_array_free(z.features);
	ccv_array_free(z.positives);
	ccfree(h);
	ccfree(z.s);
	ccfree(z.pw);
	ccfree(z.nw);
	ccfree(z.fv);
	gsl_rng_free(rng);
	return z.cascade;
#else
	assert(0 && "ccv_scd_classifier_cascade_new requires GSL library and support");
	return 0;
#endif
}

void ccv_scd_classifier_cascade_write(ccv_scd_classifier_cascade_t* cascade, const char* filename)
{
	sqlite3* db = 0;
	if (SQLITE_OK == sqlite3_open(filename, &db))
	{
		const char create_table_qs[] =
			"CREATE TABLE IF NOT EXISTS cascade_params "
			"(id INTEGER PRIMARY KEY ASC, count INTEGER, "
			"margin_left INTEGER, margin_top INTEGER, margin_right INTEGER, margin_bottom INTEGER, "
			"size_width INTEGER, size_height INTEGER);"
			"CREATE TABLE IF NOT EXISTS classifier_params "
			"(classifier INTEGER PRIMARY KEY ASC, count INTEGER, threshold DOUBLE);"
			"CREATE TABLE IF NOT EXISTS feature_params "
			"(classifier INTEGER, id INTEGER, "
			"sx_0 INTEGER, sy_0 INTEGER, dx_0 INTEGER, dy_0 INTEGER, "
			"sx_1 INTEGER, sy_1 INTEGER, dx_1 INTEGER, dy_1 INTEGER, "
			"sx_2 INTEGER, sy_2 INTEGER, dx_2 INTEGER, dy_2 INTEGER, "
			"sx_3 INTEGER, sy_3 INTEGER, dx_3 INTEGER, dy_3 INTEGER, "
			"bias DOUBLE, w BLOB, UNIQUE (classifier, id));";
		assert(SQLITE_OK == sqlite3_exec(db, create_table_qs, 0, 0, 0));
		const char cascade_params_insert_qs[] = 
			"REPLACE INTO cascade_params "
			"(id, count, "
			"margin_left, margin_top, margin_right, margin_bottom, "
			"size_width, size_height) VALUES "
			"(0, $count, " // 0
			"$margin_left, $margin_top, $margin_bottom, $margin_right, " // 4
			"$size_width, $size_height);"; // 6
		sqlite3_stmt* cascade_params_insert_stmt = 0;
		assert(SQLITE_OK == sqlite3_prepare_v2(db, cascade_params_insert_qs, sizeof(cascade_params_insert_qs), &cascade_params_insert_stmt, 0));
		sqlite3_bind_int(cascade_params_insert_stmt, 1, cascade->count);
		sqlite3_bind_int(cascade_params_insert_stmt, 2, cascade->margin.left);
		sqlite3_bind_int(cascade_params_insert_stmt, 3, cascade->margin.top);
		sqlite3_bind_int(cascade_params_insert_stmt, 4, cascade->margin.right);
		sqlite3_bind_int(cascade_params_insert_stmt, 5, cascade->margin.bottom);
		sqlite3_bind_int(cascade_params_insert_stmt, 6, cascade->size.width);
		sqlite3_bind_int(cascade_params_insert_stmt, 7, cascade->size.height);
		assert(SQLITE_DONE == sqlite3_step(cascade_params_insert_stmt));
		sqlite3_finalize(cascade_params_insert_stmt);
		const char classifier_params_insert_qs[] = 
			"REPLACE INTO classifier_params "
			"(classifier, count, threshold) VALUES "
			"($classifier, $count, $threshold);";
		sqlite3_stmt* classifier_params_insert_stmt = 0;
		assert(SQLITE_OK == sqlite3_prepare_v2(db, classifier_params_insert_qs, sizeof(classifier_params_insert_qs), &classifier_params_insert_stmt, 0));
		const char feature_params_insert_qs[] =
			"REPLACE INTO feature_params "
			"(classifier, id, "
			"sx_0, sy_0, dx_0, dy_0, "
			"sx_1, sy_1, dx_1, dy_1, "
			"sx_2, sy_2, dx_2, dy_2, "
			"sx_3, sy_3, dx_3, dy_3, "
			"bias, w) VALUES "
			"($classifier, $id, " // 1
			"$sx_0, $sy_0, $dx_0, $dy_0, " // 5
			"$sx_1, $sy_1, $dx_1, $dy_1, " // 9
			"$sx_2, $sy_2, $dx_2, $dy_2, " // 13
			"$sx_3, $sy_3, $dx_3, $dy_3, " // 17
			"$bias, $w);"; // 19
		sqlite3_stmt* feature_params_insert_stmt = 0;
		assert(SQLITE_OK == sqlite3_prepare_v2(db, feature_params_insert_qs, sizeof(feature_params_insert_qs), &feature_params_insert_stmt, 0));
		int i, j, k;
		for (i = 0; i < cascade->count; i++)
		{
			ccv_scd_stump_classifier_t* classifier = cascade->classifiers + i;
			sqlite3_bind_int(classifier_params_insert_stmt, 1, i);
			sqlite3_bind_int(classifier_params_insert_stmt, 2, classifier->count);
			sqlite3_bind_double(classifier_params_insert_stmt, 3, classifier->threshold);
			assert(SQLITE_DONE == sqlite3_step(classifier_params_insert_stmt));
			sqlite3_reset(classifier_params_insert_stmt);
			sqlite3_clear_bindings(classifier_params_insert_stmt);
			for (j = 0; j < classifier->count; j++)
			{
				ccv_scd_stump_feature_t* feature = classifier->features + j;
				sqlite3_bind_int(feature_params_insert_stmt, 1, i);
				sqlite3_bind_int(feature_params_insert_stmt, 2, j);
				for (k = 0; k < 4; k++)
				{
					sqlite3_bind_int(feature_params_insert_stmt, 3 + k * 4, feature->sx[k]);
					sqlite3_bind_int(feature_params_insert_stmt, 4 + k * 4, feature->sy[k]);
					sqlite3_bind_int(feature_params_insert_stmt, 5 + k * 4, feature->dx[k]);
					sqlite3_bind_int(feature_params_insert_stmt, 6 + k * 4, feature->dy[k]);
				}
				sqlite3_bind_double(feature_params_insert_stmt, 19, feature->bias);
				sqlite3_bind_blob(feature_params_insert_stmt, 20, feature->w, sizeof(float) * 32, SQLITE_STATIC);
				assert(SQLITE_DONE == sqlite3_step(feature_params_insert_stmt));
				sqlite3_reset(feature_params_insert_stmt);
				sqlite3_clear_bindings(feature_params_insert_stmt);
			}
		}
		sqlite3_finalize(classifier_params_insert_stmt);
		sqlite3_finalize(feature_params_insert_stmt);
		sqlite3_close(db);
	}
}

ccv_scd_classifier_cascade_t* ccv_scd_classifier_cascade_read(const char* filename)
{
	int i;
	sqlite3* db = 0;
	ccv_scd_classifier_cascade_t* cascade = 0;
	if (SQLITE_OK == sqlite3_open(filename, &db))
	{
		const char cascade_params_qs[] =
			"SELECT count, " // 1
			"margin_left, margin_top, margin_right, margin_bottom, " // 5
			"size_width, size_height FROM cascade_params WHERE id = 0;"; // 7
		sqlite3_stmt* cascade_params_stmt = 0;
		if (SQLITE_OK == sqlite3_prepare_v2(db, cascade_params_qs, sizeof(cascade_params_qs), &cascade_params_stmt, 0))
		{
			if (sqlite3_step(cascade_params_stmt) == SQLITE_ROW)
			{
				cascade = (ccv_scd_classifier_cascade_t*)ccmalloc(sizeof(ccv_scd_classifier_cascade_t));
				cascade->count = sqlite3_column_int(cascade_params_stmt, 0);
				cascade->classifiers = (ccv_scd_stump_classifier_t*)cccalloc(cascade->count, sizeof(ccv_scd_stump_classifier_t));
				cascade->margin = ccv_margin(sqlite3_column_int(cascade_params_stmt, 1), sqlite3_column_int(cascade_params_stmt, 2), sqlite3_column_int(cascade_params_stmt, 3), sqlite3_column_int(cascade_params_stmt, 4));
				cascade->size = ccv_size(sqlite3_column_int(cascade_params_stmt, 5), sqlite3_column_int(cascade_params_stmt, 6));
			}
			sqlite3_finalize(cascade_params_stmt);
		}
		if (cascade)
		{
			const char classifier_params_qs[] =
				"SELECT classifier, count, threshold FROM classifier_params ORDER BY classifier ASC;";
			sqlite3_stmt* classifier_params_stmt = 0;
			if (SQLITE_OK == sqlite3_prepare_v2(db, classifier_params_qs, sizeof(classifier_params_qs), &classifier_params_stmt, 0))
			{
				while (sqlite3_step(classifier_params_stmt) == SQLITE_ROW)
					if (sqlite3_column_int(classifier_params_stmt, 0) < cascade->count)
					{
						ccv_scd_stump_classifier_t* classifier = cascade->classifiers + sqlite3_column_int(classifier_params_stmt, 0);
						classifier->count = sqlite3_column_int(classifier_params_stmt, 1);
						classifier->features = (ccv_scd_stump_feature_t*)ccmalloc(sizeof(ccv_scd_stump_feature_t) * classifier->count);
						classifier->threshold = (float)sqlite3_column_double(classifier_params_stmt, 2);
					}
				sqlite3_finalize(classifier_params_stmt);
			}
			const char feature_params_qs[] =
				"SELECT classifier, id, "
				"sx_0, sy_0, dx_0, dy_0, "
				"sx_1, sy_1, dx_1, dy_1, "
				"sx_2, sy_2, dx_2, dy_2, "
				"sx_3, sy_3, dx_3, dy_3, "
				"bias, w FROM feature_params ORDER BY classifier, id ASC;";
			sqlite3_stmt* feature_params_stmt = 0;
			if (SQLITE_OK == sqlite3_prepare_v2(db, feature_params_qs, sizeof(feature_params_qs), &feature_params_stmt, 0))
			{
				while (sqlite3_step(feature_params_stmt) == SQLITE_ROW)
					if (sqlite3_column_int(feature_params_stmt, 0) < cascade->count)
					{
						ccv_scd_stump_classifier_t* classifier = cascade->classifiers + sqlite3_column_int(feature_params_stmt, 0);
						if (sqlite3_column_int(feature_params_stmt, 1) < classifier->count)
						{
							ccv_scd_stump_feature_t* feature = classifier->features + sqlite3_column_int(feature_params_stmt, 1);
							for (i = 0; i < 4; i++)
							{
								feature->sx[i] = sqlite3_column_int(feature_params_stmt, 2 + i * 4);
								feature->sy[i] = sqlite3_column_int(feature_params_stmt, 3 + i * 4);
								feature->dx[i] = sqlite3_column_int(feature_params_stmt, 4 + i * 4);
								feature->dy[i] = sqlite3_column_int(feature_params_stmt, 5 + i * 4);
							}
							feature->bias = (float)sqlite3_column_double(feature_params_stmt, 18);
							int wnum = sqlite3_column_bytes(feature_params_stmt, 19);
							assert(wnum == 32 * sizeof(float));
							const void* w = sqlite3_column_blob(feature_params_stmt, 19);
							memcpy(feature->w, w, sizeof(float) * 32);
						}
					}
				sqlite3_finalize(feature_params_stmt);
			}
		}
		sqlite3_close(db);
	}
	return cascade;
}

void ccv_scd_classifier_cascade_free(ccv_scd_classifier_cascade_t* cascade)
{
	int i;
	for (i = 0; i < cascade->count; i++)
	{
		ccv_scd_stump_classifier_t* classifier = cascade->classifiers + i;
		ccfree(classifier->features);
	}
	ccfree(cascade->classifiers);
	ccfree(cascade);
}

static int _ccv_is_equal_same_class(const void* _r1, const void* _r2, void* data)
{
	const ccv_comp_t* r1 = (const ccv_comp_t*)_r1;
	const ccv_comp_t* r2 = (const ccv_comp_t*)_r2;

	if (r2->classification.id != r1->classification.id)
		return 0;

	int i = ccv_max(ccv_min(r2->rect.x + r2->rect.width, r1->rect.x + r1->rect.width) - ccv_max(r2->rect.x, r1->rect.x), 0) * ccv_max(ccv_min(r2->rect.y + r2->rect.height, r1->rect.y + r1->rect.height) - ccv_max(r2->rect.y, r1->rect.y), 0);
	int m = ccv_min(r2->rect.width * r2->rect.height, r1->rect.width * r1->rect.height);

	return i >= 0.3 * m; // IoM > 0.3 like HeadHunter does
}

ccv_array_t* ccv_scd_detect_objects(ccv_dense_matrix_t* a, ccv_scd_classifier_cascade_t** cascades, int count, ccv_scd_param_t params)
{
	int i, j, k, x, y, p, q;
	int scale_upto = 1;
	float up_ratio = 1.0;
	for (i = 0; i < count; i++)
		up_ratio = ccv_max(up_ratio, ccv_max((float)cascades[i]->size.width / params.size.width, (float)cascades[i]->size.height / params.size.height));
	if (up_ratio - 1.0 > 1e-4)
	{
		ccv_dense_matrix_t* resized = 0;
		ccv_resample(a, &resized, 0, (int)(a->rows * up_ratio + 0.5), (int)(a->cols * up_ratio + 0.5), CCV_INTER_CUBIC);
		a = resized;
	}
	for (i = 0; i < count; i++)
		scale_upto = ccv_max(scale_upto, (int)(log(ccv_min((double)a->rows / (cascades[i]->size.height - cascades[i]->margin.top - cascades[i]->margin.bottom), (double)a->cols / (cascades[i]->size.width - cascades[i]->margin.left - cascades[i]->margin.right))) / log(2.) - DBL_MIN) + 1);
	ccv_dense_matrix_t** pyr = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * scale_upto);
	pyr[0] = a;
	for (i = 1; i < scale_upto; i++)
	{
		pyr[i] = 0;
		ccv_sample_down(pyr[i - 1], &pyr[i], 0, 0, 0);
	}
#if defined(HAVE_SSE2)
	__m128 surf[8];
#else
	float surf[32];
#endif
	ccv_array_t** seq = (ccv_array_t**)alloca(sizeof(ccv_array_t*) * count);
	for (i = 0; i < count; i++)
		seq[i] = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
	for (i = 0; i < scale_upto; i++)
	{
		// run it
		for (j = 0; j < count; j++)
		{
			double scale_ratio = pow(2., 1. / (params.interval + 1));
			double scale = 1;
			ccv_scd_classifier_cascade_t* cascade = cascades[j];
			for (k = 0; k <= params.interval; k++)
			{
				int rows = (int)(pyr[i]->rows / scale + 0.5);
				int cols = (int)(pyr[i]->cols / scale + 0.5);
				if (rows < cascade->size.height || cols < cascade->size.width)
					break;
				ccv_dense_matrix_t* image = k == 0 ? pyr[i] : 0;
				if (k > 0)
					ccv_resample(pyr[i], &image, 0, rows, cols, CCV_INTER_AREA);
				ccv_dense_matrix_t* scd = 0;
				if (cascade->margin.left == 0 && cascade->margin.top == 0 && cascade->margin.right == 0 && cascade->margin.bottom == 0)
				{
					ccv_scd(image, &scd, 0);
					if (k > 0)
						ccv_matrix_free(image);
				} else {
					ccv_dense_matrix_t* bordered = 0;
					ccv_border(image, (ccv_matrix_t**)&bordered, 0, cascade->margin);
					if (k > 0)
						ccv_matrix_free(image);
					ccv_scd(bordered, &scd, 0);
					ccv_matrix_free(bordered);
				}
				ccv_dense_matrix_t* sat = 0;
				ccv_sat(scd, &sat, 0, CCV_PADDING_ZERO);
				assert(CCV_GET_CHANNEL(sat->type) == CCV_SCD_CHANNEL);
				ccv_matrix_free(scd);
				float* ptr = sat->data.f32;
				for (y = 0; y < rows; y += params.step_through)
				{
					if (y >= sat->rows - cascade->size.height - 1)
						break;
					for (x = 0; x < cols; x += params.step_through)
					{
						if (x >= sat->cols - cascade->size.width - 1)
							break;
						int pass = 1;
						float sum = 0;
						for (p = 0; p < cascade->count; p++)
						{
							ccv_scd_stump_classifier_t* classifier = cascade->classifiers + p;
							float v = 0;
							for (q = 0; q < classifier->count; q++)
							{
								ccv_scd_stump_feature_t* feature = classifier->features + q;
#if defined(HAVE_SSE2)
								_ccv_scd_run_feature_at_sse2(ptr + x * CCV_SCD_CHANNEL, sat->cols, feature, surf);
								__m128 u0 = _mm_add_ps(_mm_mul_ps(surf[0], _mm_loadu_ps(feature->w)), _mm_mul_ps(surf[1], _mm_loadu_ps(feature->w + 4)));
								__m128 u1 = _mm_add_ps(_mm_mul_ps(surf[2], _mm_loadu_ps(feature->w + 8)), _mm_mul_ps(surf[3], _mm_loadu_ps(feature->w + 12)));
								__m128 u2 = _mm_add_ps(_mm_mul_ps(surf[4], _mm_loadu_ps(feature->w + 16)), _mm_mul_ps(surf[5], _mm_loadu_ps(feature->w + 20)));
								__m128 u3 = _mm_add_ps(_mm_mul_ps(surf[6], _mm_loadu_ps(feature->w + 24)), _mm_mul_ps(surf[7], _mm_loadu_ps(feature->w + 28)));
								u0 = _mm_add_ps(u0, u1);
								u2 = _mm_add_ps(u2, u3);
								union {
									float f[4];
									__m128 p;
								} ux;
								ux.p = _mm_add_ps(u0, u2);
								float u = expf(feature->bias + ux.f[0] + ux.f[1] + ux.f[2] + ux.f[3]);
#else
								_ccv_scd_run_feature_at(ptr + x * CCV_SCD_CHANNEL, sat->cols, feature, surf);
								float u = feature->bias;
								int r;
								for (r = 0; r < 32; r++)
									u += surf[r] * feature->w[r];
								u = expf(u);
#endif
								v += (u - 1) / (u + 1);
							}
							if (v <= classifier->threshold)
							{
								pass = 0;
								break;
							}
							sum = v / classifier->count;
						}
						if (pass)
						{
							ccv_comp_t comp;
							comp.rect = ccv_rect((int)((x + 0.5) * (scale / up_ratio) * (1 << i) - 0.5),
												 (int)((y + 0.5) * (scale / up_ratio) * (1 << i) - 0.5),
												 (cascade->size.width - cascade->margin.left - cascade->margin.right) * (scale / up_ratio) * (1 << i),
												 (cascade->size.height - cascade->margin.top - cascade->margin.bottom) * (scale / up_ratio) * (1 << i));
							comp.neighbors = 1;
							comp.classification.id = j + 1;
							comp.classification.confidence = sum + (cascade->count - 1);
							ccv_array_push(seq[j], &comp);
						}
					}
					ptr += sat->cols * CCV_SCD_CHANNEL * params.step_through;
				}
				ccv_matrix_free(sat);
				scale *= scale_ratio;
			}
		}
	}

	for (i = 1; i < scale_upto; i++)
		ccv_matrix_free(pyr[i]);
	if (up_ratio - 1.0 > 1e-4)
		ccv_matrix_free(a);

	ccv_array_t* result_seq = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
	for (k = 0; k < count; k++)
	{
		/* simple non-maximum suppression, we merge when intersected area / min area > 0.3  */
		if(params.min_neighbors == 0)
		{
			for (i = 0; i < seq[k]->rnum; i++)
			{
				ccv_comp_t* comp = (ccv_comp_t*)ccv_array_get(seq[k], i);
				ccv_array_push(result_seq, comp);
			}
		} else {
			ccv_array_t* idx_seq = 0;
			// group retrieved rectangles in order to filter out noise
			int ncomp = ccv_array_group(seq[k], &idx_seq, _ccv_is_equal_same_class, 0);
			ccv_comp_t* comps = (ccv_comp_t*)cccalloc(ncomp + 1, sizeof(ccv_comp_t));

			// count number of neighbors
			for (i = 0; i < seq[k]->rnum; i++)
			{
				ccv_comp_t r1 = *(ccv_comp_t*)ccv_array_get(seq[k], i);
				int idx = *(int*)ccv_array_get(idx_seq, i);

				comps[idx].classification.id = r1.classification.id;
				if (r1.classification.confidence > comps[idx].classification.confidence || comps[idx].neighbors == 0)
				{
					comps[idx].rect = r1.rect;
					comps[idx].classification.confidence = r1.classification.confidence;
				}

				++comps[idx].neighbors;
			}

			// push merged bounding box to result_seq
			for (i = 0; i < ncomp; i++)
			{
				int n = comps[i].neighbors;
				if (n >= params.min_neighbors)
					ccv_array_push(result_seq, comps + i);
			}
			ccv_array_free(idx_seq);
			ccfree(comps);
		}
		ccv_array_free(seq[k]);
	}

	return result_seq;
}