对DeepLearning最初的印象是,大量的训练样本+机器学习,也就是说原来传统的机器学习会遇到的问题,不能解决的问题,换成DeepLearning同样解决不了。比如目标识别中因为光照变化,目标被遮挡,目标的几何变化造成的识别率大幅下降,在DeepLearning中同样也不能很好解决。但是不是说DeepLearning就一无是处,最近几年这么热也决不是因为名字取得好。DeepLearning比较明显的优势就是在特征选择上,想想之前做生物特征识别时,各种找特征,还得考虑什么光照不变,旋转不变,抗尺寸变换,抗遮挡,那叫一个累呀。现在可好啦,一个Convolution Layer,再配Fully Connected Layer,最后来个Softmax,丢一堆带标签的样本进去自动给你找出特征。当然这个只是一个接触DeepLearning不到一个月的小白的肤浅认识,大家听听就好。
1. 数据准备
2. 模型建立
3. 模型训练
参考caffe2官网给的AICamera例子,建立Android Studio工程(github工程地址:https://github.com/lyapple2008/MNIST_CNN_APP ),其中最主要的代码如下所示
| void loadToNetDef(AAssetManager *mgr, caffe2::NetDef *net, const char *filename) { AAsset *asset = AAssetManager_open(mgr, filename, AASSET_MODE_BUFFER); assert(asset != nullptr); const void *data = AAsset_getBuffer(asset); assert(data != nullptr); off_t len = AAsset_getLength(asset); assert(len != 0); if (!net->ParseFromArray(data, len)) { alog("Couldn't parse net from data.\n"); } AAsset_close(asset); }
extern "C" void Java_com_example_beyoung_handwrittendigit_MainActivity_initCaffe2( JNIEnv *env, jobject, jobject assetManager) { AAssetManager *mgr = AAssetManager_fromJava(env, assetManager); alog("Attempting to load protobuf netdefs..."); loadToNetDef(mgr, &_initNet, "mnist/init_net.pb"); loadToNetDef(mgr, &_predictNet, "mnist/predict_net.pb"); alog("done."); alog("Instantiating predictor..."); _predictor = new caffe2::Predictor(_initNet, _predictNet); if (_predictor) { alog("done..."); } else { alog("fail to instantiat predictor..."); } }
/**
 * Inserts one (score, index) pair into arrays holding the best `k` scores in
 * descending order.
 *
 * Entries from the insertion slot onward move one position toward the end
 * and the last one is dropped. A score that does not exceed any stored entry
 * is ignored.
 */
static void insertTopK(float score, int index, float *best, int *best_index, int k) {
    for (int slot = 0; slot < k; ++slot) {
        if (score > best[slot]) {
            // Walk from the tail so each entry is copied before it is overwritten.
            for (int s = k - 1; s > slot; --s) {
                best[s] = best[s - 1];
                best_index[s] = best_index[s - 1];
            }
            best[slot] = score;
            best_index[slot] = index;
            return;
        }
    }
}

/**
 * JNI entry point for MainActivity.recognitionFromCaffe2: binarizes an h x w
 * pixel array, runs the predictor on it and reports the three best classes.
 *
 * @param env  JNI environment of the calling thread.
 * @param h    Image height in pixels.
 * @param w    Image width in pixels.
 * @param data Row-major pixel values; any non-zero value counts as "on".
 * @return "Loading..." while no predictor exists, "Invalid image size" when
 *         the dimensions do not fit the input buffer or the array, nullptr
 *         when the array elements cannot be obtained, otherwise three lines
 *         of the form "<index>: <score * 100>%".
 */
extern "C" JNIEXPORT jstring JNICALL
Java_com_example_beyoung_handwrittendigit_MainActivity_recognitionFromCaffe2(
        JNIEnv *env, jobject, jint h, jint w, jintArray data) {
    if (!_predictor) {
        return env->NewStringUTF("Loading...");
    }

    // Validate at runtime (not via assert, which NDEBUG removes): the pixels
    // must fit into input_data and must actually be present in the array.
    const jsize len = env->GetArrayLength(data);
    const long long img_size = static_cast<long long>(h) * w;
    if (h <= 0 || w <= 0 || img_size > INPUT_DATA_SIZE || img_size > len) {
        alog("Rejecting %d x %d image backed by %d pixels.", h, w, len);
        return env->NewStringUTF("Invalid image size");
    }

    jint *img_data = env->GetIntArrayElements(data, nullptr);
    if (img_data == nullptr) {
        // NOTE(review): per the JNI spec an exception is pending in this
        // case, so no further JNI calls are made before returning.
        return nullptr;
    }

    for (int row = 0; row < h; ++row) {
        std::ostringstream row_log;
        for (int col = 0; col < w; ++col) {
            const int offset = row * w + col;
            const bool lit = img_data[offset] != 0;
            input_data[offset] = lit ? 1.0f : 0.0f;
            row_log << (lit ? 1 : 0) << " ";
        }
        // One log line per image row showing the binarized pixels.
        alog("%s", row_log.str().c_str());
    }

    // The pixels were only read, so release without copying anything back.
    env->ReleaseIntArrayElements(data, img_data, JNI_ABORT);

    // NOTE(review): the full INPUT_DATA_SIZE buffer is copied even when the
    // image is smaller; the tail then holds whatever input_data contained
    // before this call - confirm callers always pass IMG_H x IMG_W.
    caffe2::TensorCPU input;
    input.Resize(std::vector<int>({1, IMG_C, IMG_H, IMG_W}));
    memcpy(input.mutable_data<float>(), input_data, INPUT_DATA_SIZE * sizeof(float));
    caffe2::Predictor::TensorVector input_vec{&input};
    caffe2::Predictor::TensorVector output_vec;
    _predictor->run(input_vec, &output_vec);

    // Scores start at 0, so only strictly positive outputs get ranked.
    constexpr int k = 3;
    float max[k] = {0};
    int max_index[k] = {0};
    for (auto output : output_vec) {
        const float *scores = output->template data<float>();
        const auto count = output->size();
        for (int i = 0; i < count; ++i) {
            insertTopK(scores[i], i, max, max_index, k);
        }
    }

    std::ostringstream stringStream;
    for (int j = 0; j < k; ++j) {
        stringStream << max_index[j] << ": " << max[j] * 100 << "%\n";
    }
    return env->NewStringUTF(stringStream.str().c_str());
}