Compiling caffe2
A look at caffe2's CMakeLists.txt shows that many of its modules are never used for server-side C++ inference.
Starting from the default configuration, the switches worth changing are listed below (a sample configure invocation follows the list):
- BUILD_PYTHON OFF
- BUILD_TEST OFF
- USE_CUDA OFF
- USE_LEVELDB OFF
- USE_LMDB OFF
- USE_METAL OFF
- USE_MOBILE_OPENGL OFF
- USE_MPI OFF
- USE_NCCL OFF
- USE_NNPACK OFF
- USE_OPENCV OFF
- USE_OPENMP ON (this one has a big impact on inference performance)
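Put together, the configure step then looks roughly like the sketch below; the out-of-source build directory and the install prefix are assumptions, adjust them to your setup:
cmake .. \
    -DBUILD_PYTHON=OFF -DBUILD_TEST=OFF \
    -DUSE_CUDA=OFF \
    -DUSE_LEVELDB=OFF -DUSE_LMDB=OFF \
    -DUSE_METAL=OFF -DUSE_MOBILE_OPENGL=OFF \
    -DUSE_MPI=OFF -DUSE_NCCL=OFF -DUSE_NNPACK=OFF \
    -DUSE_OPENCV=OFF \
    -DUSE_OPENMP=ON \
    -DCMAKE_INSTALL_PREFIX=/usr/local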
The inference project
CMake configuration
A few points in CMakeLists.txt to pay attention to:
# caffe2 itself was built against glog/gflags, so define the matching macros
add_definitions(-DCAFFE2_USE_GOOGLE_GLOG)
add_definitions(-DCAFFE2_USE_GFLAGS)
# bake $ORIGIN into the RPATH so the binary finds shared libs placed next to it
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH $ORIGIN)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_TYPE Release)
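For context, a minimal sketch of how those lines sit in a complete CMakeLists.txt; the project/target names, source file, and install paths are assumptions, and the caffe2 library name varies between versions (libcaffe2 vs. libCaffe2_CPU), so adjust to your install:
cmake_minimum_required(VERSION 2.8)
project(caffe2_inference)
add_definitions(-DCAFFE2_USE_GOOGLE_GLOG)
add_definitions(-DCAFFE2_USE_GFLAGS)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH $ORIGIN)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_TYPE Release)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS} /usr/local/include)
link_directories(/usr/local/lib)
add_executable(infer main.cpp)
target_link_libraries(infer caffe2 protobuf glog gflags ${OpenCV_LIBS})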
include
The few caffe2 headers needed for inference all live in the core, proto and utils directories:
#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <opencv2/opencv.hpp>
#include <caffe2/core/init.h>
#include <caffe2/core/predictor.h>
#include <caffe2/utils/proto_utils.h>
using namespace std;
using namespace cv;
using namespace caffe2;
class Caffe2Net {
public:
    Caffe2Net(string initNet, string predictNet);
    vector<float> predict(Mat img);
protected:
    TensorCPU preProcess(Mat img);
    vector<float> postProcess(TensorCPU output);
    Workspace workspace;              // owns all blobs: weights and activations
    unique_ptr<NetBase> predict_net;  // the compiled forward graph
};
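Before the implementation, a minimal driver sketch showing how the class is meant to be used; the model and image file names are placeholders, not from the original code:
int main(int argc, char** argv) {
    caffe2::GlobalInit(&argc, &argv);                // sets up glog/gflags
    Caffe2Net net("init_net.pb", "predict_net.pb");  // placeholder paths
    Mat img = imread("test.jpg");                    // 8-bit BGR input
    vector<float> feature = net.predict(img);
    cout << "output dim: " << feature.size() << endl;
    google::protobuf::ShutdownProtobufLibrary();     // see "destruction" below
    return 0;
}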
net init
Caffe2Net::Caffe2Net(string initNet, string predictNet)
    : workspace(nullptr)
{
#ifdef WITH_CUDA
    // touch a CUDAContext once so the CUDA runtime is initialized up front
    DeviceOption option;
    option.set_device_type(CUDA);
    new CUDAContext(option);
#endif
    // init_net fills the workspace with the trained weights;
    // predict_net describes the forward graph
    NetDef init_net_def, predict_net_def;
    CAFFE_ENFORCE(ReadProtoFromFile(initNet, &init_net_def));
    CAFFE_ENFORCE(ReadProtoFromFile(predictNet, &predict_net_def));
#ifdef WITH_CUDA
    init_net_def.mutable_device_option()->set_device_type(CUDA);
    predict_net_def.mutable_device_option()->set_device_type(CUDA);
#else
    init_net_def.mutable_device_option()->set_device_type(CPU);
    predict_net_def.mutable_device_option()->set_device_type(CPU);
#endif
    // run the init net once to create the parameter blobs, then build the
    // predict net inside the same workspace
    workspace.RunNetOnce(init_net_def);
    predict_net = CreateNet(predict_net_def, &workspace);
}
image preprocess
TensorCPU Caffe2Net::preProcess(Mat img) {
    // resize to 256x256, then take a 224x224 crop
    cv::resize(img, img, cv::Size(256, 256));
    img = img(cv::Rect(16, 16, 224, 224));
    // convert to float, normalize to [-1,1]: (x - 128) * 1/128
    img.convertTo(img, CV_32FC3, 1.0, -128);
    img = img * 0.0078125;
    // convert NHWC to NCHW: split the channels and lay them out back to back
    vector<cv::Mat> channels(3);
    cv::split(img, channels);
    std::vector<float> data;
    for (auto &c : channels) {
        data.insert(data.end(), (float *)c.datastart, (float *)c.dataend);
    }
    std::vector<TIndex> dims({1, 3, 224, 224});
    // this TensorCPU constructor copies `data` into the tensor
    return TensorCPU(dims, data, NULL);
}
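One subtlety: the `(float *)c.datastart` trick reads raw Mat memory and is only safe when each channel is one continuous block. The `convertTo` above reallocates the cropped view into a fresh continuous buffer, so this holds here, but a defensive check is cheap; a minimal sketch of a guard to place before the insert loop:
// raw datastart/dataend arithmetic assumes continuous channel memory
for (auto &c : channels) {
    CAFFE_ENFORCE(c.isContinuous());
}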
net inference
vector<float> Caffe2Net::predict(Mat img)
{
    // create the input blob and share the preprocessed pixels into it
#ifdef WITH_CUDA
    TensorCUDA input = TensorCUDA(preProcess(img));  // copies host -> device
    auto tensor = workspace.CreateBlob("data")->GetMutable<TensorCUDA>();
#else
    TensorCPU input = preProcess(img);
    auto tensor = workspace.CreateBlob("data")->GetMutable<TensorCPU>();
#endif
    tensor->ResizeLike(input);
    tensor->ShareData(input);  // zero-copy: valid only while `input` lives
    // run the forward pass
    predict_net->Run();
    // fetch the output blob, copying back to the host if it lives on the GPU
#ifdef WITH_CUDA
    TensorCPU output = TensorCPU(workspace.GetBlob("fc1")->Get<TensorCUDA>());
#else
    TensorCPU output = TensorCPU(workspace.GetBlob("fc1")->Get<TensorCPU>());
#endif
    return postProcess(output);
}
vector<float> Caffe2Net::postProcess(TensorCPU output)
{
    const float *probs = output.data<float>();
    vector<TIndex> dims = output.dims();
    // expect a 1x512 feature vector out of the fc1 layer
    assert(2 == output.ndim());
    assert(1 == dims[0]);
    assert(512 == dims[1]);
    vector<float> retVal(dims[1]);
    copy(probs, probs + dims[1], retVal.begin());
    return retVal;
}
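The asserts suggest fc1 is a 1x512 feature vector (an embedding) rather than class probabilities, so a typical consumer would compare two outputs by cosine similarity. A small standalone sketch, not part of the original class:
// cosine similarity of two equal-length feature vectors; needs <cmath>
float cosineSimilarity(const vector<float> &a, const vector<float> &b) {
    float dot = 0.f, na = 0.f, nb = 0.f;
    for (size_t i = 0; i < a.size(); ++i) {
        dot += a[i] * b[i];
        na  += a[i] * a[i];
        nb  += b[i] * b[i];
    }
    return dot / (std::sqrt(na) * std::sqrt(nb) + 1e-10f);
}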
destruction
Once all inference is done and the process is about to exit, release protobuf's global state:
google::protobuf::ShutdownProtobufLibrary();
tips
Patch the SpatialBN source to support fc layers
Stock caffe2 only supports batch norm on convolutional feature maps, which is awkward for networks that normalize after an fc layer. Others have hit the same problem on github (https://github.com/caffe2/caffe2/issues/865), and the workaround suggested there is rather hacky: reshape the fc output to 4 dimensions with the last two set to 1, then run spatial_bn on that. It works, but it is not elegant. Reading caffe2's spatial_bn implementation shows the restriction on the input rank is not actually necessary, so patch the source instead:
vim caffe2/operators/spatial_batch_norm_op.cc
// relax the rank check so 2-d (N, C) inputs from fc layers are accepted
//CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(X.ndim() >= 2 && X.ndim() <= 5);
const int N = X.dim32(0);
const int C =
    (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
// a 2-d input has no spatial extent, so treat the height (and width) as 1
//const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
const int H = X.ndim() > 2
    ? (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1))
    : 1;
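The patch only takes effect after caffe2 is rebuilt and reinstalled; assuming the same build directory as in the compile section above:
cd build && make -j$(nproc) && make install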