caffe2 learning notes (3): C++ inference

building caffe2

Take a look at caffe2's CMakeLists.txt: many of the modules there are not needed for server-side C++ inference.

On top of the default configuration, the switches worth changing are listed below (a sample cmake invocation follows the list):

  • BUILD_PYTHON OFF
  • BUILD_TEST OFF
  • USE_CUDA OFF
  • USE_LEVELDB OFF
  • USE_LMDB OFF
  • USE_METAL OFF
  • USE_MOBILE_OPENGL OFF
  • USE_MPI OFF
  • USE_NCCL OFF
  • USE_NNPACK OFF
  • USE_OPENCV OFF
  • USE_OPENMP ON (this one has a big impact on inference performance)
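
As a concrete example, these switches map straight to -D flags on the cmake command line. A minimal sketch, assuming an in-source build directory and the default install prefix (adjust paths to your own checkout):

cd caffe2 && mkdir -p build && cd build
cmake .. \
    -DBUILD_PYTHON=OFF \
    -DBUILD_TEST=OFF \
    -DUSE_CUDA=OFF \
    -DUSE_LEVELDB=OFF \
    -DUSE_LMDB=OFF \
    -DUSE_METAL=OFF \
    -DUSE_MOBILE_OPENGL=OFF \
    -DUSE_MPI=OFF \
    -DUSE_NCCL=OFF \
    -DUSE_NNPACK=OFF \
    -DUSE_OPENCV=OFF \
    -DUSE_OPENMP=ON
make -j"$(nproc)" && make install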

the inference project

CMake configuration

A few points to note in the project's CMakeLists.txt:

# caffe2 itself was built against glog and gflags, so define the same macros here
add_definitions(-DCAFFE2_USE_GOOGLE_GLOG)
add_definitions(-DCAFFE2_USE_GFLAGS)

# bake $ORIGIN into the rpath so the binary finds the caffe2 shared libraries
# placed next to it, without relying on LD_LIBRARY_PATH
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH "$ORIGIN")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_TYPE Release)
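
For completeness, a hedged sketch of the rest of such a CMakeLists.txt. The paths, the target name and the exact library list (caffe2, protobuf, glog, gflags, OpenCV) are assumptions that depend on where make install put caffe2:

cmake_minimum_required(VERSION 3.0)
project(caffe2_inference)

find_package(OpenCV REQUIRED)

# adjust to wherever caffe2 was installed
include_directories(/usr/local/include ${OpenCV_INCLUDE_DIRS})
link_directories(/usr/local/lib)

add_executable(infer main.cpp)
target_link_libraries(infer caffe2 protobuf glog gflags ${OpenCV_LIBS})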

include

The caffe2 headers needed for inference all live in the core/, proto/ and utils/ directories:

#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <opencv2/opencv.hpp>
#include <caffe2/core/init.h>
#include <caffe2/core/predictor.h>
#include <caffe2/utils/proto_utils.h>

using namespace std;
using namespace cv;
using namespace caffe2;

class Caffe2Net {
public:
    Caffe2Net(string initNet, string predictNet);
    vector<float> predict(Mat img);
protected:
    TensorCPU preProcess(Mat img);
    vector<float> postProcess(TensorCPU output);

    Workspace workspace;             // owns all blobs: weights, inputs, outputs
    unique_ptr<NetBase> predict_net; // the compiled net, re-run for each image
};

net init

Caffe2Net::Caffe2Net(string initNet, string predictNet)
:workspace(nullptr)
{
#ifdef WITH_CUDA
    // touch a CUDAContext once so the CUDA runtime is initialized up front
    DeviceOption option;
    option.set_device_type(CUDA);
    new CUDAContext(option);
#endif
    NetDef init_net_def, predict_net_def;
    CAFFE_ENFORCE(ReadProtoFromFile(initNet, &init_net_def));
    CAFFE_ENFORCE(ReadProtoFromFile(predictNet, &predict_net_def));
#ifdef WITH_CUDA
    init_net_def.mutable_device_option()->set_device_type(CUDA);
    predict_net_def.mutable_device_option()->set_device_type(CUDA);
#else
    init_net_def.mutable_device_option()->set_device_type(CPU);
    predict_net_def.mutable_device_option()->set_device_type(CPU);
#endif
    // the init net fills the workspace with the trained weights; run it once
    workspace.RunNetOnce(init_net_def);
    // the predict net is kept around and re-run for every input; note that
    // predict_net_def must declare "data" as an external input, otherwise
    // create the "data" blob in the workspace before calling CreateNet
    predict_net = CreateNet(predict_net_def, &workspace);
}

image preprocess

TensorCPU Caffe2Net::preProcess(Mat img) {
    // resize to 256x256, then center-crop the middle 224x224
    cv::resize(img, img, cv::Size(256, 256));
    img = img(cv::Rect(16, 16, 224, 224));
    // convert to float and normalize to [-1,1]: (x - 128) * 1/128
    img.convertTo(img, CV_32FC3, 1.0, -128);
    img = img * 0.0078125;  // 0.0078125 == 1/128
    // convert HWC (OpenCV layout) to CHW by concatenating the split channels
    vector<cv::Mat> channels(3);
    cv::split(img, channels);
    std::vector<float> data;
    for (auto &c : channels) {
        data.insert(data.end(), (float *)c.datastart, (float *)c.dataend);
    }
    // wrap as an NCHW tensor with batch size 1
    std::vector<TIndex> dims({1, 3, 224, 224});
    return TensorCPU(dims, data, NULL);
}

net inference

vector<float> Caffe2Net::predict(Mat img)
{
    // create the input blob and share the preprocessed tensor into it
#ifdef WITH_CUDA
    TensorCUDA input = TensorCUDA(preProcess(img));  // copies host -> device
    auto tensor = workspace.CreateBlob("data")->GetMutable<TensorCUDA>();
#else
    TensorCPU input = preProcess(img);
    auto tensor = workspace.CreateBlob("data")->GetMutable<TensorCPU>();
#endif
    tensor->ResizeLike(input);
    tensor->ShareData(input);
    // run the predict net on the new input
    predict_net->Run();
    // fetch the output blob (copied back to the CPU in the CUDA case)
#ifdef WITH_CUDA
    TensorCPU output = TensorCPU(workspace.GetBlob("fc1")->Get<TensorCUDA>());
#else
    TensorCPU output = TensorCPU(workspace.GetBlob("fc1")->Get<TensorCPU>());
#endif
    return postProcess(output);
}

vector<float> Caffe2Net::postProcess(TensorCPU output)
{
    // "fc1" is a 1x512 feature vector here, so sanity-check the shape
    const float * probs = output.data<float>();
    vector<TIndex> dims = output.dims();
    assert(2 == output.ndim());
    assert(1 == dims[0]);
    assert(512 == dims[1]);
    vector<float> retVal(dims[1]);
    copy(probs, probs+dims[1], retVal.begin());
    return retVal;
}

destruction

// call once at program exit, after every net and workspace has been destroyed
google::protobuf::ShutdownProtobufLibrary();
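
Putting the pieces together, a minimal sketch of a main(); the model and image file names are placeholders, and caffe2::GlobalInit should run before any nets are built:

int main(int argc, char** argv) {
    caffe2::GlobalInit(&argc, &argv);  // sets up gflags/glog for caffe2
    {
        Caffe2Net net("init_net.pb", "predict_net.pb");  // placeholder paths
        cv::Mat img = cv::imread("test.jpg");            // placeholder image
        vector<float> feature = net.predict(img);
        cout << "feature dim: " << feature.size() << endl;
    }  // net and its workspace are destroyed here, before protobuf shutdown
    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}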

tips

patching the SpatialBN source to support fc layers

Stock caffe2 only supports batch norm on convolutional layers, which is rather awkward. Someone hit the same problem on GitHub (https://github.com/caffe2/caffe2/issues/865), and the suggested workaround is pretty clumsy: reshape the fc output to 4 dimensions with the trailing two set to 1, then run spatial_bn on that. It works, but it is not elegant. Reading caffe2's spatial_bn implementation, the rank restriction turns out to be unnecessary: for a 2-D (N, C) input the spatial extent simply degenerates to 1, and the op reduces to per-channel batch norm over the batch, which is exactly what an fc layer needs. So patch the source instead (and rebuild/reinstall caffe2 afterwards):

vim caffe2/operators/spatial_batch_norm_op.cc

// relax the rank check so 2-D (N, C) inputs from fc layers are accepted
//CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(X.ndim() >= 2 && X.ndim() <= 5);
const int N = X.dim32(0);
const int C =
    (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
// with no spatial dimensions, H falls back to 1 (W and D further down
// already fall back to 1 for low-rank inputs in the original source)
//const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
const int H = X.ndim() > 2
    ? (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1))
    : 1;