2019年7月12日金曜日

Mask R-CNNをやってみた

NewralNetwork Runtimeってとっても便利ですね。
おじさん、NewralNetwork Runtimeを使って、前からやりたかったMask R-CNNをやってみました。

Mask R-CNNって写真の中にある物の、位置、名前、マスク値がわかります。
マスク値がわかると何がよいかっていうと、「物の向き」や「道路の走れる領域」とかもマスク値からわかります。

あとは物までの距離(深度)がわかれば、カメラの映像だけで自動運転を行うための基礎技術がだいたいそろうことになります。その深度推定については、先月末にGoogleが画期的なアルゴリズムを発表したようです。
これもデータが手に入り次第試してみたいと思います。

なので、本日はMask R-CNNをやってみます。
まず、いつものようにどっかのサイトを調べます。

次に、先日作ったいつもの最小のNewralNetwork Runtimeを取ってきます。


こんな感じでコードを書けばよさそう。
---------------------------------------
#include<stdio.h>
#include<cstdarg>
#include<iostream>
#include<fstream>
#include<string>
#include<vector>

#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION


#include "opencv2_core.hpp"
#include "opencv2_imgproc.hpp"
#include "opencv2_imgproc_imgproc_c.h"
#include "opencv2_dnn.hpp"

#include "stb_image.h"
#include "stb_image_write.h"

// Swap the R and B channels of an interleaved 8-bit image in place, and force
// the alpha channel to fully opaque. Used to convert between the RGBA byte
// order produced by stb_image and the BGRA order OpenCV expects.
//
// p : pixel buffer holding x*y pixels of c interleaved channels
// x : image width in pixels
// y : image height in pixels
// c : channels per pixel (callers in this file always pass 4)
void changeb_g(unsigned char* p, int x, int y, int c)
{
    int ct = x * y;
    int i;
    unsigned char t;
    for (i = 0; i < ct; i++) {
        t = p[0];
        p[0] = p[2];
        p[2] = t;
        // BUG FIX: only touch the alpha byte when the pixel actually has one.
        // The old code wrote p[3] unconditionally, which is an out-of-bounds
        // write (and data corruption) for any c < 4.
        if (c >= 4) p[3] = 255;
        p += c;
    }
}


// Class names loaded by label_init() from mscoco_labels.names
// (vector index = class id reported by the network).
std::vector<std::string> classes;
// Per-class drawing colors loaded by label_init() from colors.txt;
// drawBox() cycles through them by class id.
std::vector<cv::Scalar> colors;

float confThreshold = 0.5; // Confidence threshold: detections scoring below this are discarded
float maskThreshold = 0.3; // Mask threshold: mask values below this are treated as background


// Populate the global `classes` and `colors` tables from text files on disk:
//
//   mscoco_labels.names : one class name per line (index = network class id)
//   colors.txt          : one "R G B" triple per line, parsed as doubles
//
// Missing or unreadable files are silently tolerated; the corresponding
// vector is simply left empty.
void label_init()
{
    // Load names of classes
    std::string classesFile = "mscoco_labels.names";
    std::ifstream ifs(classesFile.c_str());
    std::string line;
    while (getline(ifs, line)) classes.push_back(line);

    // Load the colors
    std::string colorsFile = "colors.txt";
    std::ifstream colorFptr(colorsFile.c_str());
    while (getline(colorFptr, line)) {
        char* pEnd;
        double r, g, b;
        r = strtod(line.c_str(), &pEnd);
        // BUG FIX: the second strtod must continue from where the first one
        // stopped. Previously both g and b were parsed from the position right
        // after r, so g always equaled b and the real blue value was dropped.
        g = strtod(pEnd, &pEnd);
        b = strtod(pEnd, NULL);
        colors.push_back(cv::Scalar(r, g, b, 255.0));
    }
}


// printf-style formatting into a std::string.
//
// format : printf format string
// ...    : arguments matching the format specifiers
// returns the formatted text (without the terminating '\0'), or an empty
// string on an encoding error.
//
// Fixes over the previous version:
//  - uses vsnprintf: passing a va_list to plain snprintf as a variadic
//    argument is undefined behavior,
//  - va_copy's the list before the measuring pass, since vsnprintf consumes
//    the list it is given,
//  - va_end is now actually executed (it used to sit after the return),
//  - the MSVC branch no longer leaves an embedded trailing '\0' in the result
//    (vsnprintf is standard-conforming on MSVC 2015+, so one portable path
//    replaces the #ifdef).
std::string format(const char* format, ...)
{
    va_list args;
    va_start(args, format);

    // First pass: measure the required length on a copy of the list.
    va_list measure;
    va_copy(measure, args);
    int len = vsnprintf(nullptr, 0, format, measure);
    va_end(measure);

    if (len < 0) { // encoding error
        va_end(args);
        return std::string();
    }

    // Second pass: format into a buffer with room for the terminator.
    std::vector<char> buf(static_cast<size_t>(len) + 1);
    vsnprintf(buf.data(), buf.size(), format, args);
    va_end(args);

    return std::string(buf.data(), static_cast<size_t>(len));
}
// Draw the predicted bounding box, colorize and show the mask on the image
void drawBox(cv::Mat& frame, int classId, float conf, cv::Rect box, cv::Mat& objectMask)
{
//Draw a rectangle displaying the bounding box
rectangle(frame, cv::Point(box.x, box.y), cv::Point(box.x + box.width, box.y + box.height), cv::Scalar(255, 178, 50), 3);

//Get the label for the class name and its confidence
std::string label = format("%.2f", conf);
if (!classes.empty())
{
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ":" + label;
}

//Display the label at the top of the bounding box
int baseLine;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
box.y = cv::max(box.y, labelSize.height);
rectangle(frame, cv::Point(box.x, box.y - round(1.5*labelSize.height)), cv::Point(box.x + round(1.5*labelSize.width), box.y + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
putText(frame, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0), 1);

cv::Scalar color = colors[classId%colors.size()];
// Resize the mask, threshold, color and apply it on the image
resize(objectMask, objectMask, cv::Size(box.width, box.height));
cv::Mat mask = (objectMask > maskThreshold);
cv::Mat coloredRoi = (0.3 * color + 0.7 * frame(box));
coloredRoi.convertTo(coloredRoi, CV_8UC3);

// Draw the contours on the image
std::vector<cv::Mat> contours;
cv::Mat hierarchy;
mask.convertTo(mask, CV_8U);
findContours(mask, contours, hierarchy, cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE);
drawContours(coloredRoi, contours, -1, color, 5, cv::LINE_8, hierarchy, 100);
coloredRoi.copyTo(frame(box), mask);
}

// For each frame, extract the bounding box and mask for each detected object
// For each frame, extract the bounding box and mask for every detected
// object above the confidence threshold, and draw them onto `frame`.
//
// outs[0] : detections, 1x1xNx7 (image id, class id, score, l, t, r, b)
// outs[1] : masks, NxCxHxW (N boxes, C classes, HxW segmentation shape)
void postprocess(cv::Mat& frame, const std::vector<cv::Mat>& outs)
{
    cv::Mat detections = outs[0];
    cv::Mat masks = outs[1];

    const int numDetections = detections.size[2];

    // Flatten to one 7-element row per detection; coordinates are normalized.
    detections = detections.reshape(1, detections.total() / 7);

    for (int det = 0; det < numDetections; ++det)
    {
        const float score = detections.at<float>(det, 2);
        if (!(score > confThreshold))
            continue; // below the confidence cut-off

        // Bounding box in absolute pixel coordinates, clamped to the frame.
        const int classId = static_cast<int>(detections.at<float>(det, 1));
        int left   = static_cast<int>(frame.cols * detections.at<float>(det, 3));
        int top    = static_cast<int>(frame.rows * detections.at<float>(det, 4));
        int right  = static_cast<int>(frame.cols * detections.at<float>(det, 5));
        int bottom = static_cast<int>(frame.rows * detections.at<float>(det, 6));

        left   = cv::max(0, cv::min(left,   frame.cols - 1));
        top    = cv::max(0, cv::min(top,    frame.rows - 1));
        right  = cv::max(0, cv::min(right,  frame.cols - 1));
        bottom = cv::max(0, cv::min(bottom, frame.rows - 1));
        cv::Rect box(left, top, right - left + 1, bottom - top + 1);

        // View (no copy) of this detection's mask for its predicted class.
        cv::Mat objectMask(masks.size[2], masks.size[3], CV_32F,
                           masks.ptr<float>(det, classId));

        // Draw bounding box, colorize and show the mask on the image.
        drawBox(frame, classId, score, box, objectMask);
    }
}



int main()
{
unsigned char* p;
int x=-1, y=-1, n=-1;

//
p = stbi_load("zyobandou.jpg", &x, &y, &n, 4);
//p = stbi_load("apple.jpg", &x, &y, &n, 4);
if (p == NULL || x < 1 || y < 1)return 1;
changeb_g(p, x, y, 4);
cv::Mat color = cv::Mat(y, x, CV_8UC4);
memcpy(color.data, p, x * 4 * y);
stbi_image_free(p);

//
cv::Mat frame, blob;
cv::cvtColor(color, frame, CV_BGRA2BGR);
//cv::resize(img, img, cv::Size(224, 224));

std::string textGraph = "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt";
std::string modelWeights = "frozen_inference_graph.pb";

cv::dnn::Net net = cv::dnn::readNetFromTensorflow(modelWeights, textGraph);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

cv::dnn::blobFromImage(frame, blob, 1.0, cv::Size(frame.cols, frame.rows), cv::Scalar(), true, false);
net.setInput(blob);

label_init();
// Runs the forward pass to get output from the output layers
std::vector<cv::String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
    std::vector<cv::Mat> outs;
net.forward(outs, outNames);

// Extract the bounding box and mask for each of the detected objects
postprocess(frame, outs);

cv::cvtColor(frame, color, CV_BGR2BGRA);
x = color.cols;
y = color.rows;
changeb_g(color.data, x, y, 4);
stbi_write_png("result.png", x, y, 4, color.data, 4 * x);

return 0;
}

---------------------------------------

たまには日本の画像で試験してみようと思い、常磐道の写真で実験。




なんかそれっぽくちゃんとできてるじゃん!
こういうのを数時間のプログラムで作れる時代なんですね。

0 件のコメント:

コメントを投稿