OpenShot Library | libopenshot  0.7.0
ObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iostream>
16 #include <algorithm>
17 
19 #include "effects/Tracker.h"
20 #include "Exceptions.h"
21 #include "Timeline.h"
22 #include "objdetectdata.pb.h"
23 
24 #include <QImage>
25 #include <QPainter>
26 #include <QRectF>
27 #include <QString>
28 #include <QStringList>
29 using namespace std;
30 using namespace openshot;
31 
32 
33 // Default constructor
34 ObjectDetection::ObjectDetection()
35  : display_box_text(1.0)
36  , display_boxes(1.0)
37 {
38  // Init effect metadata
39  init_effect_details();
40 
41  // We haven’t loaded any protobuf yet, so there's nothing to pick.
43 }
44 
45 // Init effect settings
46 void ObjectDetection::init_effect_details()
47 {
50 
52  info.class_name = "ObjectDetection";
53  info.name = "Object Detector";
54  info.description = "Detect objects through the video.";
55  info.has_audio = false;
56  info.has_video = true;
57  info.has_tracked_object = true;
58 }
59 
60 // This method is required for all derived classes of EffectBase, and returns a
61 // modified openshot::Frame object
62 std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number) {
63  // Get the frame's QImage
64  std::shared_ptr<QImage> frame_image = frame->GetImage();
65 
66  // Check if frame isn't NULL
67  if(!frame_image || frame_image->isNull()) {
68  return frame;
69  }
70 
71  QPainter painter(frame_image.get());
72  painter.setRenderHints(QPainter::Antialiasing | QPainter::SmoothPixmapTransform);
73 
74  if (detectionsData.find(frame_number) != detectionsData.end()) {
75  DetectionData detections = detectionsData[frame_number];
76  for (int i = 0; i < detections.boxes.size(); i++) {
77  if (detections.confidences.at(i) < confidence_threshold ||
78  (!display_classes.empty() &&
79  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end())) {
80  continue;
81  }
82 
83  int objectId = detections.objectIds.at(i);
84  auto trackedObject_it = trackedObjects.find(objectId);
85 
86  if (trackedObject_it != trackedObjects.end()) {
87  std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);
88 
89  Clip* parentClip = (Clip*) trackedObject->ParentClip();
90  if (parentClip && trackedObject->Contains(frame_number) && trackedObject->visible.GetValue(frame_number) == 1) {
91  BBox trackedBox = trackedObject->GetBox(frame_number);
92  QRectF boxRect((trackedBox.cx - trackedBox.width / 2) * frame_image->width(),
93  (trackedBox.cy - trackedBox.height / 2) * frame_image->height(),
94  trackedBox.width * frame_image->width(),
95  trackedBox.height * frame_image->height());
96 
97  // Get properties of tracked object (i.e. colors, stroke width, etc...)
98  std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
99  std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
100  int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
101  float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
102  float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
103  float bg_corner = trackedObject->background_corner.GetValue(frame_number);
104 
105  // Set the pen for the border
106  QPen pen(QColor(stroke_rgba[0], stroke_rgba[1], stroke_rgba[2], 255 * stroke_alpha));
107  pen.setWidthF(trackedObject->ScaledStrokeWidth(
108  frame_number, frame_image->width(), frame_image->height()));
109  painter.setPen(pen);
110 
111  // Set the brush for the background
112  QBrush brush(QColor(bg_rgba[0], bg_rgba[1], bg_rgba[2], 255 * bg_alpha));
113  painter.setBrush(brush);
114 
115  if (display_boxes.GetValue(frame_number) == 1 && trackedObject->draw_box.GetValue(frame_number) == 1) {
116  // Only draw boxes if both properties are set to YES (draw all boxes, and draw box of the selected box)
117  painter.drawRoundedRect(boxRect, bg_corner, bg_corner);
118  }
119 
120  if(display_box_text.GetValue(frame_number) == 1) {
121  // Draw text label above bounding box
122  // Get the confidence and classId for the current detection
123  int classId = detections.classIds.at(i);
124 
125  // Get the label for the class name and its confidence
126  QString label = QString::number(objectId);
127  if (!classNames.empty()) {
128  label = QString::fromStdString(classNames[classId]) + ":" + label;
129  }
130 
131  // Set up the painter, font, and pen
132  QFont font;
133  font.setPixelSize(14);
134  painter.setFont(font);
135 
136  // Calculate the size of the text
137  QFontMetrics fontMetrics(font);
138  QSize labelSize = fontMetrics.size(Qt::TextSingleLine, label);
139 
140  // Define the top left point of the rectangle
141  double left = boxRect.center().x() - (labelSize.width() / 2.0);
142  double top = std::max(static_cast<int>(boxRect.top()), labelSize.height()) - 4.0;
143 
144  // Draw the text
145  painter.drawText(QPointF(left, top), label);
146  }
147  }
148  }
149  }
150  }
151 
152  painter.end();
153 
154  // The frame's QImage has been modified in place, so we just return the original frame
155  return frame;
156 }
157 
158 // Load protobuf data file
159 bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath)
160 {
161  // Parse the file
162  pb_objdetect::ObjDetect objMessage;
163  std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
164  if (!objMessage.ParseFromIstream(&input)) {
165  std::cerr << "Failed to parse protobuf message." << std::endl;
166  return false;
167  }
168 
169  // Clear out any old state
170  classNames.clear();
171  detectionsData.clear();
172  trackedObjects.clear();
173 
174  // Seed colors for each class
175  std::srand(1);
176  for (int i = 0; i < objMessage.classnames_size(); ++i) {
177  classNames.push_back(objMessage.classnames(i));
178  classesColor.push_back(cv::Scalar(
179  std::rand() % 205 + 50,
180  std::rand() % 205 + 50,
181  std::rand() % 205 + 50
182  ));
183  }
184 
185  // Walk every frame in the protobuf
186  for (size_t fi = 0; fi < objMessage.frame_size(); ++fi) {
187  const auto &pbFrame = objMessage.frame(fi);
188  size_t frameId = pbFrame.id();
189 
190  // Buffers for DetectionData
191  std::vector<int> classIds;
192  std::vector<float> confidences;
193  std::vector<cv::Rect_<float>> boxes;
194  std::vector<int> objectIds;
195 
196  // For each bounding box in this frame
197  for (int di = 0; di < pbFrame.bounding_box_size(); ++di) {
198  const auto &b = pbFrame.bounding_box(di);
199  float x = b.x(), y = b.y(), w = b.w(), h = b.h();
200  int classId = b.classid();
201  float confidence= b.confidence();
202  int objectId = b.objectid();
203 
204  // Record for DetectionData
205  classIds.push_back(classId);
206  confidences.push_back(confidence);
207  boxes.emplace_back(x, y, w, h);
208  objectIds.push_back(objectId);
209 
210  // Either append to an existing TrackedObjectBBox…
211  auto it = trackedObjects.find(objectId);
212  if (it != trackedObjects.end()) {
213  it->second->AddBox(frameId, x + w/2, y + h/2, w, h, 0.0);
214  }
215  else {
216  // …or create a brand-new one
217  TrackedObjectBBox tmpObj(
218  (int)classesColor[classId][0],
219  (int)classesColor[classId][1],
220  (int)classesColor[classId][2],
221  /*alpha=*/0
222  );
223  tmpObj.stroke_alpha = Keyframe(1.0);
224  tmpObj.AddBox(frameId, x + w/2, y + h/2, w, h, 0.0);
225 
226  auto ptr = std::make_shared<TrackedObjectBBox>(tmpObj);
227  ptr->ParentClip(this->ParentClip());
228 
229  // Prefix with effect UUID for a unique string ID
230  std::string prefix = this->Id();
231  if (!prefix.empty())
232  prefix += "-";
233  ptr->Id(prefix + std::to_string(objectId));
234  trackedObjects.emplace(objectId, ptr);
235  }
236  }
237 
238  // Save the DetectionData for this frame
239  detectionsData[frameId] = DetectionData(
240  classIds, confidences, boxes, frameId, objectIds
241  );
242  }
243 
244  google::protobuf::ShutdownProtobufLibrary();
245 
246  // Finally, pick a default selectedObjectIndex if we have any
247  if (!trackedObjects.empty()) {
248  selectedObjectIndex = trackedObjects.begin()->first;
249  }
250 
251  return true;
252 }
253 
254 // Get the indexes and IDs of all visible objects in the given frame
255 std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
256 
257  // Initialize the JSON objects
258  Json::Value root;
259  root["visible_objects_index"] = Json::Value(Json::arrayValue);
260  root["visible_objects_id"] = Json::Value(Json::arrayValue);
261  root["visible_class_names"] = Json::Value(Json::arrayValue);
262 
263  // Check if track data exists for the requested frame
264  if (detectionsData.find(frame_number) == detectionsData.end()){
265  return root.toStyledString();
266  }
267  DetectionData detections = detectionsData.at(frame_number);
268 
269  // Iterate through the tracked objects
270  for(int i = 0; i<detections.boxes.size(); i++){
271  // Does not show boxes with confidence below the threshold
272  if(detections.confidences.at(i) < confidence_threshold){
273  continue;
274  }
275 
276  // Get class name of tracked object
277  auto className = classNames[detections.classIds.at(i)];
278 
279  // If display_classes is not empty, check if className is in it
280  if (!display_classes.empty()) {
281  auto it = std::find(display_classes.begin(), display_classes.end(), className);
282  if (it == display_classes.end()) {
283  // If not in display_classes, skip this detection
284  continue;
285  }
286  root["visible_class_names"].append(className);
287  } else {
288  // include all class names
289  root["visible_class_names"].append(className);
290  }
291 
292  int objectId = detections.objectIds.at(i);
293  // Search for the object in the trackedObjects map
294  auto trackedObject = trackedObjects.find(objectId);
295 
296  // Get the tracked object JSON properties for this frame
297  Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
298 
299  if (trackedObjectJSON["visible"]["value"].asBool() &&
300  trackedObject->second->ExactlyContains(frame_number)){
301  // Save the object's index and ID if it's visible in this frame
302  root["visible_objects_index"].append(trackedObject->first);
303  root["visible_objects_id"].append(trackedObject->second->Id());
304  }
305  }
306 
307  return root.toStyledString();
308 }
309 
310 // Generate JSON string of this object
311 std::string ObjectDetection::Json() const {
312 
313  // Return formatted string
314  return JsonValue().toStyledString();
315 }
316 
317 // Generate Json::Value for this object
318 Json::Value ObjectDetection::JsonValue() const {
319 
320  // Create root json object
321  Json::Value root = EffectBase::JsonValue(); // get parent properties
322  root["type"] = info.class_name;
323  root["protobuf_data_path"] = protobuf_data_path;
324  root["selected_object_index"] = selectedObjectIndex;
325  root["confidence_threshold"] = confidence_threshold;
326  root["display_box_text"] = display_box_text.JsonValue();
327  root["display_boxes"] = display_boxes.JsonValue();
328 
329  // Add tracked object's IDs to root
330  Json::Value objects;
331  for (auto const& trackedObject : trackedObjects){
332  Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
333  // add object json
334  objects[trackedObject.second->Id()] = trackedObjectJSON;
335  }
336  root["objects"] = objects;
337 
338  // return JsonValue
339  return root;
340 }
341 
342 // Load JSON string into this object
343 void ObjectDetection::SetJson(const std::string value) {
344 
345  // Parse JSON string into JSON objects
346  try
347  {
348  const Json::Value root = openshot::stringToJson(value);
349  // Set all values that match
350  SetJsonValue(root);
351  }
352  catch (const std::exception& e)
353  {
354  // Error parsing JSON (or missing keys)
355  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
356  }
357 }
358 
359 // Load Json::Value into this object
360 void ObjectDetection::SetJsonValue(const Json::Value root)
361 {
362  // Parent properties
364 
365  // If a protobuf path is provided, load & prefix IDs
366  if (!root["protobuf_data_path"].isNull()) {
367  std::string new_path = root["protobuf_data_path"].asString();
368  if (protobuf_data_path != new_path || trackedObjects.empty()) {
369  protobuf_data_path = new_path;
370  if (!LoadObjDetectdData(protobuf_data_path)) {
371  throw InvalidFile("Invalid protobuf data path", "");
372  }
373  }
374  }
375 
376  // Selected index, thresholds, UI flags, filters, etc.
377  if (!root["selected_object_index"].isNull())
378  selectedObjectIndex = root["selected_object_index"].asInt();
379  if (!root["confidence_threshold"].isNull())
380  confidence_threshold = root["confidence_threshold"].asFloat();
381  if (!root["display_box_text"].isNull())
382  display_box_text.SetJsonValue(root["display_box_text"]);
383  if (!root["display_boxes"].isNull())
384  display_boxes.SetJsonValue(root["display_boxes"]);
385 
386  if (!root["class_filter"].isNull()) {
387  class_filter = root["class_filter"].asString();
388  QStringList parts = QString::fromStdString(class_filter).split(',');
389  display_classes.clear();
390  for (auto &p : parts) {
391  auto s = p.trimmed().toLower();
392  if (!s.isEmpty()) {
393  display_classes.push_back(s.toStdString());
394  }
395  }
396  }
397 
398  // Apply any per-object overrides
399  if (!root["objects"].isNull()) {
400  // Iterate over the supplied objects (indexed by id or position)
401  const auto memberNames = root["objects"].getMemberNames();
402  for (const auto& name : memberNames)
403  {
404  // Determine the numeric index of this object
405  int index = -1;
406  bool numeric_key = std::all_of(name.begin(), name.end(), ::isdigit);
407  if (numeric_key) {
408  index = std::stoi(name);
409  }
410  else
411  {
412  size_t pos = name.find_last_of('-');
413  if (pos != std::string::npos) {
414  try {
415  index = std::stoi(name.substr(pos + 1));
416  } catch (...) {
417  index = -1;
418  }
419  }
420  }
421 
422  auto obj_it = trackedObjects.find(index);
423  if (obj_it != trackedObjects.end() && obj_it->second) {
424  // Update object id if provided as a non-numeric key
425  if (!numeric_key)
426  obj_it->second->Id(name);
427  obj_it->second->SetJsonValue(root["objects"][name]);
428  }
429  }
430  }
431  // Set the tracked object's ids (legacy format)
432  if (!root["objects_id"].isNull()) {
433  for (auto& kv : trackedObjects) {
434  if (!root["objects_id"][kv.first].isNull())
435  kv.second->Id(root["objects_id"][kv.first].asString());
436  }
437  }
438 }
439 
440 // Get all properties for a specific frame
441 std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
442 
443  // Generate JSON properties list
444  Json::Value root = BasePropertiesJSON(requested_frame);
445 
446  Json::Value objects;
447  if(trackedObjects.count(selectedObjectIndex) != 0){
448  auto selectedObject = trackedObjects.at(selectedObjectIndex);
449  if (selectedObject){
450  Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
451  // add object json
452  objects[selectedObject->Id()] = trackedObjectJSON;
453  }
454  }
455  root["objects"] = objects;
456 
457  root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
458  root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
459  root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
460 
461  root["display_box_text"] = add_property_json("Draw All Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1, false, requested_frame);
462  root["display_box_text"]["choices"].append(add_property_choice_json("Yes", true, display_box_text.GetValue(requested_frame)));
463  root["display_box_text"]["choices"].append(add_property_choice_json("No", false, display_box_text.GetValue(requested_frame)));
464 
465  root["display_boxes"] = add_property_json("Draw All Boxes", display_boxes.GetValue(requested_frame), "int", "", &display_boxes, 0, 1, false, requested_frame);
466  root["display_boxes"]["choices"].append(add_property_choice_json("Yes", true, display_boxes.GetValue(requested_frame)));
467  root["display_boxes"]["choices"].append(add_property_choice_json("No", false, display_boxes.GetValue(requested_frame)));
468 
469  // Return formatted string
470  return root.toStyledString();
471 }
openshot::ClipBase::add_property_json
Json::Value add_property_json(std::string name, float value, std::string type, std::string memo, const Keyframe *keyframe, float min_value, float max_value, bool readonly, int64_t requested_frame) const
Generate JSON for a property.
Definition: ClipBase.cpp:96
openshot::stringToJson
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
openshot::TrackedObjectBBox::stroke_alpha
Keyframe stroke_alpha
Stroke box opacity.
Definition: TrackedObjectBBox.h:146
openshot::ObjectDetection::SetJson
void SetJson(const std::string value) override
Load JSON string into this object.
Definition: ObjectDetection.cpp:343
openshot::ObjectDetection::GetFrame
std::shared_ptr< Frame > GetFrame(std::shared_ptr< Frame > frame, int64_t frame_number) override
This method is required for all derived classes of EffectBase, and returns a modified openshot::Frame...
Definition: ObjectDetection.cpp:62
openshot::TrackedObjectBBox::AddBox
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
Definition: TrackedObjectBBox.cpp:48
openshot::EffectBase::info
EffectInfoStruct info
Information about the current effect.
Definition: EffectBase.h:110
openshot::ObjectDetection::JsonValue
Json::Value JsonValue() const override
Generate Json::Value for this object.
Definition: ObjectDetection.cpp:318
openshot::BBox::height
float height
bounding box height
Definition: TrackedObjectBBox.h:42
DetectionData
Definition: ObjectDetection.h:27
openshot
This namespace is the default namespace for all code in the openshot library.
Definition: AnimatedCurve.h:24
openshot::EffectBase::ParentClip
openshot::ClipBase * ParentClip()
Parent clip object of this effect (which can be unparented and NULL)
Definition: EffectBase.cpp:549
openshot::ClipBase::add_property_choice_json
Json::Value add_property_choice_json(std::string name, int value, int selected_value) const
Generate JSON choice for a property (dropdown properties)
Definition: ClipBase.cpp:132
ObjectDetection.h
Header file for Object Detection effect class.
openshot::Clip
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:89
openshot::EffectBase::JsonValue
virtual Json::Value JsonValue() const
Generate Json::Value for this object.
Definition: EffectBase.cpp:96
openshot::BBox::cy
float cy
y-coordinate of the bounding box center
Definition: TrackedObjectBBox.h:40
Timeline.h
Header file for Timeline class.
DetectionData::objectIds
std::vector< int > objectIds
Definition: ObjectDetection.h:46
openshot::Keyframe::SetJsonValue
void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition: KeyFrame.cpp:372
openshot::EffectBase::trackedObjects
std::map< int, std::shared_ptr< openshot::TrackedObjectBase > > trackedObjects
Map of Tracked Objects by their indices (used by Effects that track objects on clips)
Definition: EffectBase.h:107
openshot::Keyframe::JsonValue
Json::Value JsonValue() const
Generate Json::Value for this object.
Definition: KeyFrame.cpp:339
openshot::EffectBase::BasePropertiesJSON
Json::Value BasePropertiesJSON(int64_t requested_frame) const
Generate JSON object of base properties (recommended to be used by all effects)
Definition: EffectBase.cpp:236
openshot::TrackedObjectBBox
This class contains the properties of a tracked object and functions to manipulate it.
Definition: TrackedObjectBBox.h:130
openshot::Keyframe
A Keyframe is a collection of Point instances, which is used to vary a number or property over time.
Definition: KeyFrame.h:53
openshot::InvalidJSON
Exception for invalid JSON.
Definition: Exceptions.h:223
openshot::BBox::width
float width
bounding box width
Definition: TrackedObjectBBox.h:41
openshot::ObjectDetection::Json
std::string Json() const override
Generate JSON string of this object.
Definition: ObjectDetection.cpp:311
openshot::EffectBase::InitEffectInfo
void InitEffectInfo()
Definition: EffectBase.cpp:37
openshot::EffectInfoStruct::has_audio
bool has_audio
Determines if this effect manipulates the audio of a frame.
Definition: EffectBase.h:44
DetectionData::classIds
std::vector< int > classIds
Definition: ObjectDetection.h:43
DetectionData::confidences
std::vector< float > confidences
Definition: ObjectDetection.h:44
Tracker.h
Header file for Tracker effect class.
openshot::EffectInfoStruct::has_tracked_object
bool has_tracked_object
Determines if this effect tracks objects through the clip.
Definition: EffectBase.h:45
openshot::InvalidFile
Exception for files that can not be found or opened.
Definition: Exceptions.h:193
openshot::EffectInfoStruct::class_name
std::string class_name
The class name of the effect.
Definition: EffectBase.h:39
openshot::EffectInfoStruct::description
std::string description
The description of this effect and what it does.
Definition: EffectBase.h:41
openshot::BBox
This struct holds the information of a bounding-box.
Definition: TrackedObjectBBox.h:37
openshot::EffectInfoStruct::has_video
bool has_video
Determines if this effect manipulates the image of a frame.
Definition: EffectBase.h:43
openshot::ClipBase::Id
void Id(std::string value)
Definition: ClipBase.h:94
openshot::ObjectDetection::LoadObjDetectdData
bool LoadObjDetectdData(std::string inputFilePath)
Load protobuf data file.
Definition: ObjectDetection.cpp:159
openshot::ObjectDetection::PropertiesJSON
std::string PropertiesJSON(int64_t requested_frame) const override
Definition: ObjectDetection.cpp:441
DetectionData::boxes
std::vector< cv::Rect_< float > > boxes
Definition: ObjectDetection.h:45
openshot::EffectInfoStruct::name
std::string name
The name of the effect.
Definition: EffectBase.h:40
openshot::ObjectDetection::GetVisibleObjects
std::string GetVisibleObjects(int64_t frame_number) const override
Get the indexes and IDs of all visible objects in the given frame.
Definition: ObjectDetection.cpp:255
openshot::BBox::cx
float cx
x-coordinate of the bounding box center
Definition: TrackedObjectBBox.h:39
openshot::ObjectDetection::selectedObjectIndex
int selectedObjectIndex
Index of the Tracked Object that was selected to modify its properties.
Definition: ObjectDetection.h:83
Exceptions.h
Header file for all Exception classes.
openshot::EffectBase::SetJsonValue
virtual void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition: EffectBase.cpp:139
openshot::Keyframe::GetValue
double GetValue(int64_t index) const
Get the value at a specific index.
Definition: KeyFrame.cpp:258
openshot::ReaderBase::ParentClip
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Definition: ReaderBase.cpp:244
openshot::ObjectDetection::SetJsonValue
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
Definition: ObjectDetection.cpp:360