Example
Minimal TrackEngine example.
The example uses the OpenCV library as a simple and well-known means of capturing frames from a camera and drawing the results.
#include "../../inc/tsdk/ITrackEngine.h"
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <atomic>
#include <chrono>
#include <future>
#include <iostream>
#include <limits>
#include <map>
#include <thread>
#include <vector>
#ifdef WITH_GPU // defined when building with GPU support
#include "cuda_runtime.h"
#endif
#define USE_GPU false
#define USE_BATCHED_OBSERVERS true // the preferred way
#define USE_FACE_DETECTOR true
#define USE_BODY_DETECTOR false
#define USE_IMAGE_CACHE false // optimization to avoid repeated image allocations
#define IMAGE_CACHE_SIZE 40
std::map<int,cv::Mat> frameImages;
std::map<int,cv::Mat> bestShotImages;
/**
* @brief Image wrapper. Needed only for public access to the protected method fsdk::Image::getRefCount.
*/
class ImageWrapper : public fsdk::Image {
public:
ImageWrapper() = default;
int getRefCount() const {
return fsdk::Image::getRefCount();
}
};
/**
* @brief Simple image cache to avoid allocations on GPU for performance reasons
*/
class ImageCache {
public:
ImageCache(uint32_t size)
: m_images(size) {
}
fsdk::Image get(int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
auto it = m_images.begin();
// find empty or free (ref count == 1) slot
for (; it != m_images.end(); ++it) {
if (!it->isValid() ||
(it->getRefCount() == 1 && width == it->getWidth() &&
height == it->getHeight() && it->getMemoryResidence() == memoryResidence)) {
break;
}
}
if (it == m_images.end()) {
return fsdk::Image();
}
// if empty, then create new one
if (!it->isValid()) {
it->create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
}
return static_cast<fsdk::Image&>(*it);
}
private:
std::vector<ImageWrapper> m_images;
};
std::vector<ImageCache> streamCaches;
static fsdk::Image getCachedImage(ImageCache &cache, int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
fsdk::Image result = cache.get(width, height, memoryResidence);
if (!result.isValid()) {
result.create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
}
return result;
}
struct SuperObserver :
tsdk::IBestShotObserver,
tsdk::IVisualObserver,
tsdk::IDebugObserver,
tsdk::IBestShotPredicate,
tsdk::IVisualPredicate {
int m_streamId;
std::map<int, int> m_bestAreas;
SuperObserver(int streamId) : m_streamId{streamId} {
}
SuperObserver() : m_streamId{} {}
~SuperObserver() override = default;
void bestShot(const tsdk::DetectionDescr& detection, const tsdk::AdditionalFrameData* data) override {
if (detection.image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for GPU-resident images, transfer to CPU first or use cv::GpuMat
return;
// save best shot crop to map
const cv::Mat cvFrame(detection.image.getHeight(), detection.image.getWidth(), CV_8UC3, const_cast<void*>(detection.image.getData()));
const auto rect = detection.detection.getRect();
bestShotImages[detection.trackId] = cvFrame(cv::Rect(rect.x, rect.y, rect.width, rect.height)).clone();
}
void trackEnd(const tsdk::TrackId& trackId) override {
if (USE_FACE_DETECTOR) {
// track with id = 'trackId' finished
}
else if (USE_BODY_DETECTOR) {
// track with id = 'trackId' was moved to the non-active tracks group or finished
// this callback can't report the actual reason (its signature is kept compatible with older versions),
// so use the new batched observer API to get it (see 'TrackEndCallbackData')
}
}
void trackReIdentificate(tsdk::FrameId frameId, tsdk::TrackId trackId, tsdk::TrackId reidTrackId) override {
// track with id = 'trackId' was matched to one of the old non-active tracks with id = 'reidTrackId'
// after this callback, trackEnd will be called for the track with id = 'trackId' (i.e. reidTrackId replaces trackId);
// the track with id = 'reidTrackId' becomes either non-active again or active, depending on whether the track with id = 'trackId' was updated on the last frame or not
}
void trackStatusUpdate(tsdk::FrameId frameId, tsdk::TrackId trackId, tsdk::TrackStatus status) override {
}
void visual(const tsdk::FrameId &frameId,
const fsdk::Image &image,
const tsdk::TrackInfo * trackInfo,
const int nTrack,
const tsdk::AdditionalFrameData* data) override {
if (image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for GPU-resident images, transfer to CPU first or use cv::GpuMat
return;
// convert fsdk::Image to cv::Mat
const cv::Mat cvFrame(image.getHeight(), image.getWidth(), CV_8UC3, const_cast<void*>(image.getData()));
// save frame to the map
frameImages[m_streamId] = cvFrame.clone();
for (int i = 0; i < nTrack; i++) {
// draw the track id and detection rectangle on the frame
cv::putText(frameImages[m_streamId],
std::to_string(trackInfo[i].trackId),
cv::Point(trackInfo[i].rect.x + trackInfo[i].rect.width / 2, trackInfo[i].rect.y + trackInfo[i].rect.height / 2),
cv::FONT_HERSHEY_SIMPLEX,
1,
cv::Scalar(10, 200, 10),
2);
cv::rectangle(frameImages[m_streamId],
cv::Rect(trackInfo[i].rect.x,
trackInfo[i].rect.y,
trackInfo[i].rect.width,
trackInfo[i].rect.height),
trackInfo[i].isDetector ? cv::Scalar(150, 10, 10) : cv::Scalar(10, 10, 150), 2);
}
}
bool checkBestShot(const tsdk::DetectionDescr& descr, const tsdk::AdditionalFrameData* data) override {
// example of best-shot logic: the bigger the detection area, the better
/*if (m_bestAreas.find(descr.trackId) == m_bestAreas.end())
m_bestAreas[descr.trackId] = 0;
if (descr.detection.getRect().getArea() > m_bestAreas[descr.trackId]) {
m_bestAreas[descr.trackId] = descr.detection.getRect().getArea();
return true;
}*/
return true;
}
bool needRGBImage(const tsdk::FrameId frameId, const tsdk::AdditionalFrameData*) override {
return true;
}
// callbacks intended mostly for debugging
void debugForegroundSubtraction(const tsdk::FrameId& frameId, const fsdk::Image& firstMask,
const fsdk::Image& secondMask, fsdk::Rect * regions, int nRegions) override {
}
void debugDetection(const tsdk::DetectionDebugInfo& descr) override {
}
};
struct BatchedSuperObserver :
tsdk::IBatchBestShotObserver,
tsdk::IBatchVisualObserver,
tsdk::IBatchDebugObserver {
BatchedSuperObserver() = default;
~BatchedSuperObserver() override = default;
// implementations would be analogous to the per-stream observers (see `SuperObserver`); minimal sketches are given in bestShot() and trackEnd() below
void bestShot(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::BestShotCallbackData> &data) override {
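// A minimal sketch of batched handling, assuming fsdk::Span exposes size()
// and operator[] (the exact BestShotCallbackData layout is not shown in this
// example): each data[i] is paired with streamIDs[i], so the per-stream
// logic of SuperObserver::bestShot() can be applied per entry.
for (size_t i = 0; i < data.size(); ++i) {
// handle data[i] for the stream streamIDs[i] here
}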
}
void trackEnd(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::TrackEndCallbackData> &data) override {
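// This batched callback is where the actual reason of a track's end is
// available for body tracking (see the per-stream trackEnd() note above):
// each data[i], paired with streamIDs[i], is assumed to carry the track id
// and the end reason inside tsdk::TrackEndCallbackData.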
}
void trackStatusUpdate(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::TrackStatusUpdateCallbackData> &data) override {
}
void trackReIdentificate(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::TrackReIdentificateCallbackData> &data) override {
}
void visual(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::VisualCallbackData> &data) override {
}
void debugForegroundSubtraction(const fsdk::Span<tsdk::StreamId> &streamIDs,
const fsdk::Span<tsdk::DebugForegroundSubtractionCallbackData> &data) override {
}
void debugDetection(const fsdk::Span<tsdk::StreamId> &streamIDs,
const fsdk::Span<tsdk::DebugDetectionCallbackData> &data) override {
}
};
int main(int argc, char** argv) {
if (USE_FACE_DETECTOR && USE_BODY_DETECTOR) {
std::cout << "Both face and body detectors are't supported yet" << std::endl;
exit(EXIT_FAILURE);
}
int streamCount = 1;
std::vector<cv::VideoCapture> captures;
captures.reserve(argc);
const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
bool usbCam = false;
if (argc > 1) {
for (int i = 1; i < argc; i++) {
cv::VideoCapture capture;
capture.open(argv[i]);
if (!capture.isOpened()) {
// error opening the video input
std::cout << "video " << argv[i] << " not opened" << std::endl;
exit(EXIT_FAILURE);
} else {
double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
std::cout << argv[i] << " opened, " << frameCount << " frames total" << std::endl;
}
captures.emplace_back(std::move(capture));
}
} else {
cv::VideoCapture capture;
capture.open(0);
if (!capture.isOpened()) {
// error opening the video input
std::cout << "video from webcam not opened"<< std::endl;
exit(EXIT_FAILURE);
}
usbCam = true;
captures.emplace_back(std::move(capture));
}
streamCount = static_cast<int>(captures.size());
// create FaceEngine and then TrackEngine objects
fsdk::ISettingsProviderPtr config = fsdk::createSettingsProvider("./data/faceengine.conf").getValue();
auto faceEngine = fsdk::createFaceEngine("./data/").getValue();
faceEngine->setSettingsProvider(config);
fsdk::ISettingsProviderPtr configTE = fsdk::createSettingsProvider("./data/trackengine.conf").getValue();
configTE->setValue("detectors", "use-face-detector", USE_FACE_DETECTOR);
configTE->setValue("detectors", "use-body-detector", USE_BODY_DETECTOR);
// enable vlTracker if there are multiple streams, since it's intended for multi-stream processing
if (streamCount > 1) {
configTE->setValue("other", "tracker-type", "vlTracker");
}
#ifdef WITH_GPU
// WARNING: GPU mode supports only the 'vlTracker' or 'none' tracker types
if (USE_GPU) {
configTE->setValue("other", "tracker-type", "vlTracker");
}
#endif
auto trackEngine = tsdk::createTrackEngine(faceEngine, configTE).getValue();
std::vector<fsdk::Ref<tsdk::IStream>> streamsList;
streamsList.reserve(streamCount); // prevent reallocation while worker threads index into the vector
std::vector<SuperObserver> observers(streamCount);
std::vector<std::future<void>> threads;
BatchedSuperObserver batchedSuperObserver;
threads.reserve(streamCount);
std::atomic<bool> stop{false};
streamCaches.resize(streamCount, ImageCache(IMAGE_CACHE_SIZE));
auto threadFunc = [&](int captureIndex){
uint32_t index = 0;
auto& capture = captures[captureIndex];
cv::Mat frame; //current frame
if(capture.isOpened()) {
while (!stop && capture.read(frame)) {
if (!usbCam)
index = static_cast<uint32_t>(capture.get(cv::CAP_PROP_POS_FRAMES));
else
index++;
if (!frame.empty()) {
const fsdk::Image cvImageCPUWrapper(frame.cols, frame.rows, fsdk::Format::R8G8B8, frame.data, false); // no copy, just wrapper
fsdk::Image image;
#ifdef WITH_GPU
if (USE_GPU) {
if (USE_IMAGE_CACHE) {
image = getCachedImage(streamCaches[captureIndex], frame.cols, frame.rows, fsdk::Image::MemoryResidence::MemoryGPU);
cudaMemcpy(const_cast<void*>(image.getData()), const_cast<void*>(cvImageCPUWrapper.getData()),
cvImageCPUWrapper.getDataSize(), cudaMemcpyHostToDevice); // copy frame data host -> device
}
else {
image.create(cvImageCPUWrapper, fsdk::Image::MemoryResidence::MemoryGPU);
}
}
else
#endif
{
image = cvImageCPUWrapper.clone();
}
std::cout << "Image:" << image.getWidth() << "x" << image.getHeight() << " residence: " << static_cast<int>(image.getMemoryResidence()) << std::endl;
streamsList[captureIndex]->pushFrameWaitFor(image, index, nullptr, std::numeric_limits<uint32_t>::max());
}
if (index % 1000 == 0) {
if (!usbCam) {
const double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
const double framePos = capture.get(cv::CAP_PROP_POS_FRAMES);
std::cout << "stream " << captureIndex << " progress:" << (framePos / frameCount) * 100.0 << "%"
<< std::endl;
} else {
std::cout << "stream " << captureIndex << " progress:" << index << " frames" << std::endl;
}
}
}
std::cout << "stream " << captureIndex << " ended" << std::endl;
capture.release();
} else {
std::cout << "stream " << captureIndex << " is not opened" << std::endl;
}
};
if (USE_BATCHED_OBSERVERS) {
// set batched callbacks
trackEngine->setBatchBestShotObserver(&batchedSuperObserver);
trackEngine->setBatchVisualObserver(&batchedSuperObserver);
trackEngine->setBatchDebugObserver(&batchedSuperObserver);
}
int observerIndex = 0;
for (int i = 0; i < streamCount; i++) {
// create stream
fsdk::Ref<tsdk::IStream> stream = fsdk::acquire(trackEngine->createStream());
observers[observerIndex].m_streamId = observerIndex;
if (!USE_BATCHED_OBSERVERS) {
// set per-stream callbacks
stream->setBestShotObserver(&observers[observerIndex]);
stream->setVisualObserver(&observers[observerIndex]);
stream->setDebugObserver(&observers[observerIndex]);
}
// always per-stream predicates
// NOTE: the "super" observers are reused here just to simplify the code; normally a separate vector of predicates should be created
stream->setBestShotPredicate(&observers[observerIndex]);
stream->setVisualPredicate(&observers[observerIndex]);
// all observers are enabled by default; these calls just demonstrate the API
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_BEST_SHOT, true);
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_VISUAL, true);
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_DEBUG, true);
streamsList.emplace_back(stream);
threads.emplace_back(std::async(std::launch::async, threadFunc, i));
std::cout << "stream " << i << " started" << std::endl;
observerIndex++;
}
while (true) {
bool notFinished = false;
for (auto &thread: threads) {
if (thread.wait_for(std::chrono::milliseconds(10)) == std::future_status::timeout)
notFinished = true;
}
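// Optional sketch: display the latest frames stored by the visual callback
// (filled only in the non-batched observer mode above); assumes an OpenCV
// build with GUI support. NOTE: frameImages is written from a TrackEngine
// callback thread, so a real application should guard it with a mutex,
// which this example omits.
for (const auto &entry : frameImages) {
if (!entry.second.empty())
cv::imshow("stream " + std::to_string(entry.first), entry.second);
}
if (cv::waitKey(1) == 27) // ESC requests a stop
stop = true;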
if (!notFinished)
break;
}
// it's recommended to join each stream manually
for (auto &stream : streamsList) {
stream->join();
}
// this internally calls join for all streams (those that weren't joined yet) and stops processing
trackEngine->stop();
const std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
const std::chrono::milliseconds duration =
std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
std::cout << "TOTAL DURATION: " << duration.count() << std::endl;
}