Example#
Minimal TrackEngine example.
The example is based on the OpenCV library, as it is the easiest and best-known means of capturing frames from a camera and drawing on them.
// Simple example of TE with opencv cv::VideoCapture used as media player
#include "../../inc/tsdk/ITrackEngine.h"
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <future>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <thread>
#include <vector>
#ifdef WITH_GPU // to build with GPU support or not
#include "cuda_runtime.h"
#endif
// Compile-time options of the example. Each flag is a plain macro that is tested with
// an ordinary `if` in the code below.
#define USE_ROI false // use ROI of processing
#define IS_REALTIME false // is realtime app (for realtime apps frames should be skipped if overload and TE frames-buffer is full)
#define USE_GPU false // use GPU processing
// Callback settings: only one of them can be set at a time.
// If all of these options are set to `false`, then simple per-stream observers are used
// (they're deprecated now, so choose one of these settings).
#define USE_ESTIMATOR_API false // use estimator API, the most flexible way for developers to use TE, but requires more code
// (frame/tracking_result queues, gathering batches, etc.)
#define USE_BATCHED_OBSERVERS false // use async API and batched observers (one observer for all streams and per event type)
#define USE_UNIFIED_OBSERVER true // use async API and one unified observer for all tracking events and streams (preferable way)
// Detectors in use.
// If both are enabled, then `human` tracking works.
#define USE_FACE_DETECTOR true // enable face detector
#define USE_BODY_DETECTOR true // enable body detector
// Optimization tips.
// For GPU, an image cache is the preferable way to avoid the overhead of memory allocations.
#define USE_IMAGE_CACHE true // reuse images from cache
#define IMAGE_CACHE_SIZE 40 // cache size
#define FLOWER_CACHE_SIZE 1024 // increase flower cache for better performance (written to "Runtime":"programCacheSize" in main)
// NOTE: on CPU a higher cache size slightly increases memory consumption
// Most recent annotated frame, keyed by SuperObserver::m_streamId (filled in SuperObserver::visual).
std::map<int,cv::Mat> frameImages;
// Best-shot crops, keyed by track id (filled in SuperObserver::bestShot).
std::map<int,cv::Mat> bestShotImages;
namespace {
    /**
     * @brief Wrap the storage of a std::vector into a non-owning fsdk::Span.
     * @param vec source vector; the span refers to its elements, no copy is made.
     * @return span over `vec.data()` with `vec.size()` elements.
     * @note constness of the vector is cast away because the span is built over `Type*`.
     */
    template<typename Type>
    static fsdk::Span<Type> vectorToSpan(const std::vector<Type> &vec) {
        Type* const firstElement = const_cast<Type*>(vec.data());
        return fsdk::Span<Type>(firstElement, vec.size());
    }
}
/**
 * @brief Thin fsdk::Image subclass whose only purpose is to expose the protected
 *        method fsdk::Image::getRefCount publicly.
 */
class ImageWrapper : public fsdk::Image {
public:
    ImageWrapper() {}

    /** @brief Forward to the base-class reference counter. */
    int getRefCount() const {
        const int references = fsdk::Image::getRefCount();
        return references;
    }
};
/**
 * @brief Simple fixed-size image cache used to avoid allocations on GPU for performance reasons.
 *
 * Slots are scanned in order. The first slot that is either empty or free
 * (reference count of 1) with matching size and memory residence is handed out.
 */
class ImageCache {
public:
    /** @param size number of slots in the cache. */
    ImageCache(uint32_t size)
        : m_images(size) {
    }

    /**
     * @brief Get an image of the requested geometry from the cache.
     * @param width requested image width.
     * @param height requested image height.
     * @param memoryResidence requested memory residence of the image.
     * @return a cached image, or a default-constructed fsdk::Image when no slot is usable.
     */
    fsdk::Image get(int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
        for (ImageWrapper &slot : m_images) {
            if (!slot.isValid()) {
                // empty slot: create a new R8G8B8 image in place and hand it out
                slot.create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
                return static_cast<fsdk::Image&>(slot);
            }
            // slot is occupied: it is reusable only when the cache holds the sole reference ...
            if (slot.getRefCount() != 1) {
                continue;
            }
            // ... and its geometry and residence match the request
            if (width != slot.getWidth() || height != slot.getHeight()) {
                continue;
            }
            if (slot.getMemoryResidence() != memoryResidence) {
                continue;
            }
            return static_cast<fsdk::Image&>(slot);
        }
        // nothing suitable found
        return fsdk::Image();
    }

private:
    std::vector<ImageWrapper> m_images;
};
#ifdef WITH_GPU
/**
 * @brief Fetch an image from the cache, falling back to a fresh allocation.
 * @param cache cache to query.
 * @param width requested image width.
 * @param height requested image height.
 * @param memoryResidence requested memory residence.
 * @return the cached image when the cache returned a valid one, otherwise a newly created R8G8B8 image.
 */
static fsdk::Image getCachedImage(ImageCache &cache, int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
    fsdk::Image fromCache = cache.get(width, height, memoryResidence);
    if (fromCache.isValid()) {
        return fromCache;
    }
    // cache had no usable slot: allocate outside of the cache
    fsdk::Image allocated;
    allocated.create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
    return allocated;
}
#endif
/**
 * @brief User data attached to a frame (see `tsdk::Frame::userData` usage in `main`):
 *        remembers which stream and which frame id the frame belongs to.
 */
struct FrameAdditionalData : tsdk::AdditionalFrameData {
    tsdk::StreamId streamId;
    tsdk::FrameId frameId;

    FrameAdditionalData(tsdk::StreamId streamId, tsdk::FrameId frameId)
        : streamId(streamId), frameId(frameId) {}
};
/**
 * @brief Per-stream observer and predicate used by the example.
 *
 * Implements the best-shot, visual and debug observer interfaces together with the
 * best-shot and visual predicates. Results are stored in the global maps
 * `bestShotImages` (keyed by track id) and `frameImages` (keyed by `m_streamId`).
 * Images residing in GPU memory are skipped by both `bestShot` and `visual`.
 */
struct SuperObserver :
    tsdk::IBestShotObserver,
    tsdk::IVisualObserver,
    tsdk::IDebugObserver,
    tsdk::IBestShotPredicate,
    tsdk::IVisualPredicate {
    int m_streamId;
    std::map<int, int> m_bestAreas;

    /** @param streamId id of the stream this observer reports for. */
    SuperObserver(int streamId) : m_streamId{streamId} {
    }
    SuperObserver() : m_streamId{} {}
    ~SuperObserver() override = default;

    /**
     * @brief Store the crop of a best shot in `bestShotImages`.
     * @param detection best-shot description (image, detection rect, track id).
     * @param data additional frame data; not used here.
     */
    void bestShot(const tsdk::DetectionDescr& detection, const tsdk::AdditionalFrameData* data) override {
        if (detection.image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for gpu transfer to cpu or use cv::GpuMat
            return;
        // wrap the image data without copying
        const cv::Mat cvFrame(detection.image.getHeight(), detection.image.getWidth(), CV_8UC3, const_cast<void*>(detection.image.getData()));
        const auto rect = detection.detection.getRect();
        // clamp the crop to the image: cv::Mat::operator()(Rect) throws on an out-of-bounds ROI
        const cv::Rect crop = cv::Rect(rect.x, rect.y, rect.width, rect.height) & cv::Rect(0, 0, cvFrame.cols, cvFrame.rows);
        if (crop.width <= 0 || crop.height <= 0)
            return;
        // save best shot crop to map
        bestShotImages[detection.trackId] = cvFrame(crop).clone();
    }

    /** @brief Called when the track with id `trackId` has finished; nothing to do in the example. */
    void trackEnd(const tsdk::TrackId& trackId) override {
        // track with id = 'trackId' finished
    }

    /**
     * @brief Copy the frame into `frameImages[m_streamId]` and draw every track on it.
     * @param frameId id of the frame; not used here.
     * @param image frame image.
     * @param trackInfo array of `nTrack` track descriptions.
     * @param nTrack number of elements in `trackInfo`.
     * @param data additional frame data; not used here.
     */
    void visual(const tsdk::FrameId &frameId,
        const fsdk::Image &image,
        const tsdk::TrackInfo * trackInfo,
        const int nTrack,
        const tsdk::AdditionalFrameData* data) override {
        if (image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for gpu transfer to cpu or use cv::GpuMat
            return;
        // convert fsdk::Image to cv::Mat (no copy)
        const cv::Mat cvFrame(image.getHeight(), image.getWidth(), CV_8UC3, const_cast<void*>(image.getData()));
        // save a private copy of the frame to the map and draw on that copy
        cv::Mat &canvas = frameImages[m_streamId];
        canvas = cvFrame.clone();
        for (int i = 0; i < nTrack; i++) {
            const auto &track = trackInfo[i];
            // track id in the middle of the rectangle
            cv::putText(canvas,
                std::to_string(track.trackId),
                cv::Point(track.rect.x + track.rect.width / 2, track.rect.y + track.rect.height / 2),
                cv::FONT_HERSHEY_SIMPLEX,
                1,
                cv::Scalar(10, 200, 10),
                2);
            // detection rectangle; colour depends on the `isDetector` flag of the track
            cv::rectangle(canvas,
                cv::Rect(track.rect.x,
                    track.rect.y,
                    track.rect.width,
                    track.rect.height),
                track.isDetector ? cv::Scalar(150, 10, 10) : cv::Scalar(10, 10, 150), 2);
        }
    }

    /** @brief Best-shot predicate; the example accepts every detection. */
    bool checkBestShot(const tsdk::DetectionDescr& descr, const tsdk::AdditionalFrameData* data) override {
        // here can be code of best shot predicate if need
        return true;
    }

    /** @brief Visual predicate; the example always requests the RGB image. */
    bool needRGBImage(const tsdk::FrameId frameId, const tsdk::AdditionalFrameData*) override {
        return true;
    }

    // callbacks, mostly, for debug purposes
    void debugForegroundSubtraction(const tsdk::FrameId& frameId, const fsdk::Image& firstMask,
        const fsdk::Image& secondMask, fsdk::Rect * regions, int nRegions) override {
    }
    void debugDetection(const tsdk::DetectionDebugInfo& descr) override {
    }
};
/**
 * @brief Batched observer: one object receives events of all streams, one call per event type.
 *
 * For demonstration every event is simply forwarded to a temporary per-stream
 * `SuperObserver` constructed from the stream id at the same index.
 */
struct BatchedSuperObserver :
    tsdk::IBatchBestShotObserver,
    tsdk::IBatchVisualObserver,
    tsdk::IBatchDebugObserver {
    BatchedSuperObserver() = default;
    ~BatchedSuperObserver() override = default;

    /** @brief Forward each best-shot event to a per-stream observer. */
    void bestShot(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::BestShotCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            auto &&event = data[idx];
            SuperObserver perStream(streamIDs[idx]);
            perStream.bestShot(event.descr, event.frameData);
        }
    }

    /** @brief Forward each track-start event to a per-stream observer. */
    void trackStart(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::TrackStartCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            auto &&event = data[idx];
            SuperObserver perStream(streamIDs[idx]);
            perStream.trackStart(event.frameId, event.trackId);
        }
    }

    /** @brief Forward each track-end event to a per-stream observer. */
    void trackEnd(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::TrackEndCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            SuperObserver perStream(streamIDs[idx]);
            perStream.trackEnd(data[idx].trackId);
        }
    }

    /** @brief Forward each visual event to a per-stream observer (no additional frame data is passed). */
    void visual(const fsdk::Span<tsdk::StreamId> &streamIDs, const fsdk::Span<tsdk::VisualCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            auto &&event = data[idx];
            SuperObserver perStream(streamIDs[idx]);
            perStream.visual(event.frameId, event.image, event.trackInfo, event.nTrack, nullptr);
        }
    }

    /** @brief Forward each foreground-subtraction debug event to a per-stream observer. */
    void debugForegroundSubtraction(const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::DebugForegroundSubtractionCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            auto &&event = data[idx];
            SuperObserver perStream(streamIDs[idx]);
            perStream.debugForegroundSubtraction(event.frameId, event.firstMask, event.secondMask, event.regions, event.nRegions);
        }
    }

    /** @brief Repack each detection debug event into `tsdk::DetectionDebugInfo` and forward it. */
    void debugDetection(const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::DebugDetectionCallbackData> &data) override {
        const auto total = streamIDs.size();
        for (size_t idx = 0; idx < total; ++idx) {
            auto &&event = data[idx];
            tsdk::DetectionDebugInfo info;
            info.descr = event.descr;
            info.isBestDetection = event.isBestDetection;
            info.isDetector = event.isDetector;
            info.isFiltered = event.isFiltered;
            SuperObserver perStream(streamIDs[idx]);
            perStream.debugDetection(info);
        }
    }
};
/**
 * @brief Unified observer: receives one batch with all tracking events of all streams.
 *
 * The batch is unpacked per stream and per frame, and every non-empty event group is
 * forwarded to `BatchedSuperObserver`, so its code is reused.
 */
struct TrackingResultObserver : tsdk::ITrackingResultObserver {
    /**
     * @brief Handle a ready batch of tracking results.
     * @param result batch of results; a null reference is ignored.
     */
    void ready(fsdk::Ref<tsdk::ITrackingResultBatch> result) override {
        // any postprocessing tracking results code here
        if (!result) {
            return;
        }
        // we reuse code of BatchedSuperObserver
        BatchedSuperObserver batched;
        auto streamIds = result->getStreamIds();
        const auto streamsCount = streamIds.size();
        for (size_t streamInd = 0; streamInd < streamsCount; ++streamInd) {
            for (auto frameId : result->getStreamFrameIds(streamIds[streamInd])) {
                auto streamFrameResults = result->getTrackingResult(streamIds[streamInd], frameId);
                // input and output arrays of ids should be equal
                assert(streamFrameResults.streamId == streamIds[streamInd]);

                // batched callbacks expect one stream id per event, so repeat the id `count` times
                const auto repeatStreamId = [&streamFrameResults](size_t count) {
                    std::vector<tsdk::StreamId> ids;
                    ids.resize(count, streamFrameResults.streamId);
                    return ids;
                };

                auto foregroundEvents = streamFrameResults.debugForegroundSubtractions;
                if (!foregroundEvents.empty()) {
                    const auto ids = repeatStreamId(foregroundEvents.size());
                    batched.debugForegroundSubtraction(vectorToSpan(ids), foregroundEvents);
                }
                auto detectionDebugEvents = streamFrameResults.debugDetections;
                if (!detectionDebugEvents.empty()) {
                    const auto ids = repeatStreamId(detectionDebugEvents.size());
                    batched.debugDetection(vectorToSpan(ids), detectionDebugEvents);
                }
                auto bestShotEvents = streamFrameResults.detections;
                if (!bestShotEvents.empty()) {
                    const auto ids = repeatStreamId(bestShotEvents.size());
                    batched.bestShot(vectorToSpan(ids), bestShotEvents);
                }
                auto visualEvents = streamFrameResults.tracks;
                if (!visualEvents.empty()) {
                    const auto ids = repeatStreamId(visualEvents.size());
                    batched.visual(vectorToSpan(ids), visualEvents);
                }
                auto startEvents = streamFrameResults.trackStart;
                if (!startEvents.empty()) {
                    const auto ids = repeatStreamId(startEvents.size());
                    batched.trackStart(vectorToSpan(ids), startEvents);
                }
                auto endEvents = streamFrameResults.trackEnd;
                if (!endEvents.empty()) {
                    const auto ids = repeatStreamId(endEvents.size());
                    batched.trackEnd(vectorToSpan(ids), endEvents);
                }
            }
        }
    }

    ~TrackingResultObserver() override {}
};
int main(int argc, char** argv) {
if (!USE_FACE_DETECTOR && !USE_BODY_DETECTOR) {
std::cerr << "Both face and body detectors are disabled" << std::endl;
exit(EXIT_FAILURE);
}
#ifndef WITH_GPU
if (USE_GPU) {
std::cerr << "GPU build is off, GPU can't be used." << std::endl;
exit(EXIT_FAILURE);
}
#endif
int streamCount = 1;
std::vector<ImageCache> streamCaches;
std::vector<cv::VideoCapture> captures;
captures.reserve(argc);
bool usbCam = false;
if (argc > 1) {
for (int i = 1; i < argc; i++) {
cv::VideoCapture capture;
capture.open(argv[i]);
if (!capture.isOpened()) {
//error in opening the video input
std::cout << "video" << argv[i] << " not opened"<< std::endl;
exit(EXIT_FAILURE);
} else {
double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
std::cout << argv[i] << " opened." << frameCount << "frames total" << std::endl;
}
captures.emplace_back(std::move(capture));
}
} else {
cv::VideoCapture capture;
capture.open(0);
if (!capture.isOpened()) {
//error in opening the video input
std::cout << "video from webcam not opened"<< std::endl;
exit(EXIT_FAILURE);
}
usbCam = true;
captures.emplace_back(std::move(capture));
}
streamCount = captures.size();
if (USE_ESTIMATOR_API) {
if (streamCount > 1) {
std::cout << "Estimator API allows to process multiple sources, but TE example supports only one source for estmator API now," <<
" so one Source will be used for tracking of " << streamCount << " Streams." << std::endl;
}
}
// create FaceEngine and then TrackEngine objects
fsdk::ISettingsProviderPtr config = fsdk::createSettingsProvider("./data/faceengine.conf").getValue();
auto faceEngine = fsdk::createFaceEngine("./data/").getValue();
faceEngine->setSettingsProvider(config);
auto runtimeSettings = faceEngine->getRuntimeSettingsProvider();
fsdk::ISettingsProviderPtr configTE = fsdk::createSettingsProvider("./data/trackengine.conf").getValue();
configTE->setValue("detectors", "use-face-detector", USE_FACE_DETECTOR);
configTE->setValue("detectors", "use-body-detector", USE_BODY_DETECTOR);
// enable vlTracker, if there are many streams, because it's intended for multiple streams processing
if (streamCount > 1 || USE_GPU) { // WARN! gpu supports only 'vlTracker' or 'none' tracker
configTE->setValue("other", "tracker-type", "vlTracker");
}
// set binary FRG for GPU and disable for CPU for max perf
configTE->setValue("FRG", "use-binary-frg", USE_GPU ? 1 : 0);
if (USE_ESTIMATOR_API) {
configTE->setValue("other", "callback-mode", 0); // must set 0 for estimator API
}
if (USE_GPU) {
// NOTE: for GPU also valid parameters from runtime config must be set:
// "Runtime":"defaultGpuDevice" to actual used GPU number, "Runtime":"deviceClass" to "gpu".
// it can be changed in the config file or here from runtime settings provider
if (runtimeSettings->getValue("Runtime", "defaultGpuDevice").asInt(-1) == -1) {
runtimeSettings->setValue("Runtime", "defaultGpuDevice", 0);
}
runtimeSettings->setValue("Runtime", "deviceClass", "gpu");
}
runtimeSettings->setValue("Runtime", "programCacheSize", FLOWER_CACHE_SIZE);
auto trackEngine = tsdk::createTrackEngine(faceEngine, configTE).getValue();
std::vector<fsdk::Ref<tsdk::IStream>> streamsList;
std::vector<SuperObserver> observers(streamCount);
std::vector<std::future<void>> threads;
TrackingResultObserver trackingResultObserver;
BatchedSuperObserver batchedSuperObserver;
if (USE_ESTIMATOR_API) {
// for estimator API callback isn't used
}
else { // else set callback(s)
if (USE_UNIFIED_OBSERVER) {
trackEngine->setTrackingResultObserver(&trackingResultObserver);
}
else if (USE_BATCHED_OBSERVERS) {
// set batched callbacks
trackEngine->setBatchBestShotObserver(&batchedSuperObserver);
trackEngine->setBatchVisualObserver(&batchedSuperObserver);
trackEngine->setBatchDebugObserver(&batchedSuperObserver);
}
}
streamCaches.resize(streamCount, IMAGE_CACHE_SIZE);
std::atomic<bool> stop;
auto threadFunc = [&trackEngine, &captures, &streamsList, &streamCaches, &stop, usbCam](int streamInd) {
uint32_t index = 0;
auto& capture = captures[streamInd];
cv::Mat frame; //current frame
if (capture.isOpened()) {
while (!stop && capture.read(frame)) {
if (!usbCam)
index = static_cast<int>(capture.get(cv::CAP_PROP_POS_FRAMES));
else
index++;
if (!frame.empty()) {
const fsdk::Image cvImageCPUWrapper(frame.cols, frame.rows, fsdk::Format::R8G8B8, frame.data, false); // no copy, just wrapper
fsdk::Image image;
#ifdef WITH_GPU
if (USE_GPU) {
if (USE_IMAGE_CACHE) {
fsdk::Image cachedImage = getCachedImage(streamCaches[streamInd], frame.cols, frame.rows, fsdk::Image::MemoryResidence::MemoryGPU);
cudaMemcpy(const_cast<void*>(cachedImage.getData()), const_cast<void*>(cvImageCPUWrapper.getData()),
cvImageCPUWrapper.getDataSize(), cudaMemcpyHostToDevice);
}
else {
image.create(cvImageCPUWrapper, fsdk::Image::MemoryResidence::MemoryGPU);
}
}
else
#endif
{
image = cvImageCPUWrapper.clone(); // clone because TE internally keeps last frame image for tracks data
// performance overhead is possible otherwise
}
if (USE_ESTIMATOR_API) {
// here we track the same stream/image in batch of size `streamCount`
// just to demonstrate estimator API using, in real case ofc different streams can be processed
// in order to do that, some code should be written for gathering batch of frames from different streams
// and calling `track` with that batch in another thread
// the best approach is to use one thread loop with `track` per each TE object created
// NOTE:`track` is thread safe (blocking call)
std::vector<tsdk::StreamId> streamIds;
std::vector<tsdk::Frame> frames;
for (size_t i = 0; i != streamsList.size(); ++i) {
streamIds.emplace_back(streamsList[i]->getId());
frames.emplace_back();
frames.back().image = image;
frames.back().frameId = index;
frames.back().userData = new FrameAdditionalData(streamsList[i]->getId(), index);
}
const auto validateRes = trackEngine->validate(fsdk::Span<tsdk::StreamId>(streamIds), fsdk::Span<tsdk::Frame>(frames));
if (!validateRes) {
std::cerr << "Wrong input for `track`" << std::endl;
}
else {
try {
auto result = trackEngine->track(fsdk::Span<tsdk::StreamId>(streamIds), fsdk::Span<tsdk::Frame>(frames));
if (result)
TrackingResultObserver().ready(result.getValue());
}
catch (const std::exception &e) {
std::cerr << "`Track` exception: " << std::string(e.what()) << std::endl;
}
}
}
else {
tsdk::Frame frame;
frame.image = image;
frame.frameId = index;
frame.userData = nullptr;
if (!streamsList[streamInd]->validateFrame(frame))
std::cerr << "Wrong input frame " << index << " for `pushFrame*`, Stream with index: " << streamInd << std::endl;
else {
const bool pushFrameRes = IS_REALTIME ? streamsList[streamInd]->pushFrame(frame) :
streamsList[streamInd]->pushFrameWaitFor(frame, std::numeric_limits<uint32_t>::max());
if (!pushFrameRes) {
std::cerr << "Failed to push frame: " << index << " for Stream with index: " << streamInd << std::endl;
}
}
}
}
if (index % 1000 == 0) {
if (!usbCam) {
const double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
const double framePos = capture.get(cv::CAP_PROP_POS_FRAMES);
std::cout << "Stream " << streamInd << " progress:" << (framePos / frameCount) * 100.0 << "%"
<< std::endl;
} else {
std::cout << "Stream " << streamInd << " progress:" << index << " frames" << std::endl;
}
}
}
std::cout << "Stream " << streamInd << " ended" << std::endl;
capture.release();
}
else {
std::cout << "Stream " << streamInd << " is not opened" << std::endl;
}
};
// ROI feature
tsdk::StreamParamsOpt streamParamsOpt;
if (USE_ROI) {
// detect tracks only on bottom half of the frame
streamParamsOpt.humanRelativeROI = fsdk::FloatRect(0.0f, 0.5f, 1.0f, 0.5f); // x, y, width, height
}
// create streams
int observerIndex = 0;
for (int i = 0; i < streamCount; i++) {
observers[observerIndex].m_streamId = observerIndex;
fsdk::Ref<tsdk::IStream> stream = USE_ROI ?
fsdk::acquire(trackEngine->createStreamWithParams(streamParamsOpt)) : fsdk::acquire(trackEngine->createStream());
if (!USE_BATCHED_OBSERVERS && !USE_UNIFIED_OBSERVER && !USE_ESTIMATOR_API) {
// set per-stream callbacks
stream->setBestShotObserver(&observers[observerIndex]);
stream->setVisualObserver(&observers[observerIndex]);
stream->setDebugObserver(&observers[observerIndex]);
}
// always per-stream predicates
// NOTE: here we use "super" observers just to simplify code, actually, separate vector of predicates should be created
stream->setBestShotPredicate(&observers[observerIndex]);
stream->setVisualPredicate(&observers[observerIndex]);
// by default all observers are enabled, this is just demonstration of API using
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_BEST_SHOT, true);
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_VISUAL, true);
stream->setObserverEnabled(tsdk::StreamObserverType::SOT_DEBUG, true);
streamsList.emplace_back(stream);
if (!USE_ESTIMATOR_API)
threads.emplace_back(std::async(std::launch::async, threadFunc, i));
std::cout << "Stream " << i << " started" << std::endl;
observerIndex++;
}
// for estimator API create only one source
if (USE_ESTIMATOR_API)
threads.emplace_back(std::async(std::launch::async, threadFunc, 0));
while (true) {
bool notFinished = false;
for (auto &thread : threads) {
if (thread.wait_for(std::chrono::milliseconds(10)) == std::future_status::timeout)
notFinished = true;
}
if (!notFinished)
break;
}
// it's recommended to `join` each stream before and to stop TE before TE object release
for (auto &stream : streamsList) {
if (USE_ESTIMATOR_API) {
auto remainingResults = stream->stop(); // returns all remaining tracking events
// for face tracking it's only `trackEnd` for remaining tracks
// for body/human also events for remaining frames
// for body we may get tracking results with delay, so TE may still keep several last frames and their events
// note `Receiving tracking results` section in docs.
if (remainingResults)
TrackingResultObserver().ready(remainingResults);
} else
stream->join(); // wait all queued frames/callbacks to be processed
}
trackEngine->stop();
}