Skip to content

Example#

Minimal TrackEngine example.

The example is based on the OpenCV library, as it is the easiest and most widely known means of capturing frames from a camera and drawing on them.

#include "../../inc/tsdk/ITrackEngine.h"

#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <future>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <thread>
#include <vector>

#ifdef WITH_GPU         // to build with GPU support or not
#include "cuda_runtime.h"
#endif

// Example switches. They are read in main(); the two detector flags are also read in SuperObserver::trackEnd.

// Upload frames to GPU memory before pushing them. Only looked at when the file is built with WITH_GPU defined.
#define USE_GPU false
// Register the batched observer on the engine instead of the per-stream observers.
#define USE_BATCHED_OBSERVERS true // preferable way
// Written to the "detectors" section of the TrackEngine config. main() exits when both are true.
#define USE_FACE_DETECTOR true
#define USE_BODY_DETECTOR false

// GPU path only: take the destination image from a per-stream ImageCache instead of creating a new one per frame.
#define USE_IMAGE_CACHE false // allocations optimization
// Number of image slots each per-stream ImageCache is created with.
#define IMAGE_CACHE_SIZE 40

// Most recent annotated frame per stream, keyed by SuperObserver::m_streamId (written in SuperObserver::visual).
// NOTE(review): no lock guards these maps in this file — confirm which thread(s) invoke the callbacks before reading them elsewhere.
std::map<int,cv::Mat> frameImages;
// Most recent best-shot crop per track, keyed by track id (written in SuperObserver::bestShot).
std::map<int,cv::Mat> bestShotImages;

/**
* @brief Thin fsdk::Image subclass whose only job is to re-expose the protected
*        fsdk::Image::getRefCount as a public member.
*/
class ImageWrapper : public fsdk::Image {
public:
    ImageWrapper() = default;

    /** @return the reference count reported by the underlying fsdk::Image. */
    int getRefCount() const {
        const int references = fsdk::Image::getRefCount();
        return references;
    }
};

/**
* @brief Fixed-size pool of images, used to avoid repeated allocations on GPU for performance reasons
*/
class ImageCache {
public:
    /**
    * @brief Creates a cache with `size` empty slots; images are created lazily in get().
    *
    * Deliberately not `explicit`: main() fills `streamCaches` from a plain slot count.
    */
    ImageCache(uint32_t size)
        : m_images(size) {
    }

    /**
    * @brief Hands out an image of the requested size and memory residence from the pool.
    *
    * Takes the first slot that is either still empty or free (ref count == 1) with
    * matching width, height and residence. An empty slot is created as R8G8B8 first.
    *
    * @return a copy of the chosen slot's fsdk::Image handle, or a default-constructed
    *         (invalid) image when no slot qualifies.
    */
    fsdk::Image get(int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
        ImageWrapper* slot = nullptr;

        for (ImageWrapper& candidate : m_images) {
            const bool usable =
                !candidate.isValid() ||
                (candidate.getRefCount() == 1 &&
                 candidate.getWidth() == width &&
                 candidate.getHeight() == height &&
                 candidate.getMemoryResidence() == memoryResidence);

            if (usable) {
                slot = &candidate;
                break;
            }
        }

        // every slot is either in use or holds an image of another shape
        if (slot == nullptr) {
            return fsdk::Image();
        }

        // first use of this slot: allocate the image
        if (!slot->isValid()) {
            slot->create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
        }

        return static_cast<fsdk::Image&>(*slot);
    }

private:
    std::vector<ImageWrapper> m_images;
};

// One ImageCache per stream, indexed by capture index; sized in main() before the capture threads are started.
std::vector<ImageCache> streamCaches;

/**
* @brief Returns an image of the requested size and residence, preferring a slot from `cache`.
*
* When the cache has nothing to offer (ImageCache::get returned an invalid image),
* a fresh R8G8B8 image is created instead.
*/
static fsdk::Image getCachedImage(ImageCache &cache, int width, int height, fsdk::Image::MemoryResidence memoryResidence) {
    fsdk::Image image = cache.get(width, height, memoryResidence);

    if (image.isValid()) {
        return image;
    }

    // cache miss: allocate a one-off image outside the pool
    image.create(width, height, fsdk::Format::R8G8B8, false, memoryResidence);
    return image;
}

/**
* @brief Per-stream observer and predicate in a single object, to keep the example short.
*
* Implements the best-shot, visual and debug observer interfaces plus the best-shot
* and visual predicates. Results are stored in the global maps `bestShotImages`
* (keyed by track id) and `frameImages` (keyed by `m_streamId`).
* Images residing in GPU memory are skipped by the callbacks that touch pixel data.
*/
struct SuperObserver :
    tsdk::IBestShotObserver,
    tsdk::IVisualObserver,
    tsdk::IDebugObserver,
    tsdk::IBestShotPredicate,
    tsdk::IVisualPredicate {

    int m_streamId;                 // key used for this stream in `frameImages`
    std::map<int, int> m_bestAreas; // only used by the commented-out sample logic in checkBestShot

    /** @param streamId index of the stream this observer is attached to. */
    SuperObserver(int streamId) : m_streamId{streamId} {}

    SuperObserver() : m_streamId{} {}

    ~SuperObserver() override = default;

    /**
    * @brief Stores a copy of the detection crop in `bestShotImages[detection.trackId]`.
    *
    * The detection rectangle is clamped to the image first, because cv::Mat::operator()
    * asserts on a ROI that leaves the image. A rectangle entirely outside is ignored.
    */
    void bestShot(const tsdk::DetectionDescr& detection, const tsdk::AdditionalFrameData* data) override {
        if (detection.image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for gpu transfer to cpu or use cv::GpuMat
            return;

        // wrap the image data without copying
        const cv::Mat cvFrame(detection.image.getHeight(), detection.image.getWidth(), CV_8UC3, const_cast<void*>(detection.image.getData()));
        const auto rect = detection.detection.getRect();

        // keep only the part of the detection that lies inside the frame
        const cv::Rect frameBounds(0, 0, cvFrame.cols, cvFrame.rows);
        const cv::Rect crop = cv::Rect(rect.x, rect.y, rect.width, rect.height) & frameBounds;
        if (crop.width <= 0 || crop.height <= 0)
            return;

        // save best shot crop to map
        bestShotImages[detection.trackId] = cvFrame(crop).clone();
    }

    void trackEnd(const tsdk::TrackId& trackId) override {
        if (USE_FACE_DETECTOR) {
            // track with id = 'trackId' finished
        }
        else if (USE_BODY_DETECTOR) {
            // track with id = 'trackId' moved to non-active tracks group or finished
            // we can't get actual reason from this callback (due to function signature compatibility with older versions)
            // users should use new batched observer api to get it (see 'TrackEndCallbackData')
        }
    }

    void trackReIdentificate(tsdk::FrameId frameId, tsdk::TrackId trackId, tsdk::TrackId reidTrackId) override {
        // track with id = 'trackId' matched to one of the old non-active tracks with id = 'reidTrackId'
        // after this callback trackEnd will be called for track with id = 'trackId' (like, reidTrackId replaces trackId),
        // track with id = 'reidTrackId' will be in non-active state again or active: that depends on whether track with id = 'trackId' was updated on the last frame or not
    }

    void trackStatusUpdate(tsdk::FrameId frameId, tsdk::TrackId trackId, tsdk::TrackStatus status) override {
    }

    /**
    * @brief Copies the frame into `frameImages[m_streamId]` and draws every track on the copy.
    *
    * For each of the `nTrack` entries of `trackInfo` the track id is printed at the
    * rectangle centre and the rectangle is outlined; the outline colour depends on
    * `isDetector`.
    */
    void visual(const tsdk::FrameId &frameId,
                const fsdk::Image &image,
                const tsdk::TrackInfo * trackInfo,
                const int nTrack,
                const tsdk::AdditionalFrameData* data) override {
        if (image.getMemoryResidence() == fsdk::Image::MemoryResidence::MemoryGPU) // for gpu transfer to cpu or use cv::GpuMat
            return;

        // convert fsdk::Image to cv::Mat
        const cv::Mat cvFrame(image.getHeight(), image.getWidth(), CV_8UC3, const_cast<void*>(image.getData()));

        // save frame to the map; all drawing goes to this copy
        cv::Mat& canvas = frameImages[m_streamId];
        canvas = cvFrame.clone();

        // signed index: nTrack is an int, so a non-positive count simply draws nothing
        for (int i = 0; i < nTrack; i++) {
            const tsdk::TrackInfo& track = trackInfo[i];

            // print track id at the centre of the rectangle
            cv::putText(canvas,
                        std::to_string(track.trackId),
                        cv::Point(track.rect.x + track.rect.width / 2, track.rect.y + track.rect.height / 2),
                        cv::FONT_HERSHEY_SIMPLEX,
                        1,
                        cv::Scalar(10, 200, 10),
                        2);

            // draw detection rectangle on frame
            cv::rectangle(canvas,
                          cv::Rect(track.rect.x,
                                   track.rect.y,
                                   track.rect.width,
                                   track.rect.height),
                          track.isDetector ? cv::Scalar(150, 10, 10) : cv::Scalar(10, 10, 150), 2);
        }
    }

    /** @brief Best-shot predicate; this example accepts every candidate. */
    bool checkBestShot(const tsdk::DetectionDescr& descr, const tsdk::AdditionalFrameData* data) override {
        // the bigger the better (example of best shot logic)
        /*if (m_bestAreas.find(descr.trackId) == m_bestAreas.end())
            m_bestAreas[descr.trackId] = 0;

        if (descr.detection.rect.getArea() > m_bestAreas[descr.trackId]) {
            m_bestAreas[descr.trackId] = descr.detection.rect.getArea();
            return true;
        }*/
        return true;
    }

    /** @brief Visual predicate; this example always asks for the RGB image. */
    bool needRGBImage(const tsdk::FrameId frameId, const tsdk::AdditionalFrameData*) override {
        return true;
    }

    // callbacks, mostly, for debug purposes
    void debugForegroundSubtraction(const tsdk::FrameId& frameId, const fsdk::Image& firstMask,
        const fsdk::Image& secondMask, fsdk::Rect * regions, int nRegions) override {
    }

    void debugDetection(const tsdk::DetectionDebugInfo& descr) override {
    }
};

/**
* @brief Batched counterpart of `SuperObserver`: one object registered on the engine
*        that receives callbacks together with the ids of the streams they belong to.
*
* Every callback is an empty stub in this example; an implementation would mirror
* the per-stream logic of `SuperObserver`.
*/
struct BatchedSuperObserver :
    tsdk::IBatchBestShotObserver,
    tsdk::IBatchVisualObserver,
    tsdk::IBatchDebugObserver {

    BatchedSuperObserver() = default;
    ~BatchedSuperObserver() override = default;

    // --- best-shot observer callbacks ---

    void bestShot(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::BestShotCallbackData> &data) override {
    }

    void trackEnd(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::TrackEndCallbackData> &data) override {
    }

    void trackStatusUpdate(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::TrackStatusUpdateCallbackData> &data) override {
    }

    void trackReIdentificate(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::TrackReIdentificateCallbackData> &data) override {
    }

    // --- visual observer callback ---

    void visual(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::VisualCallbackData> &data) override {
    }

    // --- debug observer callbacks ---

    void debugForegroundSubtraction(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::DebugForegroundSubtractionCallbackData> &data) override {
    }

    void debugDetection(
        const fsdk::Span<tsdk::StreamId> &streamIDs,
        const fsdk::Span<tsdk::DebugDetectionCallbackData> &data) override {
    }
};

int main(int argc, char** argv) {
    if (USE_FACE_DETECTOR && USE_BODY_DETECTOR) {
        std::cout << "Both face and body detectors are't supported yet" << std::endl;
        exit(EXIT_FAILURE);
    }

    int keyboard;
    int streamCount = 1;
    std::vector<cv::VideoCapture> captures;
    captures.reserve(argc);
    const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
    bool usbCam = false;

    if (argc > 1) {
        for (int i = 1; i < argc; i++) {
            cv::VideoCapture capture;
            capture.open(argv[i]);

            if (!capture.isOpened()) {
                //error in opening the video input
                std::cout << "video" << argv[i] << " not opened"<< std::endl;
                exit(EXIT_FAILURE);
            } else {
                double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
                std::cout << argv[i] << " opened." << frameCount << "frames total" << std::endl;
            }
            captures.emplace_back(std::move(capture));
        }
    } else {
        cv::VideoCapture capture;
        capture.open(0);
        if (!capture.isOpened()) {
            //error in opening the video input
            std::cout << "video from webcam not opened"<< std::endl;
            exit(EXIT_FAILURE);
        }
        usbCam = true;
        captures.emplace_back(std::move(capture));
    }

    streamCount = captures.size();

    // create FaceEngine and then TrackEngine objects
    fsdk::ISettingsProviderPtr config = fsdk::createSettingsProvider("./data/faceengine.conf").getValue();
    auto faceEngine = fsdk::createFaceEngine("./data/").getValue();
    faceEngine->setSettingsProvider(config);

    fsdk::ISettingsProviderPtr configTE = fsdk::createSettingsProvider("./data/trackengine.conf").getValue();
    configTE->setValue("detectors", "use-face-detector", USE_FACE_DETECTOR);
    configTE->setValue("detectors", "use-body-detector", USE_BODY_DETECTOR);

    // enable vlTracker, if there are many streams, because it's intended for multiple streams processing
    if (streamCount > 1) {
        configTE->setValue("other", "tracker-type", "vlTracker");
    }
#ifdef WITH_GPU
    // WARN! gpu supports only 'vlTracker' or 'none' tracker
    if (USE_GPU) {
        configTE->setValue("other", "tracker-type", "vlTracker");
    }
#endif

    auto trackEngine = tsdk::createTrackEngine(faceEngine, configTE).getValue();

    std::vector<fsdk::Ref<tsdk::IStream>> streamsList;
    std::vector<SuperObserver> observers(streamCount);
    std::vector<std::future<void>> threads;

    BatchedSuperObserver batchedSuperObserver;

    threads.reserve(streamCount);
    std::atomic<bool> stop;

    streamCaches.resize(streamCount, IMAGE_CACHE_SIZE);

    auto threadFunc = [&](int captureIndex){
        uint32_t index = 0;
        auto& capture = captures[captureIndex];
        cv::Mat frame; //current frame

        if(capture.isOpened()) {
            while (!stop && capture.read(frame)) {
                if (!usbCam)
                    index = static_cast<int>(capture.get(cv::CAP_PROP_POS_FRAMES));
                else
                    index++;

                if (!frame.empty()) {
                    const fsdk::Image cvImageCPUWrapper(frame.cols, frame.rows, fsdk::Format::R8G8B8, frame.data, false); // no copy, just wrapper

                    fsdk::Image image;
#ifdef WITH_GPU
                    if (USE_GPU) {
                        if (USE_IMAGE_CACHE) {
                            fsdk::Image cachedImage = getCachedImage(streamCaches[captureIndex], frame.cols, frame.rows, fsdk::Image::MemoryResidence::MemoryGPU);

                            cudaMemcpy(const_cast<void*>(cachedImage.getData()), const_cast<void*>(cvImageCPUWrapper.getData()),
                                cvImageCPUWrapper.getDataSize(), cudaMemcpyHostToDevice);
                        }
                        else {
                            image.create(cvImageCPUWrapper, fsdk::Image::MemoryResidence::MemoryGPU);
                        }
                    }
                    else
#endif
                    {
                        image = cvImageCPUWrapper.clone();
                    }
                    std::cout << "Image:" << image.getWidth() << "x" << image.getHeight() << " residence: " << static_cast<int>(image.getMemoryResidence()) << std::endl;
                    streamsList[captureIndex]->pushFrameWaitFor(image, index, nullptr, std::numeric_limits<uint32_t>::max());
                }

                if (index % 1000 == 0) {
                    if (!usbCam) {
                        const double frameCount = capture.get(cv::CAP_PROP_FRAME_COUNT);
                        const double framePos = capture.get(cv::CAP_PROP_POS_FRAMES);
                        std::cout << "stream " << captureIndex << " progress:" << (framePos / frameCount) * 100.0 << "%"
                                  << std::endl;
                    } else {
                        std::cout << "stream " << captureIndex << " progress:" << index << " frames" << std::endl;
                    }
                }
            }
            std::cout << "stream " << captureIndex << " ended" << std::endl;
            capture.release();
        } else {
            std::cout << "stream " << captureIndex << " is not opened" << std::endl;
        }
    };

    if (USE_BATCHED_OBSERVERS) {
        // set batched callbacks
        trackEngine->setBatchBestShotObserver(&batchedSuperObserver);
        trackEngine->setBatchVisualObserver(&batchedSuperObserver);
        trackEngine->setBatchDebugObserver(&batchedSuperObserver);
    }

    int observerIndex = 0;
    for (int i = 0; i < streamCount; i++) {
        // create stream
        fsdk::Ref<tsdk::IStream> stream = fsdk::acquire(trackEngine->createStream());
        observers[observerIndex].m_streamId = observerIndex;

        if (!USE_BATCHED_OBSERVERS) {
            // set per-stream callbacks
            stream->setBestShotObserver(&observers[observerIndex]);
            stream->setVisualObserver(&observers[observerIndex]);
            stream->setDebugObserver(&observers[observerIndex]);
        }

        // always per-stream predicates
        // NOTE: here we use "super" observers just to simplify code, actually, separate vector of predicates should be created
        stream->setBestShotPredicate(&observers[observerIndex]);
        stream->setVisualPredicate(&observers[observerIndex]);

        // by default all observers are enabled, this is just demonstration of api using
        stream->setObserverEnabled(tsdk::StreamObserverType::SOT_BEST_SHOT, true);
        stream->setObserverEnabled(tsdk::StreamObserverType::SOT_VISUAL, true);
        stream->setObserverEnabled(tsdk::StreamObserverType::SOT_DEBUG, true);

        streamsList.emplace_back(stream);

        threads.emplace_back(std::async(std::launch::async, threadFunc, i));
        std::cout << "stream " << i << " started" << std::endl;

        observerIndex++;
    }

    while (true) {
        bool notFinished = false;

        for (auto &thread: threads) {
            if (thread.wait_for(std::chrono::milliseconds(10)) == std::future_status::timeout)
                notFinished = true;
        }
        if (!notFinished)
            break;
    }

    // it's recommended to join each stream manually
    for (auto &stream : streamsList) {
        stream->join();
    }

    // this internally calls join for all streams (that wasn't joined yet) and stops processing
    trackEngine->stop();

    const std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
    const std::chrono::milliseconds duration =
            std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
    std::cout << "TOTAL DURATION: " << duration.count() << std::endl;
}