Xiao Ling

Posted on Dec 20, 2024 • Originally published at dynamsoft.com

Building a Lightweight C++ Camera Library for Barcode Scanning on Linux

#linux #camera #cpp #barcode

When searching for a camera SDK to develop desktop camera applications on Linux, OpenCV often comes to mind due to its simple APIs for camera access. However, the OpenCV library is large, which makes it overkill for smaller projects, especially those that only require basic camera functionality. To avoid unnecessarily inflating the project size, a better approach is to build a lightweight C++ camera library from scratch as an alternative to OpenCV.

In this article, we will first use the Linux V4L2 API to build a lightweight library for accessing the camera. Then, we will combine the custom camera library with the Dynamsoft Barcode Reader SDK to build a barcode scanner on Linux.

Linux Camera Demo Video

Prerequisites

CMake
GCC
Video4Linux2 (libv4l2)

X11 (libX11)

# Refresh the package index, then install CMake, the GCC toolchain,
# and the V4L2 and X11 development packages.
sudo apt update
sudo apt install cmake build-essential libv4l-dev libx11-dev

Designing the Camera API

Let's begin by considering the design of the camera API. The API should be simple and intuitive to use. Below are some essential functions the camera API should provide:

Query Cameras: Enumerate the available cameras and provide options for selecting a camera.
Open a Camera: Open a specific camera by its index.
Capture a Frame: Capture a frame from the camera for processing or viewing. The frame object should encapsulate image data, width, height, and pixel format.
Save a Frame to a File: Save a captured frame as an image file.
Close Camera: Safely release the camera resources when no longer needed.

Implementing Camera-Related Functions

Querying Cameras

Define a structure to store camera information:

// Describes one video capture device reported by ListCaptureDevices().
struct CAMERA_API CaptureDeviceInfo
{
    // NUL-terminated, human-readable device name (copied from the V4L2 `card` field).
    char friendlyName[512];
};

Create a function to enumerate video capture devices (e.g., webcams) available on a Linux system using the Video4Linux2 (V4L2) API:


/// Enumerates V4L2 video capture devices by probing /dev/video0 .. /dev/video9.
///
/// Each node that can be opened and answers VIDIOC_QUERYCAP with the
/// video-capture capability contributes one entry whose `friendlyName`
/// is the driver-reported card name.
///
/// Note: nodes that are missing or are not capture nodes are skipped, so an
/// entry's position in the returned vector is not necessarily the N of its
/// /dev/videoN node.
///
/// @return The list of capture devices found (empty if none).
CAMERA_API std::vector<CaptureDeviceInfo> ListCaptureDevices()
{
    // Number of /dev/videoN nodes probed (N = 0 .. kMaxDeviceNodes - 1).
    constexpr int kMaxDeviceNodes = 10;

    std::vector<CaptureDeviceInfo> devices;

    for (int i = 0; i < kMaxDeviceNodes; ++i)
    {
        const std::string devicePath = "/dev/video" + std::to_string(i);

        // O_NONBLOCK so that probing never stalls on a busy device.
        const int fd = open(devicePath.c_str(), O_RDWR | O_NONBLOCK, 0);
        if (fd == -1)
        {
            continue; // node missing or not accessible
        }

        struct v4l2_capability cap;
        memset(&cap, 0, sizeof(cap));
        if (ioctl(fd, VIDIOC_QUERYCAP, &cap) == 0)
        {
            // `capabilities` describes the physical device as a whole, so a
            // metadata-only node of a webcam also advertises VIDEO_CAPTURE
            // there. When the driver fills `device_caps`, use it: it describes
            // only the node that was actually opened.
            const unsigned int nodeCaps =
                (cap.capabilities & V4L2_CAP_DEVICE_CAPS) ? cap.device_caps : cap.capabilities;

            if (nodeCaps & V4L2_CAP_VIDEO_CAPTURE)
            {
                CaptureDeviceInfo deviceInfo = {};

                // Bounded copy; the explicit terminator guards against truncation.
                strncpy(deviceInfo.friendlyName, reinterpret_cast<const char *>(cap.card), sizeof(deviceInfo.friendlyName) - 1);
                deviceInfo.friendlyName[sizeof(deviceInfo.friendlyName) - 1] = '\0';

                devices.push_back(deviceInfo);
            }
        }

        close(fd);
    }

    return devices;
}

Explanation

The ListCaptureDevices function probes the device nodes /dev/video0 through /dev/video9 to find available cameras.
It uses the ioctl system call with the VIDIOC_QUERYCAP command to query the capabilities of each device.
The cap.card field contains the device's name, which is stored in friendlyName.

Opening a Camera

Construct a device path and open it:

// Build the device node path for the requested index, e.g. /dev/video0.
std::string devicePath = "/dev/video" + std::to_string(cameraIndex);

// Open the node read/write; `fd` is reused by the later ioctl and mmap calls.
fd = open(devicePath.c_str(), O_RDWR);
if (fd < 0)
{
    // perror appends the errno description to the message.
    perror("Error opening video device");
    return false;
}

Configure video width, height, and pixel format:

struct v4l2_format fmt;
memset(&fmt, 0, sizeof(fmt));
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
fmt.fmt.pix.width = frameWidth;
fmt.fmt.pix.height = frameHeight;
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;

if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0)
{
    perror("Error setting resolution");
    return false;
}

Prepare the device by requesting buffers, mapping them into memory, and queuing them:

/// Sets up memory-mapped streaming I/O on the opened device.
///
/// Requests capture buffers from the driver, maps each one into this process
/// and queues it so the driver can start filling it.
///
/// `bufferCount` always equals the number of entries in `buffers` that hold a
/// valid mapping, so cleanup after a partial failure only touches real
/// mappings.
///
/// @return true when every buffer was mapped and queued; false otherwise
///         (the reason is written to stderr).
bool Camera::InitDevice()
{
    struct v4l2_requestbuffers req;
    memset(&req, 0, sizeof(req));
    req.count = 4;
    req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    req.memory = V4L2_MEMORY_MMAP;

    if (ioctl(fd, VIDIOC_REQBUFS, &req) < 0)
    {
        perror("Failed to request buffers");
        return false;
    }

    // The driver may grant fewer buffers than requested; streaming needs at
    // least two so one can be filled while another is being processed.
    if (req.count < 2)
    {
        fprintf(stderr, "Failed to request buffers: insufficient buffer memory\n");
        return false;
    }

    buffers = new Buffer[req.count];
    bufferCount = 0; // grows as mappings succeed

    for (unsigned int i = 0; i < req.count; ++i)
    {
        struct v4l2_buffer buf;
        memset(&buf, 0, sizeof(buf));
        buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        buf.memory = V4L2_MEMORY_MMAP;
        buf.index = i;

        // Ask the driver for the size and mmap offset of buffer i.
        if (ioctl(fd, VIDIOC_QUERYBUF, &buf) < 0)
        {
            perror("Failed to query buffer");
            return false;
        }

        void *mapped = mmap(nullptr, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, buf.m.offset);
        if (mapped == MAP_FAILED)
        {
            perror("Failed to map buffer");
            return false;
        }

        // Record the mapping only once it is known to be valid.
        buffers[i].length = buf.length;
        buffers[i].start = mapped;
        bufferCount = i + 1;

        // Hand the buffer to the driver so it can be filled once streaming starts.
        if (ioctl(fd, VIDIOC_QBUF, &buf) < 0)
        {
            perror("Failed to queue buffer");
            return false;
        }
    }
    return true;
}

Start the video capture stream:

/// Turns streaming on for the video-capture buffer type.
/// @return true if the VIDIOC_STREAMON ioctl succeeded, false otherwise.
bool Camera::StartCapture()
{
    enum v4l2_buf_type streamType = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    const int rc = ioctl(fd, VIDIOC_STREAMON, &streamType);
    return rc >= 0;
}

Capturing a Frame

Dequeue a buffer to retrieve the frame data:

// Describe the kind of buffer to dequeue; the driver fills in the rest.
struct v4l2_buffer buf;
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;

// Take a buffer back from the driver.
if (ioctl(fd, VIDIOC_DQBUF, &buf) < 0)
{
    perror("Failed to dequeue buffer");
    // Value-initialized result signals failure to the caller.
    return {};
}

Store the RGB frame data along with its dimensions and size:

// One captured frame converted to packed RGB.
struct FrameData
{
    unsigned char *rgbData; // heap-allocated pixel data, 3 bytes per pixel
    int width;              // frame width in pixels
    int height;             // frame height in pixels
    int size;               // size of rgbData in bytes (width * height * 3)
};

// Describe the output frame and allocate its RGB buffer.
FrameData frame;
frame.width = frameWidth;
frame.height = frameHeight;
// Three bytes (R, G, B) per pixel.
frame.size = frameWidth * frameHeight * 3; 
// Allocated with new[]; ownership passes to whoever receives the frame.
frame.rgbData = new unsigned char[frame.size];

Convert the raw YUYV format to RGB:

// YUYV packs two pixels into four bytes (Y1 U Y2 V): each pixel has its own
// luma sample and the pair shares one U and one V sample. Every iteration
// therefore consumes 4 input bytes and produces 6 output bytes (two RGB pixels).
int rgbIndex = 0;
for (int i = 0; i < width * height * 2; i += 4)
{
    // Extract the two luma samples and the shared chroma samples
    unsigned char y1 = yuy2Data[i];
    unsigned char u = yuy2Data[i + 1];
    unsigned char y2 = yuy2Data[i + 2];
    unsigned char v = yuy2Data[i + 3];

    // Convert first pixel (Y1, U, V) to RGB; chroma is centred on 128 and
    // each channel is clamped to the 0..255 byte range
    rgbData[rgbIndex++] = clamp(y1 + 1.402 * (v - 128), 0.0, 255.0);
    rgbData[rgbIndex++] = clamp(y1 - 0.344136 * (u - 128) - 0.714136 * (v - 128), 0.0, 255.0);
    rgbData[rgbIndex++] = clamp(y1 + 1.772 * (u - 128), 0.0, 255.0);

    // Convert second pixel (Y2, U, V) to RGB
    rgbData[rgbIndex++] = clamp(y2 + 1.402 * (v - 128), 0.0, 255.0);
    rgbData[rgbIndex++] = clamp(y2 - 0.344136 * (u - 128) - 0.714136 * (v - 128), 0.0, 255.0);
    rgbData[rgbIndex++] = clamp(y2 + 1.772 * (u - 128), 0.0, 255.0);
}

Requeue the buffer for reuse:

// Give the buffer back to the driver so it can be filled again.
// A failure is only reported; execution continues.
if (ioctl(fd, VIDIOC_QBUF, &buf) < 0)
{
    perror("Failed to queue buffer");
}

Saving a Frame to a JPEG File

Use the stb_image_write.h library to save the frame data:

#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
/// Writes a packed 3-channel image to `filename` as a JPEG via stb_image_write.
/// Reports the outcome on std::cout (success) or std::cerr (failure).
///
/// @param data     Pixel data, 3 bytes per pixel.
/// @param width    Image width in pixels.
/// @param height   Image height in pixels.
/// @param filename Destination file path.
void saveFrameAsJPEG(const unsigned char *data, int width, int height, const std::string &filename)
{
    // Arguments after the dimensions: 3 components per pixel, JPEG quality 90.
    const int written = stbi_write_jpg(filename.c_str(), width, height, 3, data, 90);
    if (!written)
    {
        std::cerr << "Error saving frame as JPEG." << std::endl;
        return;
    }

    std::cout << "Saved frame to " << filename << std::endl;
}

Closing a Camera

Stop the video capture stream:

/// Turns streaming off for the video-capture buffer type.
/// The ioctl result is not checked.
void Camera::StopCapture()
{
    enum v4l2_buf_type streamType = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    static_cast<void>(ioctl(fd, VIDIOC_STREAMOFF, &streamType));
}

Unmap the buffers:

void Camera::UninitDevice()
{
    if (buffers)
    {
        for (unsigned int i = 0; i < bufferCount; ++i)
        {
            munmap(buffers[i].start, buffers[i].length);
        }
        delete[] buffers;
        buffers = nullptr;
    }
}

Close the device:

/// Closes the device file descriptor if it is open and marks it as closed (-1).
void Camera::Close()
{
    if (fd < 0)
    {
        return; // nothing open
    }

    close(fd);
    fd = -1;
}

Capturing and Saving Frames in a Command-Line Application

Finally, write a simple program to capture and save camera frames:


#include <iostream>
#include "Camera.h"

// Captures two frames from camera 0, one second apart, and saves each as
// frame_<i>.jpg. Returns 0 on success and -1 if the camera cannot be opened.
int main()
{
    Camera camera;

    std::cout << "Capturing frames...\n";
    if (!camera.Open(0))
    {
        // Report the failure instead of exiting silently with a success code.
        std::cerr << "Failed to open camera." << std::endl;
        return -1;
    }

    for (int i = 0; i < 2; ++i)
    {
        FrameData frame = camera.CaptureFrame();
        if (frame.rgbData)
        {
            const std::string filename = "frame_" + std::to_string(i) + ".jpg";
            // Use the dimensions stored with the frame itself.
            saveFrameAsJPEG(frame.rgbData, frame.width, frame.height, filename);
        }

        ReleaseFrame(frame);
        sleep(1);
    }

    // Release the camera explicitly, as the other examples do.
    camera.Release();
    return 0;
}

Displaying Camera Frames in Real-Time

To enhance the user experience, we can display camera frames in real-time using the X11 library. Inspired by the OpenCV API, the following functions are defined:

Create(): Create a window.

// Stores the requested size and title. No X11 resources are acquired here;
// that happens in Create().
CameraWindow::CameraWindow(int w, int h, const std::string &t)
: width(w), height(h), title(t), display(nullptr), window(0), gc(nullptr)
{
    // WaitKey() compares incoming client messages against this atom, so give
    // it a defined value in case Create() fails before the atom is interned.
    wmDeleteMessage = None;
}

// Releases whatever Create() acquired, in reverse order of creation:
// graphics context, then window, then the display connection.
CameraWindow::~CameraWindow()
{
    if (gc != nullptr)
        XFreeGC(display, gc);

    if (window != 0)
        XDestroyWindow(display, window);

    if (display != nullptr)
        XCloseDisplay(display);
}

// Connects to the X server, creates a width x height window with the stored
// title, creates its graphics context, registers for window-close
// notifications and maps the window.
// Returns true on success; on failure writes a message to std::cerr and
// returns false.
bool CameraWindow::Create()
{
    // nullptr selects the default display.
    display = XOpenDisplay(nullptr);
    if (!display)
    {
        std::cerr << "Failed to open X display." << std::endl;
        return false;
    }

    int screen = DefaultScreen(display);
    Window rootWindow = RootWindow(display, screen);

    // Top-level window at (0, 0) with a 1-pixel black border and a white background.
    window = XCreateSimpleWindow(
        display, rootWindow,
        0, 0, width, height, 1,
        BlackPixel(display, screen),
        WhitePixel(display, screen));

    if (!window)
    {
        std::cerr << "Failed to create window." << std::endl;
        return false;
    }

    XStoreName(display, window, title.c_str());

    // Graphics context with default values, used by the drawing methods.
    gc = XCreateGC(display, window, 0, nullptr);
    if (!gc)
    {
        std::cerr << "Failed to create graphics context." << std::endl;
        return false;
    }

    // Register for WM_DELETE_WINDOW so that closing the window arrives as a
    // ClientMessage event, which WaitKey() checks for.
    wmDeleteMessage = XInternAtom(display, "WM_DELETE_WINDOW", False);
    XSetWMProtocols(display, window, &wmDeleteMessage, 1);

    // Subscribe to expose, key-press and structure events, then make the window visible.
    XSelectInput(display, window, ExposureMask | KeyPressMask | StructureNotifyMask);
    XMapWindow(display, window);

    return true;
}

Show(): Display the window.

// Flushes the Xlib output buffer so that pending requests are sent to the X server.
void CameraWindow::Show()
{
    XFlush(display);
}

ShowFrame(): Render a frame in the window. The input frame data is in RGB888; it is repacked into a 32-bit-per-pixel buffer in blue, green, red, padding byte order for X11 display.

// Draws one packed RGB frame at the top-left corner of the window.
//
// rgbData     - frameWidth * frameHeight pixels, 3 bytes each, in R,G,B order.
// frameWidth  - frame width in pixels; must be positive.
// frameHeight - frame height in pixels; must be positive.
//
// Writes a message to std::cerr and returns without drawing when the window
// is not ready, rgbData is null, or a dimension is not positive.
void CameraWindow::ShowFrame(const unsigned char *rgbData, int frameWidth, int frameHeight)
{
    if (!display || !window || !gc || !rgbData)
    {
        std::cerr << "Invalid display, window, gc, or rgbData." << std::endl;
        return;
    }

    if (frameWidth <= 0 || frameHeight <= 0)
    {
        std::cerr << "Invalid frame dimensions." << std::endl;
        return;
    }

    // Do the size arithmetic in size_t so large frames cannot overflow int.
    const size_t pixelCount = static_cast<size_t>(frameWidth) * static_cast<size_t>(frameHeight);

    // Repack 3-byte R,G,B pixels as 4-byte B,G,R,X pixels for the XImage.
    std::vector<unsigned char> bgrxBuffer(pixelCount * 4);
    for (size_t i = 0; i < pixelCount; ++i)
    {
        bgrxBuffer[i * 4 + 0] = rgbData[i * 3 + 2]; // blue
        bgrxBuffer[i * 4 + 1] = rgbData[i * 3 + 1]; // green
        bgrxBuffer[i * 4 + 2] = rgbData[i * 3 + 0]; // red
        bgrxBuffer[i * 4 + 3] = 255;                // padding byte
    }

    XImage *image = XCreateImage(
        display,
        DefaultVisual(display, DefaultScreen(display)),
        24,      // depth
        ZPixmap, // format
        0,       // offset
        reinterpret_cast<char *>(bgrxBuffer.data()),
        frameWidth, frameHeight,
        32, // bitmap_pad
        0   // bytes_per_line: 0 lets Xlib compute it
    );

    if (!image)
    {
        std::cerr << "Failed to create XImage." << std::endl;
        return;
    }

    XPutImage(display, window, gc, image, 0, 0, 0, 0, frameWidth, frameHeight);
    XFlush(display);

    // The pixel memory belongs to bgrxBuffer; detach it so that
    // XDestroyImage does not try to free it.
    image->data = nullptr;
    XDestroyImage(image);
}

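WaitKey(): Poll pending window events without blocking. It returns false when the given key is pressed or the window is closed, and true otherwise, so it can be used directly as the condition of the capture loop.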

bool CameraWindow::WaitKey(char key)
{
    if (!display || !window)
        return false;

    while (XPending(display) > 0)
    {
        XEvent event;
        XNextEvent(display, &event);

        if (event.type == ClientMessage && (Atom)event.xclient.data.l[0] == wmDeleteMessage)
        {
            return false; 
        }

        if (event.type == KeyPress)
        {
            char buffer[32] = {0};
            KeySym keysym;
            int len = XLookupString(&event.xkey, buffer, sizeof(buffer), &keysym, nullptr);

            if (len > 0 && buffer[0] != '\0')
            {
                if (key == '\0') 
                {
                    return false;
                }
                else if (buffer[0] == key || buffer[0] == std::toupper(key))
                {
                    return false; 
                }
            }
        }
    }

    return true; 
}

DrawContour(): Draw contours based on input points.

// Draws a closed outline through `points` in green (0x00FF00): each point is
// joined to the next one and the last point is joined back to the first.
// Does nothing when the window is not ready or fewer than two points are given.
void CameraWindow::DrawContour(const std::vector<std::pair<int, int>> &points)
{
    const size_t count = points.size();
    if (!display || !gc || !window || count < 2)
        return;

    XSetForeground(display, gc, 0x00FF00);

    for (size_t idx = 0; idx < count; ++idx)
    {
        // Wrap around so the final segment closes the outline.
        const size_t nextIdx = (idx + 1) % count;
        XDrawLine(display, window, gc,
                  points[idx].first, points[idx].second,
                  points[nextIdx].first, points[nextIdx].second);
    }

    XFlush(display);
}

DrawText(): Render text on the window.

// Draws `text` on the window at (x, y) in the given colour.
//
// text     - string to draw.
// x, y     - position passed to XDrawString.
// fontSize - requested Helvetica pixel size; values <= 0 fall back to 12.
// color    - r/g/b components packed into a 0xRRGGBB pixel value.
//
// Font lookup order: Helvetica at the requested pixel size, Helvetica at
// 12 pixels, then the "fixed" font. If none can be loaded, a message is
// written to std::cerr and nothing is drawn.
void CameraWindow::DrawText(const std::string &text, int x, int y, int fontSize, const Color &color)
{
    if (!gc || !display || !window)
        return;

    XSetForeground(display, gc, (color.r << 16) | (color.g << 8) | color.b);

    // Honour the requested size; it is the pixel-size field of the XLFD pattern.
    const int pixelSize = fontSize > 0 ? fontSize : 12;
    const std::string sizedPattern =
        "-*-helvetica-*-r-*-*-" + std::to_string(pixelSize) + "-*-*-*-*-*-*-*";

    XFontStruct *font = XLoadQueryFont(display, sizedPattern.c_str());
    if (!font)
    {
        // The requested size may not be installed; try the 12-pixel variant.
        font = XLoadQueryFont(display, "-*-helvetica-*-r-*-*-12-*-*-*-*-*-*-*");
    }
    if (!font)
    {
        font = XLoadQueryFont(display, "fixed");
    }
    if (!font)
    {
        std::cerr << "Failed to load font." << std::endl;
        return;
    }

    XSetFont(display, gc, font->fid);
    XDrawString(display, window, gc, x, y, text.c_str(), static_cast<int>(text.size()));
    XFreeFont(display, font);
}

Full Example Code

#include "Camera.h"
#include "CameraPreview.h"

// Opens camera 0 and shows its frames in a 640x480 preview window until 'q'
// is pressed or the window is closed. Returns 0 on a normal exit and -1 if
// the camera or the window cannot be set up.
int main()
{
    Camera camera;
    if (!camera.Open(0)) {
        std::cerr << "Failed to open camera." << std::endl;
        return -1;
    }

    CameraWindow window(640, 480, "LiteCam Preview");
    if (!window.Create()) {
        std::cerr << "Failed to create window." << std::endl;
        // The camera is already open at this point; release it before bailing out.
        camera.Release();
        return -1;
    }

    window.Show();

    // WaitKey returns false on 'q' or window close, which ends the loop.
    while (window.WaitKey('q')) {
        FrameData frame = camera.CaptureFrame();
        if (frame.rgbData) {
            window.ShowFrame(frame.rgbData, frame.width, frame.height);
            ReleaseFrame(frame);
        }
    }

    camera.Release();
    return 0;
}

Building a Linux Barcode Scanner Application

After implementing the camera library, you can now build a barcode scanner application using the Dynamsoft Barcode Reader SDK. This application will continuously capture frames from the camera and decode barcodes in real-time.

Steps to Build the Application

Obtain a valid license key and download the Dynamsoft C++ Barcode SDK.
Create a new C++ project and add the camera library and Dynamsoft Barcode Reader SDK to the project.

Configure the CMakeLists.txt to link the required libraries.

cmake_minimum_required(VERSION 3.10)
project(BarcodeScanner)

# Build as C++11 with -O3 and an $ORIGIN rpath, so shared libraries placed
# next to the executable are found at run time.
SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Wl,-rpath=$ORIGIN")
SET(CMAKE_INSTALL_RPATH "$ORIGIN")
# Library search paths: the litecam build output and the Dynamsoft SDK's Linux binaries.
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/linux ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/linux)
# Dynamsoft libraries to link, plus pthread.
set(DBR_LIBS "DynamsoftCore" "DynamsoftLicense" "DynamsoftCaptureVisionRouter" "DynamsoftUtility" pthread)

add_executable(BarcodeScanner main.cpp)
# Header search paths: litecam headers and the Dynamsoft SDK headers.
target_include_directories(BarcodeScanner PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../dist/include ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/include)
target_link_libraries(BarcodeScanner litecam ${DBR_LIBS})

# After each build, copy the SDK's Linux platform directory next to the executable.
add_custom_command(TARGET BarcodeScanner POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/linux/      
$<TARGET_FILE_DIR:BarcodeScanner>)

Use the following code to integrate the Dynamsoft Barcode Reader SDK with the camera library:

#include <iostream>
#include <vector>
#include <mutex>

#include "template.h"
#include "DynamsoftCaptureVisionRouter.h"
#include "DynamsoftUtility.h"
#include "Camera.h"
#include "CameraPreview.h"

using namespace dynamsoft::license;
using namespace dynamsoft::cvr;
using namespace dynamsoft::dbr;
using namespace dynamsoft::utility;
using namespace dynamsoft::basic_structures;

int main()
{
    int iRet = -1;
    char szErrorMsg[256];
    iRet = CLicenseManager::InitLicense("LICENSE-KEY", szErrorMsg, 256);
    if (iRet != EC_OK)
    {
        std::cout << szErrorMsg << std::endl;
    }
    int errorCode = 1;
    char errorMsg[512] = {0};

    CCaptureVisionRouter *cvr = new CCaptureVisionRouter;
    errorCode = cvr->InitSettings(jsonString.c_str(), errorMsg, 512);
    if (errorCode != EC_OK)
    {
        std::cout << "error:" << errorMsg << std::endl;
        return -1;
    }

    Camera camera;

    if (camera.Open(0))
    {
        CameraWindow window(camera.frameWidth, camera.frameHeight, "Camera Stream");
        if (!window.Create())
        {
            std::cerr << "Failed to create window." << std::endl;
            return -1;
        }

        window.Show();

        CameraWindow::Color textColor = {255, 0, 0};

        while (window.WaitKey('q'))
        {
            FrameData frame = camera.CaptureFrame();

            if (frame.rgbData)
            {
                window.ShowFrame(frame.rgbData, frame.width, frame.height);

                CImageData data(frame.size,
                                frame.rgbData,
                                frame.width,
                                frame.height,
                                frame.width * 3,
                                IPF_RGB_888,
                                0, 0);
                CCapturedResult *result = cvr->Capture(&data, "");

                if (result)
                {
                    CDecodedBarcodesResult *barcodeResult = result->GetDecodedBarcodesResult();
                    if (barcodeResult)
                    {
                        int count = barcodeResult->GetItemsCount();
                        for (int i = 0; i < count; i++)
                        {
                            const CBarcodeResultItem *barcodeResultItem = barcodeResult->GetItem(i);
                            CPoint *points = barcodeResultItem->GetLocation().points;

                            std::vector<std::pair<int, int>> corners = {
                                {points[0][0], points[0][1]},
                                {points[1][0], points[1][1]},
                                {points[2][0], points[2][1]},
                                {points[3][0], points[3][1]}};

                            window.DrawContour(corners);

                            window.DrawText(barcodeResultItem->GetText(), points[0][0], points[0][1], 24, textColor);
                        }
                    }

                    result->Release();
                }
            }

            ReleaseFrame(frame);
        }

        camera.Release();
    }

    return 0;
}

Note: Replace LICENSE-KEY with your valid license key.

Build and run the application.

# Configure and build out of tree in ./build.
mkdir build
cd build
cmake ..
cmake --build .

Source Code

https://github.com/yushulx/cmake-cpp-barcode-qrcode-mrz/tree/main/litecam

DEV Community