DEV Community

Xiao Ling
Xiao Ling

Posted on • Originally published at dynamsoft.com

How to Implement Camera Preview with Windows Media Foundation API in C++

In the previous article, we developed a LiteCam library for camera access on Linux. To extend its functionality to Windows, we will leverage the Media Foundation API.
This article explores how to use Media Foundation to access the camera on Windows, integrate it with the LiteCam library, and reuse the existing barcode scanning example code to build a camera-based barcode scanner for Windows.

Windows Camera Demo Video

Implementing Camera-Related Functions for Windows

Updating the Header File to Support Both Windows and Linux

To support both Windows and Linux, the Camera.h header file requires the following updates:

  • Include platform-specific headers:

    #ifdef _WIN32
    #include <windows.h>
    #include <mfapi.h>
    #include <mfidl.h>
    #include <mfobjects.h>
    #include <mfreadwrite.h>
    #include <wrl/client.h>
    #include <dshow.h>
    
    #elif __linux__
    #include <linux/videodev2.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    
    // Describes one contiguous memory region by its start address and size in bytes.
    // Only declared on Linux; presumably one mmap'ed V4L2 capture buffer — confirm
    // against the Linux implementation.
    struct Buffer
    {
        void *start;   // first byte of the region
        size_t length; // size of the region in bytes
    };
    
    #endif
    
  • Define the CAMERA_API macro for platform-specific export visibility:

    // CAMERA_API decorates the declarations that make up the library's public interface.
    #ifdef _WIN32
    // Windows: dllexport when CAMERA_EXPORTS is defined, dllimport otherwise.
    // (CAMERA_EXPORTS is presumably defined only by the library's own build — confirm.)
    #ifdef CAMERA_EXPORTS
    #define CAMERA_API __declspec(dllexport)
    #else
    #define CAMERA_API __declspec(dllimport)
    #endif
    #elif defined(__linux__) || defined(__APPLE__)
    // Linux/macOS: give the symbol default visibility.
    #define CAMERA_API __attribute__((visibility("default")))
    #else
    // Any other platform: no decoration.
    #define CAMERA_API
    #endif
    
  • Modify the MediaTypeInfo and CaptureDeviceInfo structures to use appropriate string types:

    
    // One media type: a frame size plus the name of its subtype.
    // The name is a wide-character string on Windows and a narrow string elsewhere.
    struct CAMERA_API MediaTypeInfo
    {
        uint32_t width;  // frame width (presumably in pixels)
        uint32_t height; // frame height (presumably in pixels)
    #ifdef _WIN32
        wchar_t subtypeName[512]; 
    #else
        char subtypeName[512]; 
    #endif
    };
    
    // One capture device, identified by its friendly name.
    // The name is a wide-character string on Windows and a narrow string elsewhere;
    // the fixed-size array holds at most 511 characters plus the terminator.
    struct CAMERA_API CaptureDeviceInfo
    {
    
    #ifdef _WIN32
        wchar_t friendlyName[512];
    #else
        char friendlyName[512]; 
    #endif
    };
    
  • Adjust pixel conversion logic in the ConvertYUY2ToRGB function:

    // Limits a channel value to [0, 255] and truncates it to a byte.
    static inline unsigned char ClampToByte(double value)
    {
        if (value < 0.0)
            return 0;
        if (value > 255.0)
            return 255;
        return static_cast<unsigned char>(value);
    }

    /**
     * Converts a packed YUY2 image into a 24-bit, 3-bytes-per-pixel image.
     *
     * YUY2 stores two pixels in four bytes (Y0 U Y1 V); both pixels share the
     * same U and V samples. The output channel order depends on the platform:
     * B, G, R on Windows and R, G, B elsewhere.
     *
     * @param yuy2Data source buffer; must hold at least width * height * 2 bytes
     * @param rgbData  destination buffer; must hold at least width * height * 3 bytes
     * @param width    frame width in pixels
     * @param height   frame height in pixels
     *
     * Nothing is written when a pointer is null or a dimension is not positive.
     * If width * height is odd, the final unpaired pixel is left untouched,
     * because converting it would read and write past the buffers described above.
     */
    void ConvertYUY2ToRGB(const unsigned char *yuy2Data, unsigned char *rgbData, int width, int height)
    {
        if (!yuy2Data || !rgbData || width <= 0 || height <= 0)
            return;

        // Count in size_t so large frames cannot overflow an int.
        const size_t pixelCount = static_cast<size_t>(width) * static_cast<size_t>(height);
        const size_t pairCount = pixelCount / 2; // whole 4-byte macropixels only

        const unsigned char *src = yuy2Data;
        unsigned char *dst = rgbData;

        for (size_t pair = 0; pair < pairCount; ++pair, src += 4)
        {
            const int luma[2] = {src[0], src[2]};
            const int u = src[1] - 128; // chroma samples are stored with a +128 bias
            const int v = src[3] - 128;

            for (int k = 0; k < 2; ++k)
            {
                const int y = luma[k];
                const unsigned char red = ClampToByte(y + 1.402 * v);
                const unsigned char green = ClampToByte(y - 0.344136 * u - 0.714136 * v);
                const unsigned char blue = ClampToByte(y + 1.772 * u);

    #ifdef _WIN32
                dst[0] = blue;
                dst[1] = green;
                dst[2] = red;
    #else
                dst[0] = red;
                dst[1] = green;
                dst[2] = blue;
    #endif
                dst += 3;
            }
        }
    }
    

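    The red and blue channels are written in opposite order on the two platforms: the Windows branch emits B, G, R (the byte order a 24-bit GDI bitmap expects, which is how the frame is later drawn with StretchDIBits), while the Linux branch emits R, G, B.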

  • Define the Camera class with platform-specific members and methods:

    // Camera device wrapper. Only the platform-specific parts of the class are shown
    // here; the remaining members are omitted from this excerpt.
    class CAMERA_API Camera
    {
    public:
    #ifdef _WIN32
        // Windows: constructor and destructor are defined out of line.
        Camera();
        ~Camera();
    #elif __linux__
        // Linux: starts with no open device (fd == -1), a 640x480 frame size and no buffers.
        Camera() : fd(-1), frameWidth(640), frameHeight(480), buffers(nullptr), bufferCount(0) {}
        // Linux: the destructor delegates cleanup to Release().
        ~Camera() { Release(); }
    #endif
    
    private:
    #ifdef _WIN32
        // Type-erased reader handle; the open/capture/close code in this article
        // stores an IMFSourceReader* here and casts it back when needed.
        void *reader;
    
        // Checked before MFShutdown() when closing; presumably set by
        // InitializeMediaFoundation() — confirm against the implementation.
        bool initialized;
        void InitializeMediaFoundation();
        void ShutdownMediaFoundation();
    #endif
    
    #ifdef __linux__
        int fd;                   // device file descriptor; -1 when no device is open
        Buffer *buffers;          // array of capture buffers; nullptr until allocated
        unsigned int bufferCount; // number of entries in buffers
    
        bool InitDevice();
        void UninitDevice();
        bool StartCapture();
        void StopCapture();
    #endif
    
    };
    

Querying Cameras

Use the Media Foundation API to enumerate available cameras:

std::vector<CaptureDeviceInfo> ListCaptureDevices()
{
    HRESULT hr = S_OK;
    ComPtr<IMFAttributes> attributes;
    std::vector<CaptureDeviceInfo> devicesInfo;

    hr = MFCreateAttributes(&attributes, 1);
    if (FAILED(hr))
    {
        std::cerr << "Failed to create attributes." << std::endl;
        return devicesInfo;
    }

    hr = attributes->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID);
    if (FAILED(hr))
    {
        std::cerr << "Failed to set video capture device attribute." << std::endl;
        return devicesInfo;
    }

    UINT32 count = 0;
    IMFActivate **devices = nullptr;

    hr = MFEnumDeviceSources(attributes.Get(), &devices, &count);
    if (FAILED(hr) || count == 0)
    {
        std::cerr << "No video capture devices found." << std::endl;
        return devicesInfo;
    }

    for (UINT32 i = 0; i < count; ++i)
    {
        WCHAR *friendlyName = nullptr;
        UINT32 nameLength = 0;

        hr = devices[i]->GetAllocatedString(MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, &friendlyName, &nameLength);
        if (SUCCEEDED(hr))
        {
            CaptureDeviceInfo info = {};
            wcsncpy(info.friendlyName, friendlyName, nameLength);
            devicesInfo.push_back(info);
            CoTaskMemFree(friendlyName);
        }
        devices[i]->Release();
    }

    CoTaskMemFree(devices);
    return devicesInfo;
}
Enter fullscreen mode Exit fullscreen mode

Explanation

  • Create an IMFAttributes object to specify the video capture device.
  • Enumerate video capture devices using MFEnumDeviceSources.
  • Retrieve the friendly name of each device using GetAllocatedString.

Opening a Camera

  1. Activate a specified camera by index:

    // Activate the selected device to obtain its media source.
    ComPtr<IMFMediaSource> mediaSource;
    hr = devices[cameraIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource));

    // The activation objects and their array are no longer needed, whether or
    // not activation succeeded.
    for (UINT32 i = 0; i < count; i++)
        devices[i]->Release();
    CoTaskMemFree(devices);
    
    if (FAILED(hr))
        return false;
    
    ComPtr<IMFSourceReader> mfReader;
    hr = MFCreateSourceReaderFromMediaSource(mediaSource.Get(), nullptr, &mfReader);
    if (FAILED(hr))
    {
        // No source reader exists to shut the source down for us, so do it here
        // to release the device.
        mediaSource->Shutdown();
        return false;
    }
    

    The IMFSourceReader object is used to read video data from the camera.

  2. Configure video width, height, and pixel format. For example, YUY2 format with a frame size of 640x480:

    // Describe the requested output format: video, YUY2, frameWidth x frameHeight.
    ComPtr<IMFMediaType> mediaType;
    hr = MFCreateMediaType(&mediaType);
    if (FAILED(hr))
        return false;
    
    // Each step runs only if the previous one succeeded, so an early failure is
    // not hidden by a later success.
    hr = mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (SUCCEEDED(hr))
        hr = mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_YUY2);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeSize(mediaType.Get(), MF_MT_FRAME_SIZE, frameWidth, frameHeight);
    
    if (SUCCEEDED(hr))
    {
        hr = mfReader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, nullptr, mediaType.Get());
    
        if (SUCCEEDED(hr))
        {
            // Hand the reader's reference over to the type-erased member;
            // it must be released when the camera is closed.
            reader = reinterpret_cast<void *>(mfReader.Detach());
            return true;
        }
    }
    

    After setting the media type, the IMFSourceReader object is stored in the reader member variable.

Capturing a Frame

  1. Read a sample from the camera:

    HRESULT hr;
    // Output values filled in by ReadSample below.
    DWORD streamIndex, flags;
    LONGLONG timestamp;
    ComPtr<IMFSample> sample;
    FrameData frame;
    
    // Start from a frame with no pixel data; it is returned as-is on failure.
    frame.width = frameWidth;
    frame.height = frameHeight;
    frame.rgbData = nullptr;
    
    // reader holds the IMFSourceReader stored when the camera was opened.
    IMFSourceReader *mfReader = reinterpret_cast<IMFSourceReader *>(reader);
    // Read from the first video stream with no control flags.
    hr = mfReader->ReadSample(
        MF_SOURCE_READER_FIRST_VIDEO_STREAM,
        0,
        &streamIndex,
        &flags,
        &timestamp,
        &sample);
    
    if (FAILED(hr))
    {
        std::cerr << "Failed to read sample." << std::endl;
        return frame; 
    }
    
  2. Get the raw data from the sample and convert it to RGB888 format:

    // ReadSample can succeed without delivering a sample; frame.rgbData then stays null.
    if (sample)
    {
        ComPtr<IMFMediaBuffer> buffer;
        hr = sample->ConvertToContiguousBuffer(&buffer);
        if (FAILED(hr))
        {
            std::cerr << "Failed to convert sample to contiguous buffer." << std::endl;
            return frame; 
        }
    
        BYTE *rawData = nullptr;
        DWORD maxLength = 0, currentLength = 0;
        hr = buffer->Lock(&rawData, &maxLength, &currentLength);
        if (SUCCEEDED(hr))
        {
            // The converter reads 2 bytes per pixel (YUY2); refuse a sample that
            // is shorter than the configured frame rather than read past its end.
            const size_t requiredBytes = static_cast<size_t>(frameWidth) * frameHeight * 2;
            if (currentLength < requiredBytes)
            {
                std::cerr << "Sample is smaller than the configured frame size." << std::endl;
                buffer->Unlock();
                return frame;
            }

            // Plain new reports failure by throwing, so no null check is needed here.
            frame.size = frameWidth * frameHeight * 3;
            frame.rgbData = new unsigned char[frame.size];
    
            ConvertYUY2ToRGB(rawData, frame.rgbData, frameWidth, frameHeight);
    
            buffer->Unlock();
        }
    }
    

Closing a Camera

Release the IMFSourceReader object and Media Foundation resources:

// Drop the reference that was taken over from ComPtr::Detach() when the camera
// was opened. Wrapping the raw pointer in a ComPtr via its constructor would
// add a reference of its own and release only that one, leaking the reader.
if (reader)
{
    static_cast<IMFSourceReader *>(reader)->Release();
    reader = nullptr;
}

// Shut Media Foundation down only if this object started it.
if (initialized)
{
    MFShutdown();
    initialized = false;
}
Enter fullscreen mode Exit fullscreen mode

Implementing Display-Related Functions for Windows

Updating the Header File to Support Both Windows and Linux

To support cross-platform compatibility, the CameraPreview.h header file was updated as follows:

  • Include the platform-specific windowing headers and define the CAMERA_API macro for Windows and Linux.

    #ifdef _WIN32
    #include <windows.h>
    #elif __linux__
    #include <X11/Xlib.h>
    #include <X11/Xutil.h>
    #elif __APPLE__
    #include <Cocoa/Cocoa.h>
    #endif
    
    // CAMERA_API decorates the declarations that make up the library's public interface.
    #ifdef _WIN32
    // Windows: dllexport when CAMERA_EXPORTS is defined, dllimport otherwise.
    #ifdef CAMERA_EXPORTS
    #define CAMERA_API __declspec(dllexport)
    #else
    #define CAMERA_API __declspec(dllimport)
    #endif
    #elif defined(__linux__) || defined(__APPLE__)
    // Linux/macOS: give the symbol default visibility.
    #define CAMERA_API __attribute__((visibility("default")))
    #else
    // Any other platform: no decoration.
    #define CAMERA_API
    #endif
    
  • Add platform-specific window and rendering components:

    // Preview window. Only the platform-specific private members are shown here;
    // the public interface and shared members are omitted from this excerpt.
    class CAMERA_API CameraWindow
    {
    
    private:
    
    #ifdef _WIN32
        // Window procedure registered through wc; static so it can be used as a plain callback.
        static LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam);
    
        HWND hwnd;           // window handle; nullptr until the window is created
        HDC hdc;             // device context of hwnd used for all drawing
        WNDCLASS wc;         // window class description passed to RegisterClass
        HINSTANCE hInstance; // module handle used for class registration and window creation
    #elif __linux__
        // Xlib handles; their setup is not part of this excerpt.
        Display *display;
        Window window;
        GC gc; 
        Atom wmDeleteMessage;
    #endif
    };
    

Constructor and Destructor

The constructor initializes the window class and event callback. The destructor cleans up resources:

// Remembers the requested size and title and prepares the window class
// description. Nothing is registered or created here; that happens in Create().
CameraWindow::CameraWindow(int w, int h, const std::string &t)
    : width(w), height(h), title(t), hwnd(nullptr), hdc(nullptr)
{
    // Module handle of the running executable.
    hInstance = GetModuleHandle(nullptr);

    // Build the description in a local, then store it in the member.
    WNDCLASS classDesc = {};
    classDesc.lpszClassName = "CameraWindowClass";
    classDesc.hInstance = hInstance;
    classDesc.lpfnWndProc = WindowProc;
    wc = classDesc;
}

// Tears down in reverse order of creation: device context, window, window class.
CameraWindow::~CameraWindow()
{
    if (hdc != nullptr)
        ReleaseDC(hwnd, hdc);

    if (hwnd != nullptr)
        DestroyWindow(hwnd);

    // Attempted unconditionally, even if the class was never registered.
    UnregisterClass("CameraWindowClass", hInstance);
}

// Window procedure: posts a quit message when the window is destroyed and
// leaves every other message to the default handler.
LRESULT CALLBACK CameraWindow::WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
    if (uMsg != WM_DESTROY)
        return DefWindowProc(hwnd, uMsg, wParam, lParam);

    PostQuitMessage(0);
    return 0;
}

Enter fullscreen mode Exit fullscreen mode

Creating a Window

Invoke CreateWindowEx to create a window and GetDC to get the device context:

bool CameraWindow::Create()
{
    if (!RegisterClass(&wc))
    {
        std::cerr << "Failed to register window class." << std::endl;
        return false;
    }

    hwnd = CreateWindowEx(
        0, "CameraWindowClass", title.c_str(), WS_OVERLAPPEDWINDOW,
        CW_USEDEFAULT, CW_USEDEFAULT, width, height,
        nullptr, nullptr, hInstance, nullptr);

    if (!hwnd)
    {
        std::cerr << "Failed to create window." << std::endl;
        return false;
    }

    hdc = GetDC(hwnd);
    return true;
}
Enter fullscreen mode Exit fullscreen mode

Showing the Window

Call ShowWindow to display the window:

// Makes the window visible. hwnd is set by Create(), so call that first.
void CameraWindow::Show()
{
    ShowWindow(hwnd, SW_SHOW);
}
Enter fullscreen mode Exit fullscreen mode

Processing a Keyboard Event

Capture keyboard input to exit the application:

// Drains the calling thread's pending window messages without blocking.
//
// @param key  character whose key should end the loop, matched case-insensitively
//             for letters; '\0' disables the key check.
// @return false when a quit message was received or the requested key was
//         pressed; true otherwise, meaning the caller should keep running.
bool CameraWindow::WaitKey(char key)
{
    // WM_KEYDOWN carries a virtual-key code. For letters that code equals the
    // upper-case ASCII value, so only the upper-cased character is compared.
    // The raw lower-case value would collide with other keys: 'q' is 0x71, which
    // is also VK_F2. The cast keeps std::toupper defined for negative chars.
    const int wantedKey = (key != '\0') ? std::toupper(static_cast<unsigned char>(key)) : 0;

    MSG msg = {};
    while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE))
    {
        // A quit message is handled here and never dispatched.
        if (msg.message == WM_QUIT)
        {
            return false;
        }

        TranslateMessage(&msg);
        DispatchMessage(&msg);

        if (msg.message == WM_KEYDOWN && wantedKey != 0 &&
            static_cast<int>(msg.wParam) == wantedKey)
        {
            return false;
        }
    }
    return true;
}
Enter fullscreen mode Exit fullscreen mode

Displaying a Camera Frame

Use StretchDIBits function to render the camera frame.

void CameraWindow::ShowFrame(const unsigned char *rgbData, int frameWidth, int frameHeight)
{
    if (!hdc || !rgbData)
        return;

    BITMAPINFO bmpInfo = {};
    bmpInfo.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmpInfo.bmiHeader.biWidth = frameWidth;
    bmpInfo.bmiHeader.biHeight = -frameHeight; 
    bmpInfo.bmiHeader.biPlanes = 1;
    bmpInfo.bmiHeader.biBitCount = 24; 
    bmpInfo.bmiHeader.biCompression = BI_RGB;

    StretchDIBits(
        hdc,
        0, 0, frameWidth, frameHeight, 
        0, 0, frameWidth, frameHeight,
        rgbData,                       
        &bmpInfo,                      
        DIB_RGB_COLORS,                
        SRCCOPY                        
    );
}
Enter fullscreen mode Exit fullscreen mode

Drawing Text on the Window

Draw text on the window using the TextOut function.

void CameraWindow::DrawText(const std::string &text, int x, int y, int fontSize, const Color &color)
{
    if (!hdc)
        return;

    SetTextColor(hdc, RGB(color.r, color.g, color.b));
    SetBkMode(hdc, TRANSPARENT);

    HFONT hFont = CreateFont(
        fontSize,                    
        0,                           
        0,                           
        0,                           
        FW_NORMAL,                  
        FALSE,                       
        FALSE,                       
        FALSE,                       
        DEFAULT_CHARSET,             
        OUT_DEFAULT_PRECIS,          
        CLIP_DEFAULT_PRECIS,         
        DEFAULT_QUALITY,             
        DEFAULT_PITCH | FF_DONTCARE, 
        "Arial");                    

    if (!hFont)
        return;

    HGDIOBJ oldFont = SelectObject(hdc, hFont);

    TextOut(hdc, x, y, text.c_str(), static_cast<int>(text.length()));

    SelectObject(hdc, oldFont);
    DeleteObject(hFont);
}

Enter fullscreen mode Exit fullscreen mode

Drawing Contours on the Window

Draw contours on the window using the MoveToEx and LineTo functions.

void CameraWindow::DrawContour(const std::vector<std::pair<int, int>> &points)
{
    if (!hdc || points.size() < 4)
        return;

    HPEN hPen = CreatePen(PS_SOLID, 2, RGB(0, 255, 0)); 
    HGDIOBJ oldPen = SelectObject(hdc, hPen);

    MoveToEx(hdc, points[0].first, points[0].second, nullptr);
    for (size_t i = 1; i < points.size(); ++i)
    {
        LineTo(hdc, points[i].first, points[i].second);
    }
    LineTo(hdc, points[0].first, points[0].second); 

    SelectObject(hdc, oldPen);
    DeleteObject(hPen);
}
Enter fullscreen mode Exit fullscreen mode

Building a Windows Barcode Scanner Application

To build the barcode scanner, no changes are needed for the barcode scanning logic. Follow these steps:

  1. Prepare the camera library and Dynamsoft C++ Barcode SDK for Windows.
  2. Update the CMakeLists.txt file to include the Windows-specific configuration.

    # Build script for the BarcodeScanner example: links the litecam camera library
    # and the Dynamsoft SDK libraries, then copies their runtime files next to the
    # executable.
    cmake_minimum_required(VERSION 3.10)
    project(BarcodeScanner)
    
    if(WIN32)
    
        # Choose the litecam library directory from the build type.
        # NOTE(review): CMAKE_BUILD_TYPE is evaluated at configure time and is usually
        # empty with multi-config generators such as Visual Studio, in which case the
        # debug directory is selected — confirm this is intended.
        if(CMAKE_BUILD_TYPE STREQUAL "Release")
            link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/windows/release ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/win/lib)
        else()
            link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/windows/debug ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/win/lib)
        endif()
    
        # Windows import libraries carry an x64 suffix.
        set(DBR_LIBS "DynamsoftCorex64" "DynamsoftLicensex64" "DynamsoftCaptureVisionRouterx64" "DynamsoftUtilityx64")
    elseif(UNIX)
        # Set an $ORIGIN rpath so shared libraries are looked up next to the executable.
        SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Wl,-rpath=$ORIGIN")
        SET(CMAKE_INSTALL_RPATH "$ORIGIN")
        link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/linux ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/linux)
        set(DBR_LIBS "DynamsoftCore" "DynamsoftLicense" "DynamsoftCaptureVisionRouter" "DynamsoftUtility" pthread)
    endif()
    
    # Create the executable
    add_executable(BarcodeScanner main.cpp)
    target_include_directories(BarcodeScanner PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../dist/include ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/include)
    target_link_libraries(BarcodeScanner litecam ${DBR_LIBS})
    
    # After each build, copy the runtime files into the executable's directory.
    if(WIN32)
        # litecam binaries matching the build type selected above.
        if(CMAKE_BUILD_TYPE STREQUAL "Release")
            add_custom_command(TARGET BarcodeScanner POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/windows/release   
            $<TARGET_FILE_DIR:BarcodeScanner>)
        else()
            add_custom_command(TARGET BarcodeScanner POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${CMAKE_CURRENT_SOURCE_DIR}/../../dist/lib/windows/debug   
            $<TARGET_FILE_DIR:BarcodeScanner>)
        endif()
    
        # Dynamsoft SDK runtime files for Windows.
        add_custom_command(TARGET BarcodeScanner POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/win/bin/      
        $<TARGET_FILE_DIR:BarcodeScanner>)
    elseif(UNIX)
        # Dynamsoft SDK runtime files for Linux.
        add_custom_command(TARGET BarcodeScanner POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/10.x/sdk/platforms/linux/      
        $<TARGET_FILE_DIR:BarcodeScanner>)
    endif()
    
  3. Build the application using CMake.

    # Configure and build out of source, in a separate build directory.
    mkdir build
    cd build
    # Generate the build system from the CMakeLists.txt in the parent directory.
    cmake ..
    # Build with whichever generator was configured.
    cmake --build .
    

    Windows Barcode Scanner

Source Code

https://github.com/yushulx/cmake-cpp-barcode-qrcode-mrz/tree/main/litecam

Top comments (0)