DEV Community

loading...
Cover image for Golang: 4 points Image transformation using GoCV / OpenCV

Golang: 4 points Image transformation using GoCV / OpenCV

gkumarau profile image Gourav Kumar Originally published at gouravkumar.dev ・5 min read

In this post, I will cover the following:

  • What is 4-points OpenCV image transformation?
  • How to implement 4-points OpenCV image transformation in Golang using Gocv?
  • Demonstrate usage of OpenCV getPerspectiveTransform function in Golang.
  • Demonstrate how to do computer vision and image processing in Golang
  • Demonstrate how to do numerical processing in Golang using gonum

Prerequisite

  • Basic knowledge of Golang.
  • Golang & your favourite IDE installed on your machine.
  • OpenCV v4.3.0 or above and pkgconfig installed on your machine. On macOS you can install both with: brew install opencv && brew install pkgconfig
  • GoCV git repository cloned to your local computer in GOPATH directory. Follow the instructions to install & clone gocv from its official website

What is OpenCV?

OpenCV (Open Source Computer Vision Library) is a popular open-source software library with more than 2500 algorithms for computer vision and machine learning problems. These algorithms are widely used to detect faces or moving objects, to generate high-resolution 3D images, and so on. You can read more about OpenCV on their official website.

Luckily, there are libraries and packages in many languages that give access to the OpenCV algorithms, so you don't need to implement them from scratch. GoCV provides Go bindings for OpenCV, so these algorithms can be called directly from Golang.

What is 4 point OpenCV image transformation?

4-point image transformation straightens a region of an image by selecting its four corner points and warping them onto an upright rectangle. In the cover image, the four points highlighted in green are the ones used for the transformation. OpenCV's getPerspectiveTransform() computes the transformation matrix from those points, and warpPerspective() applies it to the image.


Enough theory, let's code

Most of the code provided below has decent comments to understand what's happening. The complete code can also be found at Github.

Feel free to use the below code as-is or customise for personal/commercial use at your own risk. For any feedback or question, reach out to me on Linkedin or Twitter.

package main

import (
    "gocv.io/x/gocv"
    "gonum.org/v1/gonum/floats"
    "gonum.org/v1/gonum/mat"
    "image"
    "math"
)

// main loads the sample image, shows it in a window, applies the 4-point
// perspective transform to a hard-coded set of corner points and then
// shows the transformed result in a second window.
func main() {
	// Create a new gocv window to show the original image.
	originalWindow := gocv.NewWindow("Original")
	defer originalWindow.Close()

	// Use IMRead function to read the image in colored form.
	// Change the image location here to your custom image.
	originalImage := gocv.IMRead("samples/original.png", gocv.IMReadColor)

	// IMRead signals failure by returning an empty Mat instead of an error.
	// Stop here with a readable message rather than handing an empty Mat
	// to IMShow.
	if originalImage.Empty() {
		panic("could not read image: samples/original.png")
	}
	defer originalImage.Close()

	// Show the original image in the gocv window.
	// Press any key to proceed to the next step.
	for {
		originalWindow.IMShow(originalImage)
		if originalWindow.WaitKey(1) >= 0 {
			break
		}
	}

	// Create a 4 x 2 matrix which has 4 rows and two columns.
	// Each row represents the coordinates of a point.
	// Column (0, 1) represents the X, Y coordinate of a point.
	// We need to pass 4 points (i.e. the 4 corners of the region of
	// interest) to the matrix below.
	// Important: the order of rows doesn't matter. Which point is
	// top-left, top-right, bottom-right and bottom-left is worked out
	// automatically by FourPointTransform.
	pts := mat.NewDense(4, 2, []float64{
		69, 145,
		97, 735,
		938, 723,
		971, 170,
	})
	// The 4 points above are hard-coded for the sample image; replace
	// them when using a different image.

	// Create a new gocv window to show the transformed image.
	transformedWindow := gocv.NewWindow("Transformed")
	defer transformedWindow.Close()

	// Call our custom function FourPointTransform() to transform the
	// image. It expects the original gocv image and the matrix holding
	// the points of interest.
	//
	// The returned Mat is deliberately not closed here: it may share its
	// underlying native handle with originalImage, and closing the same
	// handle twice would be a double free. The process exits right after
	// the loop below, which releases it in any case.
	transformedImage := FourPointTransform(originalImage, pts)

	// Show the transformed image in the gocv window.
	// Press any key to exit.
	for {
		transformedWindow.IMShow(transformedImage)
		if transformedWindow.WaitKey(1) >= 0 {
			break
		}
	}
}

Enter fullscreen mode Exit fullscreen mode

// FourPointTransform performs a 4-point perspective transformation.
//
// pts is a 4 x 2 matrix whose rows are the (X, Y) coordinates of the four
// corners of the region to straighten; the rows may be given in any order.
// The region is warped onto an upright rectangle whose width and height are
// the longest of the two horizontal and the two vertical edges respectively.
//
// img is left untouched. The result is returned as a newly allocated Mat
// which the caller owns and should Close when done with it.
func FourPointTransform(img gocv.Mat, pts *mat.Dense) gocv.Mat {
	// Order the points as top-left, top-right, bottom-right, bottom-left.
	rect := orderPoints(pts)
	tl := rect.RawRowView(0)
	tr := rect.RawRowView(1)
	br := rect.RawRowView(2)
	bl := rect.RawRowView(3)

	// edge returns the Euclidean distance between two (X, Y) points.
	edge := func(a, b []float64) float64 {
		return math.Sqrt(math.Pow(a[0]-b[0], 2) + math.Pow(a[1]-b[1], 2))
	}

	// The width of the new image is the longer of the bottom edge
	// (bottom-right to bottom-left) and the top edge (top-right to
	// top-left). The fractional part is truncated.
	maxWidth := int(math.Max(edge(br, bl), edge(tr, tl)))

	// The height of the new image is the longer of the right edge
	// (top-right to bottom-right) and the left edge (top-left to
	// bottom-left). The fractional part is truncated.
	maxHeight := int(math.Max(edge(tr, br), edge(tl, bl)))

	// Now that we have the dimensions of the new image, construct the
	// set of destination points to obtain a "birds eye view" (i.e.
	// top-down view) of the image, again in top-left, top-right,
	// bottom-right, bottom-left order.
	dst := mat.NewDense(4, 2, []float64{
		0, 0,
		float64(maxWidth) - 1, 0,
		float64(maxWidth) - 1, float64(maxHeight) - 1,
		0, float64(maxHeight) - 1,
	})

	// gocv does not accept *mat.Dense directly, so both point sets are
	// converted to []image.Point with convertDenseToImagePoint() first.
	M := gocv.GetPerspectiveTransform(convertDenseToImagePoint(rect), convertDenseToImagePoint(dst))
	// The transformation matrix is only needed for the warp below;
	// release its native memory when we return.
	defer M.Close()

	// Warp into a fresh Mat so that the caller's image is not overwritten.
	warped := gocv.NewMat()
	gocv.WarpPerspective(img, &warped, M, image.Point{X: maxWidth, Y: maxHeight})

	return warped
}

// convertDenseToImagePoint turns a gonum matrix with one (X, Y) pair per row
// into a slice of image.Point. Coordinates are converted with int(), which
// drops the fractional part. A matrix that does not have exactly two columns
// yields a nil slice.
func convertDenseToImagePoint(pts *mat.Dense) []image.Point {
	rows, cols := pts.Dims()
	if cols != 2 {
		return nil
	}

	var points []image.Point
	for i := 0; i < rows; i++ {
		x, y := pts.At(i, 0), pts.At(i, 1)
		points = append(points, image.Point{X: int(x), Y: int(y)})
	}
	return points
}

Enter fullscreen mode Exit fullscreen mode
// orderPoints returns a new 4 x 2 matrix holding rows of pts arranged as
// top-left, top-right, bottom-right, bottom-left.
//
// With X in column 0 and Y in column 1, the corners are picked as follows:
//   - top-left:     smallest X + Y
//   - bottom-right: largest  X + Y
//   - top-right:    smallest Y - X
//   - bottom-left:  largest  Y - X
func orderPoints(pts *mat.Dense) *mat.Dense {
	topLeft, bottomRight := findMinMaxSumIndex(*pts)
	topRight, bottomLeft := findMinMaxDiffIndex(*pts)

	ordered := mat.NewDense(4, 2, nil)
	// The position in this list is the destination row; the value is the
	// row of pts that gets copied there.
	for dstRow, srcRow := range []int{topLeft, topRight, bottomRight, bottomLeft} {
		ordered.SetRow(dstRow, pts.RawRowView(srcRow))
	}
	return ordered
}
Enter fullscreen mode Exit fullscreen mode
// findMinMaxSumIndex adds up the values of every row of pts and returns the
// index of the row with the smallest sum followed by the index of the row
// with the largest sum. On ties the earliest row wins. Both indices are 0
// when pts has no rows.
func findMinMaxSumIndex(pts mat.Dense) (int, int) {
	rows, cols := pts.Dims()

	lo, hi := 0, 0
	var loSum, hiSum float64

	for r := 0; r < rows; r++ {
		var total float64
		for k := 0; k < cols; k++ {
			total += pts.At(r, k)
		}

		switch {
		case r == 0:
			// The first row seeds both running extremes.
			loSum, hiSum = total, total
		case total > hiSum:
			hiSum, hi = total, r
		case total < loSum:
			loSum, lo = total, r
		}
	}
	return lo, hi
}

// findMinMaxDiffIndex computes, for every row of pts, the last column minus
// all preceding columns (Y - X for a two-column matrix) and returns the index
// of the row with the smallest result followed by the index of the row with
// the largest result. On ties the earliest row wins. Both indices are 0 when
// pts has no rows.
func findMinMaxDiffIndex(pts mat.Dense) (int, int) {
	rows, cols := pts.Dims()

	lo, hi := 0, 0
	var loDiff, hiDiff float64

	for r := 0; r < rows; r++ {
		// Start from the last column and subtract the others walking
		// backwards; this requires at least one column.
		delta := pts.At(r, cols-1)
		for k := cols - 2; k >= 0; k-- {
			delta -= pts.At(r, k)
		}

		switch {
		case r == 0:
			// The first row seeds both running extremes.
			loDiff, hiDiff = delta, delta
		case delta > hiDiff:
			hiDiff, hi = delta, r
		case delta < loDiff:
			loDiff, lo = delta, r
		}
	}
	return lo, hi
}
Enter fullscreen mode Exit fullscreen mode

Discussion (0)

pic
Editor guide