Gökhan ERGEN

Python Mouse Control Remotely With Your Hand

Normally we control the computer with a physical mouse, but we can also drive the pointer remotely with our hand.

In Python, this is possible with libraries such as mediapipe, pyautogui and opencv. Here, mediapipe lets us track hand landmarks, which we then use to trigger the actions: left click, right click and double click.
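For reference, mediapipe's hand model indexes 21 landmarks per hand; the ones used below are 0 (wrist), 4 (thumb tip), 8 (index tip), 12 (middle tip), 16 (ring tip) and 20 (pinky tip), plus 3, 9, 10 and 17 around the palm for estimating the hand's size and centre.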

pyautogui handles the mouse events, i.e. moving the pointer and clicking. The project can still be optimized further, though.
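If you want to try this yourself, installing the dependencies below should be enough (these are the PyPI package names; pywin32 provides win32api, so this setup assumes Windows):

pip install mediapipe pyautogui opencv-python psutil pywin32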

from concurrent.futures import ThreadPoolExecutor
from math import sqrt
import cv2
import mediapipe as mp
import psutil
from entities.mouse import Mouse
from win32api import GetSystemMetrics

grabSize = 50

pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5
                       )

mp_draw = mp.solutions.drawing_utils
mouse = Mouse(GetSystemMetrics(0), GetSystemMetrics(1))


def resolution(cap, w, h):
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)   # property id 3
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)  # property id 4


def isExist(value):
    # mediapipe returns None when no hand is detected
    return bool(value)


def getDistance(a, b, hands_landmarks, scale=False, w=0, h=0):
    dx = hands_landmarks[0].landmark[a].x - hands_landmarks[0].landmark[b].x
    dy = hands_landmarks[0].landmark[a].y - hands_landmarks[0].landmark[b].y
    if (not scale):
        return sqrt(dx ** 2 + dy ** 2)
    else:
        return sqrt((dx * w) ** 2 + (dy * h) ** 2)


def draw(frame, hands_landmarks):
    h, w, c = frame.shape

    distance_three_to_seventeen = round(getDistance(3, 17, hands_landmarks, True, w, h))
    distance_zero_to_ten = round(getDistance(0, 10, hands_landmarks, True, w, h))
    distance_left = getDistance(4, 8, hands_landmarks)
    distance_double_click = getDistance(4, 12, hands_landmarks)
    distance_point = getDistance(9, 0, hands_landmarks)
    distance_right = getDistance(16, 4, hands_landmarks)
    distance_width = getDistance(4, 20, hands_landmarks)
    distance_height = getDistance(0, 12, hands_landmarks)

    # Crop the frame by margins proportional to the detected hand size, so
    # the whole screen can be reached without moving the hand to the edge.
    wrapped_frame = frame[int(distance_zero_to_ten):int(h - distance_zero_to_ten),
                    int(distance_three_to_seventeen):int(w - distance_three_to_seventeen)]

    wh, ww, wc = wrapped_frame.shape
    wx = w * (hands_landmarks[0].landmark[0].x + hands_landmarks[0].landmark[9].x) / 2 - distance_three_to_seventeen
    wy = h * (hands_landmarks[0].landmark[0].y + hands_landmarks[0].landmark[9].y) / 2 - distance_zero_to_ten

    if (wx < 0):
        wx = 1
    if (wy < 0):
        wy = 1

    try:
        x = round((wx) * (mouse._swidth / (ww)))
        y = round((wy) * (mouse._sheight / (wh)))
    except ZeroDivisionError:
        return

    # Move the pointer: x is mirrored (the webcam shows a mirror image), and
    # hand-sized offsets nudge the pointer over part of the screen.
    mouse.move(mouse._swidth - x + (0 if (mouse._swidth - x) < mouse._swidth / 2 else distance_width), y +
               (0 if (mouse._sheight - y) < mouse._sheight / 2 else distance_height))

    if (distance_left * 100 < distance_point * 100 / 5):
        mouse.click()
    elif (distance_right * 100 < distance_point * 100 / 5):
        mouse.click(button="right")
    elif (distance_double_click * 100 < distance_point * 100 / 5):
        mouse.click(button="double")


def start(cam_code=0):
    cap = cv2.VideoCapture(cam_code)

    while (True):
        ret, frame = cap.read()
        if not ret:
            print("failed to grab frame")
            break
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        grappedImage = frame[grabSize:frame.shape[0] - grabSize, grabSize:frame.shape[1] - grabSize]

        rgb_img = cv2.cvtColor(grappedImage, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_img)
        hands_landmarks = results.multi_hand_landmarks

        if (isExist(hands_landmarks)):

            try:
                lm = hands_landmarks[0].landmark
                # Show a cropped preview of the detected hand: rows from the
                # middle fingertip (12) down to the wrist (0), columns from
                # the pinky tip (20) to the thumb tip (4).
                y1 = int(grappedImage.shape[0] * lm[12].y)
                y2 = int(grappedImage.shape[0] * lm[0].y)
                x1 = int(grappedImage.shape[1] * lm[20].x)
                x2 = int(grappedImage.shape[1] * lm[4].x)
                cv2.imshow("hand", grappedImage[y1:y2, x1:x2])
            except Exception:
                pass
            pool.submit(draw, grappedImage, hands_landmarks)

        cv2.imshow("Hand tracer", grappedImage)
        if (cv2.waitKey(1) == ord('q')):
            break

    cap.release()
    cv2.destroyAllWindows()

cap.py is responsible for grabbing frames. Each frame holds the pixel colors (in OpenCV's BGR format, converted to RGB before being passed to mediapipe), and hands_landmarks holds the detected hand and its points. A thread pool is used here to keep the render loop fast, but this may not be the best solution, so it can be improved.

pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())

The pool size is set to the number of CPU cores reported by psutil.
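One possible refinement (just a sketch, not part of the original project): submit a new draw() task only when the previous one has finished, so stale landmark batches never pile up behind slow ones.

# hypothetical helper built on the article's pool and draw()
_last_future = None

def submit_draw(frame, landmarks):
    global _last_future
    if _last_future is None or _last_future.done():
        _last_future = pool.submit(draw, frame, landmarks)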

The Euclidean distance is central here: I use it to measure how far apart two hand landmarks are, and those distances drive the mouse actions.
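As a quick worked example with made-up coordinates: a thumb tip at (0.40, 0.55) and an index fingertip at (0.43, 0.59) are 0.05 apart in normalized units, which is the kind of value getDistance(4, 8, ...) returns:

from math import sqrt

# hypothetical normalized landmark coordinates
dx = 0.40 - 0.43
dy = 0.55 - 0.59
print(sqrt(dx ** 2 + dy ** 2))  # 0.05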

if (distance_left * 100 < distance_point * 100 / 5):
    mouse.click()
elif (distance_right * 100 < distance_point * 100 / 5):
    mouse.click(button="right")
elif (distance_double_click * 100 < distance_point * 100 / 5):
    mouse.click(button="double")

This is the decision rule: since both sides are multiplied by 100, the condition boils down to a pinch counting as a click whenever the fingertip distance drops below one fifth of distance_point. On the screen, the mouse position is obtained by scaling the calculated hand point to the computer's screen resolution.
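As a minimal sketch of that mapping (same names as in draw() above, simplified by dropping the edge-correction offsets): the hand centre (wx, wy), measured in pixels inside the cropped frame of size ww x wh, is stretched to screen coordinates and mirrored on the x axis because the webcam shows a mirror image:

x = round(wx * (mouse._swidth / ww))    # horizontal scale factor
y = round(wy * (mouse._sheight / wh))   # vertical scale factor
mouse.move(mouse._swidth - x, y)        # mirror x, keep y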

import pyautogui
import threading
import time


class Mouse():
    def __init__(self, screen_width, screen_height):
        self._swidth = screen_width
        self._sheight = screen_height

    def move(self, x, y):
        # Only move while the target stays inside the screen bounds.
        if 0 < x < self._swidth and 0 < y < self._sheight:
            pyautogui.moveTo((x, y))

    def start_click(self, button):
        x, y = pyautogui.position()
        if x < self._swidth and y < self._sheight:
            if button == "left":
                pyautogui.click(x, y)
            elif button == "right":
                pyautogui.click(x, y, button="right")
            else:
                pyautogui.doubleClick(x, y)

    def click(self, button="left"):
        # A short delay lets the pointer settle; the click then runs on its
        # own thread so the tracking loop is not blocked.
        time.sleep(0.15)
        thread = threading.Thread(target=self.start_click, args=(button,), daemon=False)
        thread.start()
def move(self, x, y):
    # Only move while the target stays inside the screen bounds.
    if 0 < x < self._swidth and 0 < y < self._sheight:
        pyautogui.moveTo((x, y))

The position computed from the frame is applied here. The other actions, such as left and right click, work the same way through start_click.
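A quick usage sketch (values are illustrative, assuming a 1920x1080 screen):

from win32api import GetSystemMetrics
from entities.mouse import Mouse

mouse = Mouse(GetSystemMetrics(0), GetSystemMetrics(1))
mouse.move(960, 540)           # centre of the screen
mouse.click()                  # left click
mouse.click(button="double")   # double click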

import cv2

def cam_control(code):
    cap = cv2.VideoCapture(code)
    res, frame = cap.read()
    if res:
        cv2.imshow("test", frame)
        cv2.waitKey(0)  # without waitKey the window would close immediately
    cv2.destroyAllWindows()
    cap.release()

You can check whether your camera is working with this helper; for example, cam_control(0) grabs a single frame from the default camera and displays it until a key is pressed.

from core.cap import start

if __name__ == '__main__':
    start(0)

Finally, we can start the application. It can still be improved, because there are some problems, such as the mouse clicking by itself or clicks being hard to perform in some screen positions.

Python Computer Vision - Mouse Control Remotely
I showed you that it is possible to use the mouse with our hand. There is also a different choice: using the eyes with…

Example Video:
https://www.youtube.com/shorts/8_0IBCoxzPg

https://github.com/gokhanergen-tech/Remote-Computer-Control-With-Hand
