Hello, I am new to TensorFlow and machine learning in general.
I have been following this tutorial:
and I was wondering if someone could help me use the model I trained to run detection in real time. The tutorial uses a webcam, but I would like to use a screen-capture method instead, so that my bot can detect certain images in a web browser.
The tutorial uses the following code for real-time webcam detection:
# Webcam Capture Code
# Open capture device 1 and read its reported frame size.
cap = cv2.VideoCapture(1)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Run detection on every frame until the device closes, a read fails,
# or "q" is pressed in the preview window.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered, so there is nothing valid to feed the
            # detector; stop instead of crashing on an empty frame.
            break

        image_np = np.array(frame)

        # Add a leading axis of size 1 and convert to float32 for detect_fn.
        input_tensor = tf.convert_to_tensor(
            np.expand_dims(image_np, 0), dtype=tf.float32)
        detections = detect_fn(input_tensor)

        # Keep only index 0 along the first axis and the first
        # num_detections entries of every output, as NumPy arrays.
        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = (
            detections['detection_classes'].astype(np.int64))

        # NOTE(review): presumably the model's class ids are zero-based while
        # category_index starts at 1 - confirm against the label map.
        label_id_offset = 1
        image_np_with_detections = image_np.copy()

        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_detections,
            detections['detection_boxes'],
            detections['detection_classes'] + label_id_offset,
            detections['detection_scores'],
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=5,
            min_score_thresh=.8,
            agnostic_mode=False)

        cv2.imshow('object detection',
                   cv2.resize(image_np_with_detections, (800, 600)))

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the device and close the window on every exit path,
    # including errors raised inside the loop.
    cap.release()
    cv2.destroyAllWindows()
I found this screen-capture method written in Python. I think the two could be combined, but I am not sure how.
#Grab Screen
import time
import cv2
import mss
import numpy
# Name of the preview window.
title = "FPS benchmark"

# Frame-rate reporting: print the rate once every `display_time` seconds.
display_time = 2
fps = 0  # frames shown since the last report
start_time = time.time()  # when the current reporting interval began

# Screen-capture handle, reused for every grab.
sct = mss.mss()

# Screen region to capture, in pixels.
monitor = dict(top=0, left=0, width=800, height=640)
def screen_recordMSS():
    """Capture the ``monitor`` region in a loop, display it, and print the FPS.

    Each frame is grabbed with the module-level ``sct`` and shown in the
    window named ``title``. Once at least ``display_time`` seconds have
    passed since the last report, the average frame rate over that interval
    is printed. The loop ends when "q" is pressed in the window.

    Reads and updates the module-level ``fps`` and ``start_time``.
    """
    global fps, start_time
    try:
        while True:
            # Get raw pixels from the screen, save them to a NumPy array.
            frame = numpy.array(sct.grab(monitor))

            # mss delivers BGRA pixels; dropping the alpha channel once
            # leaves the BGR layout that cv2.imshow expects. This replaces
            # two successive BGR<->RGB swaps that cancelled each other out.
            cv2.imshow(title, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))

            fps += 1
            elapsed = time.time() - start_time
            if elapsed >= display_time:
                print("FPS: ", fps / elapsed)
                fps = 0
                start_time = time.time()

            # Press "q" to quit
            if cv2.waitKey(25) & 0xFF == ord("q"):
                break
    finally:
        # Close the preview window on every exit path, including errors.
        cv2.destroyAllWindows()
# Start the capture loop; it runs until "q" is pressed in the preview window.
screen_recordMSS()
Either way I appreciate your help, and even though I am a beginner I find this technology very interesting.