After all the previous steps, we need to combine the trained CNN into a real application that recognizes traffic signs in a video stream.
Let's do it. The Python program:
#!/usr/bin/env python
'''
traffic signs detection with the trained CNN traffic_ru_v1.pb

USAGE: ts_detect_2.py [<video_source>]
'''

# Python 2/3 compatibility
from __future__ import print_function
from __future__ import division    # for python 2.7, to make the result of division a float

import time      # to measure procedure execution time
import os
import random
import math

import cv2
import skimage.data
import skimage.transform
import skimage.exposure
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import tensorflow as tf
import pandas as pd
import imutils

from numpy import pi, sin, cos

# local helper modules shipped with the OpenCV samples; they are needed
# only by the synthetic video classes (Book, Cube, Chess) below
from tst_scene_render import TestSceneRender
import common

###################################################################################################
def findTS(pic):
    """ find traffic signs procedure """
    # array of 32x32x3 resized images found during the search procedure
    images = []
    rects = []

    """ first step - preparing the picture """
    # read the picture
    image = cv2.imread(pic)
    #image = cv2.cvtColor(pic, cv2.COLOR_BGR2RGB)
    # define the dimensions and the center of the image;
    # picture coordinates start from the top left corner
    height, width = image.shape[:2]
    #print(str(height) + " " + str(width))
    center_y = int(height/2)
    center_x = int(width/2)
    # define the array of distances from the image center - each is tied to a contour area:
    # the farther from the center, the bigger a contour has to be (look at the picture
    # test_1.jpg - the 3 red squares show these areas)
    dist_ = [center_x/3, center_x/2, center_x/1.5]
    # define the main interest zone of the picture (left, right, top, bottom borders);
    # this zone is the approximate location of traffic signs
    # (the green zone in the picture test_1.jpg)
    left_x = center_x - int(center_x*.7)
    right_x = width
    top_y = 0
    bottom_y = center_y + int(center_y*.3)
    # crop the zone of traffic sign locations to search only inside it
    crop_image = image[top_y:bottom_y, left_x:right_x]
    #cv2.imshow('img0', crop_image)

    # make a Canny image - the first image for shape recognition
    # (look at test_1_crop_canny.jpg)
    canny = cv2.Canny(crop_image, 50, 240)
    blur_canny = cv2.blur(canny, (2,2))
    _, thresh_canny = cv2.threshold(blur_canny, 127, 255, cv2.THRESH_BINARY)

    # make a color HSV image - the second image for color mask recognition;
    # convert BGR to HSV
    hsv = cv2.cvtColor(crop_image, cv2.COLOR_BGR2HSV)
    # define the list of boundaries (lower and upper color space) for HSV;
    # the mask for red consists of 2 parts (a lower mask and an upper mask)
    # lower red mask (0-10)
    lower_red = np.array([0,50,50], np.uint8)
    upper_red = np.array([10,255,255], np.uint8)
    mask_red_lo = cv2.inRange(hsv, lower_red, upper_red)
    # upper red mask (160-180)
    lower_red = np.array([160,50,50], np.uint8)
    upper_red = np.array([180,255,255], np.uint8)
    mask_red_hi = cv2.inRange(hsv, lower_red, upper_red)
    # blue color mask
    lower_blue = np.array([100,50,50], np.uint8)
    upper_blue = np.array([140,200,200], np.uint8)
    mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
    # yellow color mask
    lower_yellow = np.array([15,110,110], np.uint8)
    upper_yellow = np.array([25,255,255], np.uint8)
    mask_yellow = cv2.inRange(hsv, lower_yellow, upper_yellow)
    # join all the masks;
    # it could be better to join the yellow and red masks first - that can help to detect
    # autumn trees and remove some amount of garbage, but this is a TODO for later
    mask = mask_red_lo + mask_red_hi + mask_yellow + mask_blue
    # find the colors within the specified boundaries and apply the mask
    hsv_out = cv2.bitwise_and(hsv, hsv, mask=mask)
    # increase brightness - TODO later
    #h, s, v = cv2.split(hsv_out)
    #v += 50
    #bright_hsv_out = cv2.merge((h, s, v))
    # blurring makes lines from separate points and fragments and increases quality
    blur_hsv_out = cv2.blur(hsv_out, (1,1))    # change from 1-3 to understand how it works
    # preparing HSV for contours - make it gray and threshold it
    gray = cv2.cvtColor(blur_hsv_out, cv2.COLOR_BGR2GRAY)
    # increasing the intensity of the found colors with a 0-255 threshold value;
    # look at the file test_1_hsv_binary to understand what the thresh image is
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)
    # no need to mix the two images - it would cause problems with contour recognition
    #dst = cv2.addWeighted(canny, 0.3, thresh, 0.7, 0)
    #cv2.imshow('img1', thresh_canny)
    #cv2.imshow('img2', thresh)
    #cv2.waitKey(0)

    """ step two - searching for contours in the prepared images """
    # counter of the found candidates
    multiangles_n = 0
    # contours of the first image (thresh_canny);
    # the cv2.RETR_TREE parameter returns all the contours, internal and external
    image1, contours1, _ = cv2.findContours(thresh_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    #print("Contours total at first image: " + str(len(contours1)))
    # take only the biggest ~15% of all the elements,
    # skipping small contours from tree branches etc.
    contours1 = sorted(contours1, key=cv2.contourArea, reverse=True)[:int(len(contours1)/6)]
    for cnt in contours1:
        # find the perimeter of the area - if it is small and not convex, skip it
        perimeter = cv2.arcLength(cnt, True)
        if perimeter < 25 or not cv2.isContourConvex(cnt):    # 25: lower - more objects, higher - less
            continue
        # calculate the rotated rectangle parameters of the contour
        (x, y), (w, h), angle = cv2.minAreaRect(cnt)
        # calculate the width/height ratio to understand whether the shape
        # looks like a traffic sign or not
        koeff_p = 0
        if w >= h and h != 0:
            koeff_p = w/h
        elif w != 0:
            koeff_p = h/w
        if koeff_p > 2:    # if the rectangle is very thin, skip this contour
            continue
        # compute the center of the contour
        M = cv2.moments(cnt)
        cX = 0
        cY = 0
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        # transform cropped image coordinates to real image coordinates
        cX += left_x
        cY += top_y
        dist_c_p = math.sqrt(math.pow((center_x - cX), 2) + math.pow((center_y - cY), 2))
        # skipping small contours close to the left and right sides of the picture -
        # remember the red squares from the test_1.jpg file?
        if dist_c_p > dist_[0] and dist_c_p <= dist_[1] and perimeter < 30:
            continue
        if dist_c_p > dist_[1] and dist_c_p <= dist_[2] and perimeter < 50:
            continue
        if dist_c_p > dist_[2] and perimeter < 70:
            continue
        # 0.15: try different coefficients for better results (lower - more objects, higher - less)
        approx_c = cv2.approxPolyDP(cnt, 0.15*cv2.arcLength(cnt, True), True)
        if len(approx_c) >= 3:    # if the contour has more than two angles...
            # calculate the parameters of the rectangle around the contour
            # to crop the ROI of a potential traffic sign
            x, y, w_b_rect, h_b_rect = cv2.boundingRect(cnt)
            #cv2.rectangle(image, (cX-int(w_b_rect/2)-10, cY-int(h_b_rect/2)-10), (cX+int(w_b_rect/2)+10, cY+int(h_b_rect/2)+10), (255,0,0), 1)
            # put this ROI into the images array for the next recognition step
            top_Y = cY - int(h_b_rect/2) - 3
            bot_Y = cY + int(h_b_rect/2) + 3
            left_X = cX - int(w_b_rect/2) - 3
            right_X = cX + int(w_b_rect/2) + 3
            if top_Y < 0:
                top_Y = 0
            if bot_Y > height:
                bot_Y = height
            if left_X < 0:
                left_X = 0
            if right_X > width:
                right_X = width
            #img_found = image[cY-int(h_b_rect/2)-3:cY+int(h_b_rect/2)+3, cX-int(w_b_rect/2)-3:cX+int(w_b_rect/2)+3]
            img_found = image[top_Y:bot_Y, left_X:right_X]
            img_resized = cv2.resize(img_found, (32,32))
            #img_resized = imutils.resize(img_found, width=32, height=32)
            images.append(img_resized)
            # add the rectangle coordinates of the image
            rects.append([cX-int(w_b_rect/2)-10, cY-int(h_b_rect/2)-10,
                          cX+int(w_b_rect/2)+10, cY+int(h_b_rect/2)+10])
            # save to a file - will be skipped later, TODO
            #cv2.imwrite("%recogn.jpg" % multiangles_n, image[top_Y:bot_Y, left_X:right_X])
            # increase the multiangles quantity
            multiangles_n += 1

    # contours of the second image (thresh);
    # in this picture we use only RETR_EXTERNAL contours to avoid processing, for example,
    # windows in yellow and red houses, holes between plants etc.
    image2, contours2, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    #print("Contours total at second image: " + str(len(contours2)))
    # take roughly the biggest 10% of the elements
    contours2 = sorted(contours2, key=cv2.contourArea, reverse=True)[:int(len(contours2)/10)]
    for cnt in contours2:
        # calculate the perimeter
        perimeter = cv2.arcLength(cnt, True)
        # if the perimeter is too big or too small, or the contour is not convex, skip it
        if perimeter > 200 or perimeter < 20 or not cv2.isContourConvex(cnt):
            continue
        # calculate the rotated rectangle parameters of the contour
        (x, y), (w, h), angle = cv2.minAreaRect(cnt)
        # calculate the width/height ratio to understand whether the shape
        # looks like a traffic sign or not
        koeff_p = 0
        if w >= h and h != 0:
            koeff_p = w/h
        elif w != 0:
            koeff_p = h/w
        if koeff_p > 2:    # if the rectangle is very thin, skip this contour
            continue
        # compute the center of the contour
        M = cv2.moments(cnt)
        cX = 0
        cY = 0
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        # transform cropped image coordinates to real image coordinates
        cX += left_x
        cY += top_y
        dist_c_p = math.sqrt(math.pow((center_x - cX), 2) + math.pow((center_y - cY), 2))
        # skipping small contours close to the left and right sides of the picture
        if dist_c_p > dist_[0] and dist_c_p <= dist_[1] and perimeter < 30:
            continue
        if dist_c_p > dist_[1] and dist_c_p <= dist_[2] and perimeter < 50:
            continue
        if dist_c_p > dist_[2] and perimeter < 70:
            continue
        # 0.03: lower - more objects, higher - less
        approx_c = cv2.approxPolyDP(cnt, 0.03*cv2.arcLength(cnt, True), True)
        if len(approx_c) >= 3:
            x, y, w_b_rect, h_b_rect = cv2.boundingRect(cnt)
            #cv2.rectangle(image, (cX-int(w_b_rect/2)-10, cY-int(h_b_rect/2)-10), (cX+int(w_b_rect/2)+10, cY+int(h_b_rect/2)+10), (0,255,0), 1)
            top_Y = cY - int(h_b_rect/2) - 3
            bot_Y = cY + int(h_b_rect/2) + 3
            left_X = cX - int(w_b_rect/2) - 3
            right_X = cX + int(w_b_rect/2) + 3
            if top_Y < 0:
                top_Y = 0
            if bot_Y > height:
                bot_Y = height
            if left_X < 0:
                left_X = 0
            if right_X > width:
                right_X = width
            #img_found = image[cY-int(h_b_rect/2)-3:cY+int(h_b_rect/2)+3, cX-int(w_b_rect/2)-3:cX+int(w_b_rect/2)+3]
            img_found = image[top_Y:bot_Y, left_X:right_X]
            img_resized = cv2.resize(img_found, (32,32))
            #img_resized = imutils.resize(img_found, width=32, height=32)
            images.append(img_resized)
            # add the rectangle coordinates of the image
            rects.append([cX-int(w_b_rect/2)-10, cY-int(h_b_rect/2)-10,
                          cX+int(w_b_rect/2)+10, cY+int(h_b_rect/2)+10])
            #cv2.imwrite("%recogn.jpg" % multiangles_n, image[top_Y:bot_Y, left_X:right_X])
            multiangles_n += 1

    print(str(multiangles_n) + ' multiangles found')
    #cv2.imshow('img', image)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()
    images_np = np.array(images)
    return images_np, rects

###################################################################################################
class VideoSynthBase(object):
    def __init__(self, size=None, noise=0.0, bg=None, **params):
        self.bg = None
        self.frame_size = (640, 480)
        if bg is not None:
            self.bg = cv2.imread(bg, 1)
            h, w = self.bg.shape[:2]
            self.frame_size = (w, h)
        if size is not None:
            w, h = map(int, size.split('x'))
            self.frame_size = (w, h)
            self.bg = cv2.resize(self.bg, self.frame_size)
        self.noise = float(noise)

    def render(self, dst):
        pass

    def read(self, dst=None):
        w, h = self.frame_size
        if self.bg is None:
            buf = np.zeros((h, w, 3), np.uint8)
        else:
            buf = self.bg.copy()
        self.render(buf)
        if self.noise > 0.0:
            noise = np.zeros((h, w, 3), np.int8)
            cv2.randn(noise, np.zeros(3), np.ones(3)*255*self.noise)
            buf = cv2.add(buf, noise, dtype=cv2.CV_8UC3)
        return True, buf

    def isOpened(self):
        return True

##################################################################################################
class Book(VideoSynthBase):
    def __init__(self, **kw):
        super(Book, self).__init__(**kw)
        backGr = cv2.imread('graf1.png')
        fgr = cv2.imread('box.png')
        self.render = TestSceneRender(backGr, fgr, speed=1)

    def read(self, dst=None):
        noise = np.zeros(self.render.sceneBg.shape, np.int8)
        cv2.randn(noise, np.zeros(3), np.ones(3)*255*self.noise)
        return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3)

##################################################################################################
class Cube(VideoSynthBase):
    def __init__(self, **kw):
        super(Cube, self).__init__(**kw)
        self.render = TestSceneRender(cv2.imread('pca_test1.jpg'), deformation=True, speed=1)

    def read(self, dst=None):
        noise = np.zeros(self.render.sceneBg.shape, np.int8)
        cv2.randn(noise, np.zeros(3), np.ones(3)*255*self.noise)
        return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3)

###################################################################################################
class Chess(VideoSynthBase):
    def __init__(self, **kw):
        super(Chess, self).__init__(**kw)
        w, h = self.frame_size
        self.grid_size = sx, sy = 10, 7
        white_quads = []
        black_quads = []
        for i, j in np.ndindex(sy, sx):
            q = [[j, i, 0], [j+1, i, 0], [j+1, i+1, 0], [j, i+1, 0]]
            [white_quads, black_quads][(i + j) % 2].append(q)
        self.white_quads = np.float32(white_quads)
        self.black_quads = np.float32(black_quads)
        fx = 0.9
        self.K = np.float64([[fx*w, 0,    0.5*(w-1)],
                             [0,    fx*w, 0.5*(h-1)],
                             [0.0,  0.0,  1.0]])
        self.dist_coef = np.float64([-0.2, 0.1, 0, 0])
        self.t = 0

    def draw_quads(self, img, quads, color=(0, 255, 0)):
        img_quads = cv2.projectPoints(quads.reshape(-1, 3), self.rvec, self.tvec,
                                      self.K, self.dist_coef)[0]
        img_quads.shape = quads.shape[:2] + (2,)
        for q in img_quads:
            cv2.fillConvexPoly(img, np.int32(q*4), color, cv2.LINE_AA, shift=2)

    def render(self, dst):
        t = self.t
        self.t += 1.0/30.0
        sx, sy = self.grid_size
        center = np.array([0.5*sx, 0.5*sy, 0.0])
        phi = pi/3 + sin(t*3)*pi/8
        c, s = cos(phi), sin(phi)
        ofs = np.array([sin(1.2*t), cos(1.8*t), 0]) * sx * 0.2
        eye_pos = center + np.array([cos(t)*c, sin(t)*c, s]) * 15.0 + ofs
        target_pos = center + ofs
        R, self.tvec = common.lookat(eye_pos, target_pos)
        self.rvec = common.mtx2rvec(R)
        self.draw_quads(dst, self.white_quads, (245, 245, 245))
        self.draw_quads(dst, self.black_quads, (10, 10, 10))


classes = dict(chess=Chess, book=Book, cube=Cube)

presets = dict(
    empty = 'synth:',
    lena  = 'synth:bg=10ecogn.jpg:noise=0.1',
    chess = 'synth:class=chess:bg=10ecogn.jpg:noise=0.1:size=640x480',
    book  = 'synth:class=book:bg=graf1.png:noise=0.1:size=640x480',
    cube  = 'synth:class=cube:bg=pca_test1.jpg:noise=0.0:size=640x480'
)

#################################################################################################
def draw_str(dst, target, s):
    x, y = target
    cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0),
                thickness=2, lineType=cv2.LINE_AA)
    cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255),
                lineType=cv2.LINE_AA)

#################################################################################################
def clock():
    return cv2.getTickCount() / cv2.getTickFrequency()

#################################################################################################
def create_capture(source=0, fallback=presets['chess']):
    '''source: <int> or '<int>|<filename>|synth [:<param_name>=<value> [:...]]'
    '''
    source = str(source).strip()
    chunks = source.split(':')
    # handle drive letter ('c:', ...)
    if len(chunks) > 1 and len(chunks[0]) == 1 and chunks[0].isalpha():
        chunks[1] = chunks[0] + ':' + chunks[1]
        del chunks[0]
    source = chunks[0]
    try:
        source = int(source)
    except ValueError:
        pass
    params = dict(s.split('=') for s in chunks[1:])
    cap = None
    if source == 'synth':
        Class = classes.get(params.get('class', None), VideoSynthBase)
        try:
            cap = Class(**params)
        except:
            pass
    else:
        cap = cv2.VideoCapture(source)
        if 'size' in params:
            w, h = map(int, params['size'].split('x'))
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', source)
        if fallback is not None:
            return create_capture(fallback, None)
    return cap

##########################################################################################################
# preprocessing images
##########################################################################################################
def pre_processing_single_img(img):
    # take the Y (luminance) channel of the YUV image
    img_y = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)[:,:,0]
    img_y = (img_y / 255.).astype(np.float32)
    #img_y = exposure.adjust_log(img_y)
    img_y = skimage.exposure.equalize_adapthist(img_y) - 0.5
    img_y = img_y.reshape(img_y.shape + (1,))
    return img_y

def pre_processing(X):
    #print(X.shape)
    X_out = np.empty((X.shape[0], X.shape[1], X.shape[2], 1)).astype(np.float32)
    #print(X_out.shape)
    i = 0
    for idx, img in enumerate(X):
        X_out[idx] = pre_processing_single_img(img)
        i += 1
        if i % 1000 == 0:
            print(i)
    return X_out

#############################################################################################
# main
#############################################################################################
if __name__ == '__main__':
    np.seterr(divide='ignore', invalid='ignore')
    import sys, getopt
    print(__doc__)
    args, video_src = getopt.getopt(sys.argv[1:], '')
    try:
        video_src = video_src[0]
    except:
        video_src = 0
    args = dict(args)

    # load the trained model
    modelFullPath = '/home/tensorflow/python_prog/traffic_ru_aug/graph_lenet_saves/traffic_ru_v1.pb'    # model path
    BATCH_SIZE = 128
    # read the names of the sign classes from a csv file
    sign_names = pd.read_csv("numbers_to_classes.csv").values[:, 1]
    # start capturing the video object
    cam = create_capture(video_src, fallback='synth:bg=10ecogn.jpg:noise=0.05')

    # create a graph from the saved *.pb file
    with tf.gfile.FastGFile(modelFullPath, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')

    while cam.isOpened():    # run until Escape is pressed
        with tf.Session() as sess:    # start a tensorflow session
            # initializing tensors
            x = sess.graph.get_tensor_by_name('images:0')
            y = sess.graph.get_tensor_by_name('labels:0')
            logits = sess.graph.get_tensor_by_name('logits:0')
            apply_dropout = sess.graph.get_tensor_by_name('apply_dropout:0')
            ret, img = cam.read()
            if ret == False:    # if the capture returned nothing, skip and try once again
                continue
            # save the captured image
            cv2.imwrite("now.jpg", img)
            vis = img.copy()
            # start the time measurement
            t = clock()
            # find candidates for traffic signs and rectangle coordinates for each candidate
            signs, rects = findTS("now.jpg")
            if len(signs) < 1:    # if we have not found any candidate for a traffic sign
                continue
            # preprocess the traffic sign candidates
            signs_p = pre_processing(signs)
            feed_dict_new = {x: signs_p, apply_dropout: False}
            #predictions = sess.run(logits, feed_dict=feed_dict_new)
            # generate the top 3 predictions, ordered from best to worst
            top3_pred = sess.run([logits, tf.nn.top_k(logits, 3)], feed_dict=feed_dict_new)
            for i in range(len(signs)):
                if top3_pred[1][0][i][0] > 13:    # if the best of the top 3 predictions is good enough
                    # draw a rectangle around the probable traffic sign
                    cv2.rectangle(vis, (rects[i][0], rects[i][1]), (rects[i][2], rects[i][3]),
                                  (0, 255, 0), 2)
                    # draw the class name of the traffic sign
                    draw_str(vis, (rects[i][0], rects[i][1]-10),
                             'TS class: ' + sign_names[top3_pred[1][1][i][0]])
            dt = clock() - t
            draw_str(vis, (20, 20), 'time: %.1f ms' % (dt*1000))
            cv2.imshow('ts_detect', vis)
            if cv2.waitKey(5) == 27:
                break
            #cv2.waitKey(5)
    sess.close()
    cam.release()
    cv2.destroyAllWindows()
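To run the script, pass a video file path or a camera index as the optional argument, exactly as the USAGE line in the docstring says; with no argument it falls back to camera 0 and then to the synthetic source. A couple of hypothetical invocations (the video file name here is a placeholder; the model .pb path and numbers_to_classes.csv must exist as configured above):

python ts_detect_2.py road_video.avi    # detect signs in a recorded video
python ts_detect_2.py 0                 # read frames from the first webcam

Note that the script assumes the TensorFlow 1.x API (tf.gfile.FastGFile, tf.GraphDef, tf.Session) and the three-value cv2.findContours signature of OpenCV 3.x, and the synthetic video classes expect the common.py and tst_scene_render.py helpers from the OpenCV samples to sit next to the script.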