1. 일련의 모아진 데이터들로 knn을 위한 훈련용 사진 만들기
# -*- encoding: cp949 -*-
# Current file path: file_path = "%s%s" % (os.path.dirname(os.path.realpath(__file__))+"\\", filename)
#
# Builds one big sprite sheet ('zero2.png') out of the sample images stored
# under 'train/<sub-directory>/'.  Every sample is pasted into a 20x20 pixel
# cell of a 2000x2240 white canvas, i.e. a grid of 100 columns by 112 rows.
import os
import Image

CELL = 20           # edge length of one pasted sample, in pixels
COLUMNS = 100       # 2000 px canvas width / 20 px cells
MAX_SAMPLES = 800   # counter value at which the current file list is abandoned

b = Image.new('RGB', (2000, 2240), (255, 255, 255))
cnt = 0
y = 0
# NOTE(review): os.walk('train') also yields the nested levels, but the names
# it reports there are still joined onto 'train' below, so presumably only the
# first level is meant to contribute -- confirm.
for all_dirname, inner_dirnames, all_filenames in os.walk('train'):
    for i in inner_dirnames:
        class_dir = os.path.join('train', i)
        for walk_root, walk_dirs, walk_files in os.walk(class_dir):
            # The column restarts for every walked directory while the row
            # (y) keeps its value, so a new directory starts pasting on the
            # row where the previous one stopped.
            x = 0
            for fname in walk_files:
                if fname == 'Thumbs.db':
                    # Windows thumbnail cache, not a sample image.
                    continue
                # Progress output; printed before the wrap check below, so
                # the x value shown can be one cell past the last column.
                print('%d %d' % (x * CELL, y * CELL))
                if x == COLUMNS:
                    x = 0
                    y += 1
                # Open the file from the directory os.walk actually found it
                # in; joining onto class_dir would miss files that live in a
                # nested folder.
                sample = Image.open(os.path.join(walk_root, fname))
                b.paste(sample, (x * CELL, y * CELL))
                cnt += 1
                # cnt is only reset here, so it carries over from one
                # directory to the next until it reaches MAX_SAMPLES.
                if cnt == MAX_SAMPLES:
                    cnt = 0
                    break
                x += 1
b.save('zero2.png')
2. hog관련 코드.
import cv2
import numpy as np

SZ = 20      # edge length of one square sample cell, in pixels
bin_n = 16   # Number of bins

svm_params = dict(kernel_type=cv2.SVM_LINEAR,
                  svm_type=cv2.SVM_C_SVC,
                  C=2.67, gamma=5.383)

affine_flags = cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR


def deskew(img):
    """Return *img* sheared so that its second-order skew is removed.

    The skew is estimated from the image moments as mu11 / mu02.  When
    |mu02| is below 1e-2 the image is returned as an unmodified copy,
    which also avoids dividing by (almost) zero.
    """
    m = cv2.moments(img)
    if abs(m['mu02']) < 1e-2:
        return img.copy()
    skew = m['mu11'] / m['mu02']
    M = np.float32([[1, skew, -0.5 * SZ * skew], [0, 1, 0]])
    img = cv2.warpAffine(img, M, (SZ, SZ), flags=affine_flags)
    return img


def hog(img):
    """Return the histogram-of-oriented-gradients descriptor of *img*.

    The Sobel gradient of the image is converted to magnitude and angle,
    the angle is quantized into ``bin_n`` bins, and a magnitude-weighted
    histogram is taken over each of the four quadrants split at row and
    column 10.  The four histograms are concatenated, giving a vector of
    ``4 * bin_n`` values.
    """
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    # Quantize the angle into bin indices 0 .. bin_n-1.  The modulo folds an
    # angle that lands on a full turn back onto bin 0; without it np.bincount
    # would return bin_n + 1 entries for that cell (minlength is only a lower
    # bound) and the descriptor would no longer be 4 * bin_n long.
    bins = np.int32(bin_n * ang / (2 * np.pi)) % bin_n
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n)
             for b, m in zip(bin_cells, mag_cells)]
    hist = np.hstack(hists)     # 4 * bin_n values (64 with bin_n = 16)
    return hist


# 'zero2.png' is cut into 112 rows x 100 columns of cells and
# 'num_total.png' into 23 rows x 11 columns.
img = cv2.imread('zero2.png', 0)
cells = [np.hsplit(row, 100) for row in np.vsplit(img, 112)]
im = cv2.imread('num_total.png', 0)
cell = [np.hsplit(row, 11) for row in np.vsplit(im, 23)]

# Every cell of 'zero2.png' is training data and every cell of
# 'num_total.png' is test data (nothing is held out from either sheet).
train_cells = [i[:] for i in cells]
test_cells = [i[:] for i in cell]

######     Now training      ########################

# Explicit list comprehensions instead of map(): np.float32() needs real
# lists, which map() only returns on Python 2.
deskewed = [[deskew(c) for c in row] for row in train_cells]
hogdata = [[hog(c) for c in row] for row in deskewed]
trainData = np.float32(hogdata).reshape(-1, bin_n * 4)
# 16 labels (0..15), each repeated for 700 consecutive cells: 11200 labels,
# one per training cell (112 * 100).
responses = np.float32(np.repeat(np.arange(16), 700)[:, np.newaxis])

svm = cv2.SVM()
svm.train(trainData, responses, params=svm_params)
svm.save('svm_data.dat')

######     Now testing      ########################

deskewed = [[deskew(c) for c in row] for row in test_cells]
hogdata = [[hog(c) for c in row] for row in deskewed]
testData = np.float32(hogdata).reshape(-1, bin_n * 4)
result = svm.predict_all(testData)

print(len(result))
# Labels 0-9 are shown as '0'-'9' (ASCII 48+), labels 10 and above as
# 'A', 'B', ... (ASCII 55 + label).
labels = [chr(int(r[0]) + (48 if r[0] < 10 else 55)) for r in result]
# Same layout as before: an empty line first, then space-separated rows of
# 11 characters.
if labels:
    print('')
for start in range(0, len(labels), 11):
    print(' '.join(labels[start:start + 11]))

#######   Check Accuracy   ########################
# NOTE(review): no accuracy computation follows this banner in the section
# shown here; the predictions are only printed.
3. bypass image captcha javascript으로 처리하려고 했던 삽질
# -*- encoding: cp949 -*-
# Downloads the shape list embedded in the captcha page's JavaScript
# ("new Array(...)") and tries to group shapes that touch each other:
# every shape starts in its own group inside number_consist, and a shape is
# appended to another group once it is found to intersect a member of it.
#
# NOTE(review): this block was recovered from a paste whose indentation was
# lost.  The nesting of the break/continue statements and of the trailing
# "if not flag" section is inferred from the syntax -- confirm against a
# pristine copy before relying on it.
import urllib2, re, math
from sympy import Matrix, solve_linear_system
from sympy.abc import x, y
from sympy.solvers import solve
from sympy import Symbol
x = Symbol('x')
y = Symbol('y')
import copy
# NOTE(review): hard-coded session cookie value; it has to be replaced with a
# live one, and should not be kept in shared source.
session = 'o73chcunp51fn346ob6h2h8394'
req = urllib2.Request('https://www.hackthissite.org/missions/prog/6/image/')
req.add_header('cookie','PHPSESSID='+session)
page = urllib2.urlopen(req).read()
# Flat list of integers taken from the first "new Array(...)" on the page.
num = map(int,re.findall('new Array\((.*?)\);',page)[0].split(','))
array = []
# Split the flat list into records: when the third value is below 10 the
# record consumes five numbers, otherwise four.
while num:
    if num[2]<10:
        array.append([num[0],num[1],num[2],num[3],num[4]])
        num = num[5:]
    else:
        array.append([num[0],num[1],num[2],num[3]])
        num = num[4:]
print len(array) # 253 characters
#array = sorted(array, key = lambda x:(x[0],x[1])) # to be removed later.
# One single-element group per record.
number_consist = copy.deepcopy(array)
for i in range(len(number_consist)):
    number_consist[i] = [number_consist[i]]
c = 0
# NOTE(review): array is modified (array.remove(point)) further down while
# this loop is iterating over it, which makes the loop skip the record that
# follows each removed one.
for point in array:
    # NOTE(review): records whose third value is 7 are skipped entirely; the
    # reason is not visible here.
    if point[2]==7:
        continue
    #p_point = [(point[0]+point[2])/2, (point[1]+point[3])/2] if len(point)==4 else [point[0],point[1]]
    if len(point)==4: # a line: store the coefficients needed to solve its equation.
        # The line is stored as (a, b, n), used below as a*x + b*y = n.
        if point[2]-point[0]==0: # no change in x: the equation has the form x = n.
            a0 = 1; b0 = 0; n0 = point[0]
        elif point[3]-point[1]==0: # no change in y: the equation has the form y = n.
            a0 = 0; b0 = 1; n0 = point[1]
        else: # equation of a general line.
            # NOTE(review): all operands are ints, so under Python 2 these
            # divisions truncate the slope -- confirm this is intended.
            a0 = (point[3]-point[1])/(point[2]-point[0])
            b0 = -1
            n0 = (point[0]*(point[3]-point[1])/(point[2]-point[0])) - point[1]
        p_point = (a0, b0, n0) # a line is stored as a tuple.
    else: # a circle: keep the centre and the radius only.
        p_point = [point[0],point[1],point[2]] # a circle is stored as a list; the container type is what later tells circles and lines apart.
    flag = True
    print len(array),point,c
    c += 1
    #if c==2:
    #    break
    for n in number_consist:
        for __n in n:
            if __n!=point:
                if __n[2]==7:
                    continue
                if len(__n)==4: # likewise: a line gets its equation
                    if __n[2]-__n[0]==0: # same logic as above.
                        a1 = 1; b1 = 0; n1 = __n[0]
                    elif __n[3]-__n[1]==0:
                        a1 = 0; b1 = 1; n1 = __n[1]
                    else:
                        a1 = (__n[3] - __n[1])/(__n[2] - __n[0])
                        b1 = -1
                        n1 = (__n[0]*(__n[3] - __n[1])/(__n[2] - __n[0])) - __n[1]
                    n_point = (a1, b1, n1) # a line is stored as its equation in a tuple,
                else: # a circle: add the radius only.
                    n_point = [__n[0],__n[1],__n[2]] # a circle is stored as a list.
                # There are three cases.
                if (type(p_point) is tuple) and (type(n_point) is tuple): # both are lines
                    k = solve_linear_system(Matrix(( p_point, n_point)), x, y) # coordinates of the intersection
                    # NOTE(review): this break leaves the scan of the current
                    # group with flag still True, so the remaining members of
                    # that group are not tested against point.
                    if not k or p_point==n_point or len(k)!=2:
                        break
                    x1 = k[x]; y1 = k[y]
                    # Bounding boxes of both segments; the tests below allow
                    # a margin of 10 on every side.
                    x_min_point,x_max_point = (point[0],point[2]) if point[0] < point[2] else (point[2],point[0])
                    y_min_point,y_max_point = (point[1],point[3]) if point[1] < point[3] else (point[3],point[1])
                    x_min_n_point,x_max_n_point = (__n[0],__n[2]) if __n[0] < __n[2] else (__n[2],__n[0])
                    y_min_n_point,y_max_n_point = (__n[1],__n[3]) if __n[1] < __n[3] else (__n[3],__n[1])
                    if x_min_point-10 <= x1 and x1 <= x_max_point+10 and x_min_n_point-10 <= x1 and x1 <= x_max_n_point+10\
                    and y_min_point-10 <= y1 and y1 <= y_max_point+10 and y_min_n_point-10 <= y1 and y1 <= y_max_n_point+10: # the intersection lies within both segments
                        n.append(point)
                        array.remove(point)
                        flag = False
                        break
                elif (type(p_point) is list) and (type(n_point) is list): # both are circles
                    length = math.sqrt((p_point[0]-n_point[0])**2 + (p_point[1]-n_point[1])**2) # distance between the two centres
                    if (p_point[2]+n_point[2])+1 >= length: # the centres are no further apart than the sum of the radii (plus 1), so the circles touch.
                        n.append(point)
                        array.remove(point)
                        flag = False
                        break
                else: # one is a line, the other a circle
                    # min/max hold the bounding box of whichever of the two
                    # shapes is the line segment.
                    if type(p_point) is list:
                        min_x,max_x = (__n[0],__n[2]) if __n[0] < __n[2] else (__n[2],__n[0])
                        min_y,max_y = (__n[1],__n[3]) if __n[1] < __n[3] else (__n[3],__n[1])
                        circle,line = (p_point,n_point)
                    else:
                        min_x,max_x = (point[0],point[2]) if point[0] < point[2] else (point[2],point[0])
                        min_y,max_y = (point[1],point[3]) if point[1] < point[3] else (point[3],point[1])
                        circle,line = (n_point,p_point)
                    # Distance from the circle centre to the line a*x + b*y = n.
                    length = (abs(line[0]*circle[0] + line[1]*circle[1] - line[2])) / math.sqrt(line[0]**2 + line[1]**2)
                    if circle[2] >= length and True: # TODO from the author: use sympy to get the circle/line intersection points and check that they lie inside the segment.
                        x = Symbol('x')
                        y = Symbol('y')
                        #print '\n',circle,line,
                        #print list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                        # With set=True, solve() returns (symbols, solutions);
                        # index 1 picks the solutions.
                        # NOTE(review): the identical system is solved again
                        # in whichever branch is taken below.
                        equation = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                        if len(equation)==2:
                            answer1, answer2 = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                            # NOTE(review): the bare except hides every error
                            # raised by the float conversion and moves on to
                            # the next group member.
                            try:
                                answer1, answer2 = map(float,answer1),map(float,answer2)
                            except:
                                continue
                            if min_x-10 <= answer1[0] and min_x-10 <= answer2[0] and answer1[0] <= max_x+10 and answer2[0] <= max_x+10\
                            and min_y-10 <= answer1[1] and min_y-10 <= answer2[1] and answer1[1] <= max_y+10 and answer2[1] <= max_y+10: # author's note: still to do -- take the solution printed just above into a variable and check that it lies between min and max.
                                n.append(point)
                                array.remove(point)
                                flag = False
                                break
                        else:
                            # NOTE(review): here answer1 is the list of
                            # solutions itself, so float() is applied to each
                            # whole solution rather than to its coordinates --
                            # confirm this branch ever reaches the test below.
                            answer1 = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                            try:
                                answer1 = map(float,answer1)
                            except:
                                continue
                            if min_x-10 <= answer1[0] and answer1[0] <= max_x+10 and min_y-10 <= answer1[1] and answer1[1] <= max_y+10 : # author's note: still to do -- take the solution printed just above into a variable and check that it lies between min and max.
                                n.append(point)
                                array.remove(point)
                                flag = False
                                break
        # point was merged into group n: drop its own single-element group
        # (if it still exists) and stop looking at further groups.
        if not flag:
            try:
                number_consist.remove([point])
            except:
                pass
            break
    #if flag:
    #    number_consist.remove(point)
    #    array.remove(point)
    #else:
    #    print 'nothing'
#print number_consist
# Show only the groups that ended up with more than one shape.
for i in number_consist:
    if len(i)>1:
        print i
#l = len(number_consist)
#print 'sorted len =',l
#for i in number_consist:
#    print str(i).replace('[','').replace(']','')+','
4. 머신러닝으로 처리하려고 했던 삽질
# Current file path: file_path = "%s%s" % (os.path.dirname(os.path.realpath(__file__))+"\\", filename)
#
# Captures a region of the screen, crops it to the area marked by pure green
# pixels, rotates it in 10 degree steps, cuts 20x20 tiles out of a vertical
# strip of every rotation, assembles the 253 tiles into 'num_total.png' and
# runs OCR on that sheet.
import pyscreenshot as ImageGrab
import time
import Image
import cv2
import numpy as np
import cv2.cv as cv
import pytesser
import urllib2
import PIL

GREEN = (0, 128, 0)        # marker colour searched for in the screenshot
ANGLES = range(0, 360, 10)

# One entry per group of 36 tiles: (top edge of the tile at angle 0,
# n for angles below 180, n for angles from 180 on).  The tile is moved up by
# int((angle // 10) * (n / 36.0)) pixels as the angle grows.
# NOTE(review): the numbers are the hand-tuned values of the original seven
# copy-pasted loops; what each group corresponds to in the picture is not
# visible in the code.
TILE_GROUPS = (
    (248, 15, 18),
    (228, 19, 22),
    (205, 25, 28),
    (178, 31, 34),
    (143, 36, 39),
    (103, 42, 47),
    (55, 50, 57),
)

time.sleep(1)

# Capture a generously large region and store it.
ImageGrab.grab(bbox=(0, 100, 1000, 860)).save('cap.png')
im = Image.open('cap.png').convert('RGB')

# Find the extent of the green pixels; both lists are [row, column].
# (Named lowest/highest rather than min/max so the builtins stay usable.)
lowest = [1000, 760]
highest = [0, 0]
for i in range(im.size[1]):
    for j in range(im.size[0]):
        if im.getpixel((j, i)) != GREEN:
            continue
        if i < lowest[0]:
            lowest[0] = i
        if j < lowest[1]:
            lowest[1] = j
        if i > highest[0]:
            highest[0] = i
        if j > highest[1]:
            highest[1] = j

# Cut the capture down to that extent (plus 30 / 31 pixels on the far edges).
Image.open('cap.png').crop(
    (lowest[1], lowest[0], highest[1] + 30, highest[0] + 31)).save('cap.png')

# Make the glyphs easy to see: black pixels become white, all others black.
im = Image.open('cap.png').convert('RGB')
for i in range(im.size[1]):
    for j in range(im.size[0]):
        if im.getpixel((j, i)) == (0, 0, 0):
            im.putpixel((j, i), (255, 255, 255))
        else:
            im.putpixel((j, i), (0, 0, 0))
#for i in range(im.size[1]): # cross-hair.
#    im.putpixel((im.size[0]/2,i),(0,0,0))
#for i in range(im.size[0]):
#    im.putpixel((i,im.size[1]/2),(0,0,0))
im.save('cap.png')

# 'cap.png' does not change any more, so it is read once for all rotations.
img = cv2.imread('cap.png', 0)
rows, cols = img.shape
for i in ANGLES:     # rotation angle: 0, 10, 20, ...
    M = cv2.getRotationMatrix2D((cols // 2, rows // 2), i, 1)
    dst = cv2.warpAffine(img, M, (cols, rows))
    cv2.imwrite('cap0.png', dst)     # scratch file for the rotated image
    # Keep only a 20 pixel wide vertical strip of each rotation.
    Image.open('cap0.png').crop((335, 0, 355, 270)).save('cap%s.png' % str(i + 1))

# Cut the tiles num1.png .. num252.png: 7 groups x 36 rotations.
cnt = 1
for top, n_first_half, n_second_half in TILE_GROUPS:
    for i in ANGLES:
        im = Image.open('cap%s.png' % str(i + 1))
        n = n_first_half if i < 180 else n_second_half
        shift = int((i // 10) * (n / 36.0))
        im.crop((0, top - shift, 20, top + 20 - shift)).save('num%s.png' % str(cnt))
        cnt += 1

# Tile 253 is the top 20x20 square of the unrotated strip.
im = Image.open('cap1.png')
im.crop((0, 0, 20, 20)).save('num%s.png' % str(cnt))

# Lay the 253 tiles out on a sheet of 11 columns x 23 rows.
sort_image = Image.new('RGB', (220, 460), (255, 255, 255))
x = 0; y = 0
for i in range(1, 254):
    sort_image.paste(Image.open('num%s.png' % i), (x * 20, y * 20))
    x += 1
    if i % 11 == 0:
        y += 1
        x = 0
sort_image.save('num_total.png')

img = cv2.imread('num_total.png', 0)
ret, thresh1 = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY)
cv2.imwrite('num_total0.png', thresh1)

# NOTE(review): both OCR calls read 'num_total.png'; the thresholded
# 'num_total0.png' written just above is never read -- confirm whether the
# second call was meant to use it.
print(pytesser.image_to_string(Image.open('num_total.png')))
print(pytesser.image_to_string(Image.open('num_total.png')))
exit(1)

# Never reached because of the exit(1) above.
# NOTE(review): 'send' is not defined anywhere in the code shown here.
req = urllib2.Request('https://www.hackthissite.org/missions/prog/6/','solution='+send)
req.add_header('cookie','PHPSESSID=g8kl8ggr529uqoq86o48i6d2s7')
req.add_header('referer','https://www.hackthissite.org/missions/prog/6/index.php')
print(urllib2.urlopen(req).read()[:10000])
exit(1)
