private/memo

bypass the image captcha 나중을 위한 중간코드 저장소

qkqhxla1 2015. 9. 24. 21:25

1. 일련의 모아진 데이터들로 knn을 위한 훈련용 사진 만들기

# -*- encoding: cp949 -*-
# Current file path: file_path = "%s%s" % (os.path.dirname(os.path.realpath(__file__))+"\\", filename)
import os
import Image
b = Image.new('RGB', (2000,2240), (255, 255, 255))

cnt = 0
y = 0
for all_dirname, inner_dirnames, all_filenames in os.walk('train'):
    for i in inner_dirnames:
        for a,dir,f in os.walk('train\\'+i):
            x = 0
            for file in f:
                if file != 'Thumbs.db':
                    print x*20,y*20
                    if x==100: 
                        x=0
                        y += 1
                    b.paste(Image.open('train\\'+i+'\\'+file),(x*20,y*20))
                    cnt += 1
                    if cnt==800:
                        cnt = 0
                        break
                x += 1
b.save('zero2.png')

2. hog관련 코드.

import cv2
import numpy as np

# Side length, in pixels, of the square output produced by deskew().
SZ = 20
# Number of orientation bins in each per-cell histogram built by hog().
bin_n = 16

# Keyword parameters passed to svm.train(): a C-SVC with a linear kernel.
svm_params = {
    'kernel_type': cv2.SVM_LINEAR,
    'svm_type': cv2.SVM_C_SVC,
    'C': 2.67,
    'gamma': 5.383,
}

# Flags for cv2.warpAffine in deskew(): linear interpolation, and the matrix
# is given as the inverse (destination -> source) mapping.
affine_flags = cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP

def deskew(img):
    """Return a sheared copy of *img* that compensates for its skew.

    The skew is estimated from the image moments as mu11 / mu02. When mu02
    is (almost) zero the estimate is undefined and an unchanged copy of the
    input is returned instead. Otherwise the result is an SZ x SZ image.
    """
    moments = cv2.moments(img)
    mu02 = moments['mu02']
    if abs(mu02) < 1e-2:
        return img.copy()

    skew = moments['mu11'] / mu02
    # Horizontal shear by `skew`; the offset keeps the middle row in place.
    shear = np.float32([[1, skew, -0.5 * SZ * skew],
                        [0, 1, 0]])
    return cv2.warpAffine(img, shear, (SZ, SZ), flags=affine_flags)

def hog(img):
    """Return a histogram-of-oriented-gradients descriptor for *img*.

    Sobel gradients are converted to magnitude/angle, the angle is quantized
    into bin_n orientation bins, and a magnitude-weighted histogram is built
    for each of four quadrants (split at row/column 10). The four histograms
    are concatenated into one vector of 4 * bin_n values.
    """
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    # Quantize angles to bin indices 0 .. bin_n-1. The clamp guards against an
    # angle that lands on 2*pi: that would give index bin_n, make bincount
    # return bin_n+1 entries, and break the fixed descriptor length.
    bins = np.minimum(np.int32(bin_n*ang/(2*np.pi)), bin_n - 1)
    bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
    mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
    hists = [np.bincount(cell_bins.ravel(), cell_mag.ravel(), bin_n)
             for cell_bins, cell_mag in zip(bin_cells, mag_cells)]
    return np.hstack(hists)  # 4 cells * bin_n bins (64 values for bin_n = 16)

# Training sheet, read as grayscale and cut into 112 strips of 100 tiles each.
img = cv2.imread('zero2.png',0)
cells = [np.hsplit(strip, 100) for strip in np.vsplit(img, 112)]

# Sheet to classify, read as grayscale and cut into 23 strips of 11 tiles each.
im = cv2.imread('num_total.png',0)
cell = [np.hsplit(strip, 11) for strip in np.vsplit(im, 23)]

# All tiles of the first sheet are training data and all tiles of the second
# sheet are test data (each strip is shallow-copied).
train_cells = [list(tiles) for tiles in cells]
test_cells = [list(tiles) for tiles in cell]

######     Now training      ########################

# Deskew every training tile, then turn each one into its HOG descriptor.
deskewed = [[deskew(tile) for tile in tiles] for tiles in train_cells]
hogdata = [[hog(tile) for tile in tiles] for tiles in deskewed]
# One row of bin_n*4 (= 64) features per tile.
trainData = np.float32(hogdata).reshape(-1, bin_n*4)
# Labels 0..15, each repeated for 700 consecutive tiles, as a column vector.
responses = np.float32(
    np.repeat(np.arange(16), 700)[:, np.newaxis]
)

# Train the SVM with the module-level parameters and persist the model.
svm = cv2.SVM()
svm.train(trainData, responses, params=svm_params)
svm.save('svm_data.dat')

######     Now testing      ########################

deskewed = [map(deskew,row) for row in test_cells]
hogdata = [map(hog,row) for row in deskewed]
testData = np.float32(hogdata).reshape(-1,bin_n*4)
result = svm.predict_all(testData)

print len(result)
for i in range(len(result)):
    if i%11==0: print
    if result[i][0]<10:
        print chr(int(result[i][0])+48),
    else:
        print chr(int(result[i][0])+55),
#######   Check Accuracy   ########################

3. bypass image captcha javascript으로 처리하려고 했던 삽질

# -*- encoding: cp949 -*-
import urllib2, re, math
from sympy import Matrix, solve_linear_system
from sympy.abc import x, y
from sympy.solvers import solve
from sympy import Symbol
x = Symbol('x')
y = Symbol('y')
import copy

# Value sent as the PHPSESSID cookie with the request below.
session = 'o73chcunp51fn346ob6h2h8394'

# Download the page that carries the drawing data.
req = urllib2.Request('https://www.hackthissite.org/missions/prog/6/image/')
req.add_header('cookie','PHPSESSID='+session)
page = urllib2.urlopen(req).read()
# Take the argument list of the first `new Array(...);` found in the page and
# parse it into a flat list of ints.
num = map(int,re.findall('new Array\((.*?)\);',page)[0].split(','))
# Cut the flat list into shape records: a record whose third value is below 10
# takes five numbers, any other record takes four. The grouping code further
# down treats 4-number records as line segments and the rest as circles.
array = []
while num:
    if num[2]<10:
        array.append([num[0],num[1],num[2],num[3],num[4]])
        num = num[5:]
    else:
        array.append([num[0],num[1],num[2],num[3]])
        num = num[4:]
print len(array) # 253 characters


#array = sorted(array, key = lambda x:(x[0],x[1])) # to be removed later.
# Start with one single-element group per shape, built from a deep copy so the
# groups do not share the record lists held in `array`.
number_consist = [[shape] for shape in copy.deepcopy(array)]

# Group shapes that touch each other. For every shape in `array`, look through
# the groups in `number_consist`; when the shape intersects a member of a
# group it is appended to that group, removed from `array`, and its own
# single-element group is dropped.
# NOTE(review): `array` is modified via array.remove(point) while this loop
# iterates over it, so the iteration can skip elements — confirm intended.
c = 0  # progress counter, only used in the debug print
for point in array:
    # Records whose third value is 7 are ignored; the reason is not visible here.
    if point[2]==7: continue
    #p_point = [(point[0]+point[2])/2, (point[1]+point[3])/2] if len(point)==4 else [point[0],point[1]]
    if len(point)==4: # Line: store the coefficients (a, b, n) of a*x + b*y = n.
        if point[2]-point[0]==0: # x does not change: the equation is x = n.
            a0 = 1; b0 = 0; n0 = point[0]
        elif point[3]-point[1]==0: # y does not change: the equation is y = n.
            a0 = 0; b0 = 1; n0 = point[1]
        else: # General line: slope*x - y = slope*x0 - y0.
            # NOTE(review): the coordinates were parsed with int(), so under
            # Python 2 these `/` are integer divisions and the slope is
            # truncated — confirm this is acceptable.
            a0 = (point[3]-point[1])/(point[2]-point[0])
            b0 = -1
            n0 = (point[0]*(point[3]-point[1])/(point[2]-point[0])) - point[1]
        p_point = (a0, b0, n0) # Lines are stored as a tuple.
    else: # Circle: keep centre and radius only.
        p_point = [point[0],point[1],point[2]] # Circles are stored as a list; the container type is used below to tell circle from line.

    flag = True  # stays True until `point` has been merged into a group
    print len(array),point,c
    c += 1
    #if c==2:
    #    break
    for n in number_consist:
        for __n in n:
            if __n!=point: 
                if __n[2]==7: continue
                if len(__n)==4: # Line: same (a, b, n) coefficients as above.
                    if __n[2]-__n[0]==0: # Same logic as above.
                        a1 = 1; b1 = 0; n1 = __n[0]
                    elif __n[3]-__n[1]==0:
                        a1 = 0; b1 = 1; n1 = __n[1]
                    else:
                        a1 = (__n[3] - __n[1])/(__n[2] - __n[0])
                        b1 = -1
                        n1 = (__n[0]*(__n[3] - __n[1])/(__n[2] - __n[0])) - __n[1]
                    n_point = (a1, b1, n1) # Line: equation stored as a tuple,
                else: # Circle: centre and radius only.
                    n_point = [__n[0],__n[1],__n[2]] # Circle: stored as a list.

                # Three cases follow.
                if (type(p_point) is tuple) and (type(n_point) is tuple): # Both are lines.
                    # The two coefficient tuples are the rows of the augmented matrix.
                    k = solve_linear_system(Matrix(( p_point, n_point)), x, y) # Coordinates of the intersection.
                    # No unique solution (or identical equations): stop scanning
                    # this group. NOTE(review): `break` skips the remaining
                    # members of the group — confirm `continue` was not meant.
                    if not k or p_point==n_point or len(k)!=2: break
                    x1 = k[x]; y1 = k[y]
                    # Bounding boxes of both segments.
                    x_min_point,x_max_point = (point[0],point[2]) if point[0] < point[2] else (point[2],point[0])
                    y_min_point,y_max_point = (point[1],point[3]) if point[1] < point[3] else (point[3],point[1])
                    x_min_n_point,x_max_n_point = (__n[0],__n[2]) if __n[0] < __n[2] else (__n[2],__n[0])
                    y_min_n_point,y_max_n_point = (__n[1],__n[3]) if __n[1] < __n[3] else (__n[3],__n[1])

                    if x_min_point-10 <= x1 and x1 <= x_max_point+10 and x_min_n_point-10 <= x1 and x1 <= x_max_n_point+10\
                        and y_min_point-10 <= y1 and y1 <= y_max_point+10 and y_min_n_point-10 <= y1 and y1 <= y_max_n_point+10: # The intersection lies within both segments (with a margin of 10).
                        n.append(point)
                        array.remove(point)
                        flag = False
                        break

                elif (type(p_point) is list) and (type(n_point) is list): # Both are circles.
                    length = math.sqrt((p_point[0]-n_point[0])**2 + (p_point[1]-n_point[1])**2) # Distance between the two centres.
                    if (p_point[2]+n_point[2])+1 >= length: # Centres are no further apart than the sum of the radii (plus 1): the circles touch.
                        n.append(point)
                        array.remove(point)
                        flag = False
                        break

                else: # One is a line, the other a circle.
                    # min/max describe the bounding box of whichever shape is the segment.
                    if type(p_point) is list:
                        min_x,max_x = (__n[0],__n[2]) if __n[0] < __n[2] else (__n[2],__n[0])
                        min_y,max_y = (__n[1],__n[3]) if __n[1] < __n[3] else (__n[3],__n[1])
                        circle,line = (p_point,n_point) 
                    else:
                        min_x,max_x = (point[0],point[2]) if point[0] < point[2] else (point[2],point[0])
                        min_y,max_y = (point[1],point[3]) if point[1] < point[3] else (point[3],point[1])
                        circle,line = (n_point,p_point)
                    
                    # Distance from the circle centre to the line a*x + b*y = n.
                    length = (abs(line[0]*circle[0] + line[1]*circle[1] - line[2])) / math.sqrt(line[0]**2 + line[1]**2)
                    if circle[2] >= length and True: # TODO: dig through sympy to get the circle/line intersection and check that it lies inside the segment.
                        x = Symbol('x')
                        y = Symbol('y')
                        #print '\n',circle,line,
                        #print list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                        # Solve circle and line together; element [1] of the
                        # set=True result is the set of (x, y) solutions.
                        equation = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                        if len(equation)==2:
                            # NOTE(review): the same system is solved again here
                            # instead of reusing `equation`.
                            answer1, answer2 = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                            # Solutions that cannot be converted to float are
                            # skipped (bare except: any error moves on).
                            try:
                                answer1, answer2 = map(float,answer1),map(float,answer2)
                            except: continue
                            if min_x-10 <= answer1[0] and min_x-10 <= answer2[0] and answer1[0] <= max_x+10 and answer2[0] <= max_x+10\
                                and min_y-10 <= answer1[1] and min_y-10 <= answer2[1] and answer1[1] <= max_y+10 and answer2[1] <= max_y+10:
                                # To do here: take the solution printed just above into a variable and check that it lies between min and max.
                                n.append(point)
                                array.remove(point)
                                flag = False
                                break
                        else:
                            # NOTE(review): here answer1 is the list of solution
                            # tuples, not a single (x, y) pair as in the branch
                            # above — confirm the float conversion does what is
                            # intended.
                            answer1 = list(solve([(x-circle[0])**2 + (y-circle[1])**2 - circle[2]**2, line[0]*x + line[1]*y - line[2]], x, y, set=True)[1])
                            try:
                                answer1 = map(float,answer1)
                            except: continue
                            if min_x-10 <= answer1[0] and answer1[0] <= max_x+10 and min_y-10 <= answer1[1] and answer1[1] <= max_y+10 :
                                # To do here: take the solution printed just above into a variable and check that it lies between min and max.
                                n.append(point)
                                array.remove(point)
                                flag = False
                                break
        # `point` was merged into group `n`: drop its own single-element group
        # (ignoring the case where it is no longer there) and stop searching.
        if not flag:        
            try:
                number_consist.remove([point])
            except: pass
            break

    #if flag: 
    #    number_consist.remove(point)
    #    array.remove(point)
    #else:
    #    print 'nothing'

#print number_consist
for i in number_consist:
    if len(i)>1:
        print i

#l = len(number_consist)
#print 'sorted len =',l

#for i in number_consist:
#    print str(i).replace('[','').replace(']','')+','

4. 머신러닝으로 처리하려고 했던 삽질

# Current file path: file_path = "%s%s" % (os.path.dirname(os.path.realpath(__file__))+"\\", filename)
import pyscreenshot as ImageGrab
import time,Image
import cv2
import numpy as np
import cv2.cv as cv
import pytesser
import urllib2
import PIL

time.sleep(1)
# Capture a generously sized screen region and save it.
ImageGrab.grab(bbox=(0,100,1000,860)).save('cap.png')
im = Image.open('cap.png').convert('RGB')

# Bounding box of all pure-green (0,128,0) pixels, kept as [row, col].
# The minimum starts at the image's own height/width so that any green pixel
# lowers it; a fixed start value smaller than the image width would give a
# wrong left edge when the green pixels sit further right than that value.
box_min = [im.size[1], im.size[0]]; box_max = [0, 0]
for i in range(im.size[1]): # Find the smallest and largest row/column so that area can be cut out.
    for j in range(im.size[0]):
        if im.getpixel((j,i)) != (0,128,0):
            continue
        if i < box_min[0]: box_min[0] = i
        if j < box_min[1]: box_min[1] = j
        if i > box_max[0]: box_max[0] = i
        if j > box_max[1]: box_max[1] = j
# Crop to the box (extended by 30 px to the right and 31 px downwards) and
# overwrite the capture with the cropped image.
Image.open('cap.png').crop((box_min[1],box_min[0],box_max[1]+30,box_max[0]+31)).save('cap.png')

# Recolor for readability: pure-black pixels become white, every other pixel
# becomes black.
im = Image.open('cap.png').convert('RGB')
width, height = im.size
for i in range(height):
    for j in range(width):
        was_black = im.getpixel((j,i)) == (0,0,0)
        im.putpixel((j,i), (255,255,255) if was_black else (0,0,0))

# Disabled: draw a crosshair through the image centre.
#for i in range(im.size[1]):
#    im.putpixel((im.size[0]/2,i),(0,0,0))
#for i in range(im.size[0]):
#    im.putpixel((i,im.size[1]/2),(0,0,0))
im.save('cap.png')
answer = []
 
# For every angle 0, 10, ..., 350: rotate the capture about its centre and
# save the fixed strip x=335..355, y=0..270 as cap<angle+1>.png.
# The source image never changes inside the loop, so it is read only once.
img = cv2.imread('cap.png',0)
rows,cols = img.shape
for i in range(0,360,10): # Rotation angle: 0, 10, 20, ...
    M = cv2.getRotationMatrix2D((cols/2,rows/2),i,1) # Rotation about the image centre, scale 1.
    dst = cv2.warpAffine(img,M,(cols,rows))
    cv2.imwrite('cap0.png',dst) # The rotated image goes through the scratch file cap0.png.
    Image.open('cap0.png').crop((335,0,355,270)).save('cap%s.png' %str(i+1))

# Cut 20x20 tiles out of the rotated strips and number them num1.png,
# num2.png, ... Each band contributes one tile per rotation angle (36 tiles).
# A band is (top, n_below_180, n_from_180): the tile starts at row `top` for
# angle 0 and moves up by int((angle/10) * (n/36.0)) rows as the angle grows,
# with n chosen by whether the angle is below 180.
# NOTE(review): presumably one band per turn of the captcha's layout and the
# n values were tuned by hand — confirm before changing them.
crop_bands = [
    (248, 15, 18),
    (228, 19, 22),
    (205, 25, 28),
    (178, 31, 34),
    (143, 36, 39),
    (103, 42, 47),
    (55, 50, 57),
]

cnt = 1
for top, n_below_180, n_from_180 in crop_bands:
    for i in range(0,360,10):
        im = Image.open('cap%s.png' %str(i+1))
        n = n_below_180 if i<180 else n_from_180
        shift = int((i/10)*(n/36.0))
        im.crop((0,top-shift,20,top+20-shift)).save('num%s.png' %str(cnt))
        cnt += 1

# Final tile: the top 20x20 corner of the unrotated strip.
im = Image.open('cap1.png')
im.crop((0,0,20,20)).save('num%s.png' %str(cnt))

# Lay num1.png .. num253.png out on a white 220x460 sheet, 11 tiles per row on
# a 20-pixel grid, and save it as 'num_total.png'.
sort_image = Image.new('RGB', (220,460), (255, 255, 255))

for i in range(1,254):
    y, x = divmod(i - 1, 11)  # row and column of tile number i
    sort_image.paste(Image.open('num%s.png' %i),(x*20,y*20))
sort_image.save('num_total.png')

img = cv2.imread('num_total.png',0)
ret,thresh1 = cv2.threshold(img,120,255,cv2.THRESH_BINARY)
cv2.imwrite('num_total0.png',thresh1)

print pytesser.image_to_string(Image.open('num_total.png'))
print pytesser.image_to_string(Image.open('num_total.png'))
exit(1)


# POST the solution to the mission page and print the first 10000 characters
# of the response.
# NOTE(review): this code follows an unconditional exit(1) in the script, so
# it is never reached as written, and `send` is not assigned anywhere in the
# visible code — it has to be set to the recognised text before enabling this.
req = urllib2.Request('https://www.hackthissite.org/missions/prog/6/','solution='+send)
# The session id is hard-coded and differs from the one used in the download
# script earlier in this memo.
req.add_header('cookie','PHPSESSID=g8kl8ggr529uqoq86o48i6d2s7')
req.add_header('referer','https://www.hackthissite.org/missions/prog/6/index.php')
print urllib2.urlopen(req).read()[:10000]
exit(1)



'private > memo' 카테고리의 다른 글

읽어볼것  (0) 2016.02.04
찬찬히 정리할것.  (0) 2015.12.07
데코레이터, 이터레이터, 제너레이터 등  (0) 2015.09.09
http 2, 필터 관련  (0) 2015.08.16
php 형 비교 표  (0) 2015.07.31