machine learning, image

Enigmagroup Captcha Cracking 1, 2

qkqhxla1 2015. 4. 4. 13:35

앞 글들에서 많이 썼던 pytesser로 문자열을 인식했고, 인식 전의 이미지 보정은 모두 PIL 모듈로 처리했다.


Captcha 1.

# -*- encoding: cp949 -*-
import Image
import urllib2
import pytesser
def download_photo(filename):
    """Download the Captcha 1 image and store it in the project directory.

    Args:
        filename: File name (not a path) to save the image under; it is
            appended to the hard-coded project directory below.

    Returns:
        The absolute path of the file that was written.

    Raises:
        urllib2.URLError: If the request fails.
        IOError: If the target file cannot be opened or written.
    """
    file_path = "%s%s" % ("C:\\Users\\Ko\\Documents\\Visual Studio 2012\\Projects\\PythonApplication37\\", filename)

    req = urllib2.Request('http://www.enigmagroup.org/missions/captcha/1/image.php')
    # The cookie value is the author's placeholder text ("your own cookie");
    # presumably it must be replaced with a real session cookie before running.
    req.add_header('Cookie','본인 쿠키')

    # Open the connection before touching the output file, so a failed
    # request does not truncate a previously downloaded image.
    image_on_web = urllib2.urlopen(req)
    try:
        with open(file_path, "wb") as downloaded_image:
            # Copy in fixed-size chunks instead of buffering the whole body.
            while True:
                buf = image_on_web.read(8192)
                if not buf:
                    break
                downloaded_image.write(buf)
    finally:
        # Always release the HTTP response, even if writing fails.
        image_on_web.close()
    return file_path

import urllib

# Captcha 1 driver: download the image, clean it up, OCR it, submit the answer.
# Use the path returned by download_photo() rather than a relative
# 'image.png', so the script also works when the current working directory
# is not the hard-coded project directory.
image_path = download_photo('image.png')  # download the captcha
im = Image.open(image_path)
width, height = im.size
# Colour correction: every pixel whose value is not 0 is set to 10.
# NOTE(review): comparing against an int assumes a single-band (palette or
# grayscale) image -- confirm against the actual captcha format.
for i in range(height):
    for j in range(width):
        if im.getpixel((j,i)) !=0:
            im.putpixel((j,i),10)
x = 2
# Size correction: resample the full image area down to 1/x of its size
# and overwrite the downloaded file with the result.
im.transform((width//x,height//x),Image.EXTENT,(0,0,width,height)).save(image_path)
# Derive the answer; strip the surrounding whitespace/newlines that the OCR
# output may carry so they are not sent as part of the answer.
answer = pytesser.image_to_string( Image.open(image_path) ).strip()
# URL-encode the form body so unexpected OCR characters (spaces, '&', '=')
# cannot corrupt the POST data.
post_data = urllib.urlencode([('answer', answer), ('submit', 'true')])
req = urllib2.Request('http://www.enigmagroup.org/missions/captcha/1/image.php',post_data)
req.add_header('Referer','http://www.enigmagroup.org/missions/captcha/1/image.php')
# Placeholder cookie text ("your own cookie"); presumably to be replaced
# with a real session cookie.
req.add_header('Cookie','본인 쿠키')
print(urllib2.urlopen(req).read())




Captcha 2.

# -*- encoding: cp949 -*-
import Image
import urllib2
import pytesser
def download_photo(filename):
    """Download the Captcha 2 image and store it in the project directory.

    Args:
        filename: File name (not a path) to save the image under; it is
            appended to the hard-coded project directory below.

    Returns:
        The absolute path of the file that was written.

    Raises:
        urllib2.URLError: If the request fails.
        IOError: If the target file cannot be opened or written.
    """
    file_path = "%s%s" % ("C:\\Users\\Ko\\Documents\\Visual Studio 2012\\Projects\\PythonApplication37\\", filename)

    req = urllib2.Request('http://www.enigmagroup.org/missions/captcha/2/image.php')
    # The cookie value is the author's placeholder text ("your own cookie");
    # presumably it must be replaced with a real session cookie before running.
    req.add_header('Cookie','본인 쿠키')

    # Open the connection before touching the output file, so a failed
    # request does not truncate a previously downloaded image.
    image_on_web = urllib2.urlopen(req)
    try:
        with open(file_path, "wb") as downloaded_image:
            # Copy in fixed-size chunks instead of buffering the whole body.
            while True:
                buf = image_on_web.read(8192)
                if not buf:
                    break
                downloaded_image.write(buf)
    finally:
        # Always release the HTTP response, even if writing fails.
        image_on_web.close()
    return file_path

import urllib

# Captcha 2 driver: keep fetching a fresh captcha, cleaning it, OCR-ing it and
# submitting the guess until the response no longer contains 'Sorry'.
while 1:
    # Use the path returned by download_photo() rather than a relative
    # 'image.png', so the loop also works when the current working directory
    # is not the hard-coded project directory.
    image_path = download_photo('image.png')
    im = Image.open(image_path)
    rgb = im.load()
    width, height = im.size
    for i in range(height):
        for j in range(width):
            # Added compared to Captcha 1: coloured dots are scattered around
            # the letters to make them hard to read, so every pixel that is
            # not the text colour is painted over to remove the noise.
            # NOTE(review): assumes value 1 is the (black) text and 0 the
            # background of a single-band image -- confirm.
            if rgb[j,i] != 1:
                rgb[j,i] = 0
    # Snapshot of the cleaned image, kept for inspection.
    im.save('image1.png')
    x = 1  # scale divisor; 1 leaves the size unchanged
    im.transform((width//x,height//x),Image.EXTENT,(0,0,width,height)).save(image_path)
    answer = pytesser.image_to_string( Image.open(image_path) )
    # Correct a few characters that the OCR tends to misread, then drop any
    # remaining surrounding whitespace.
    answer = answer.replace('~\\/','V').replace('E','6').replace('H\'','F').replace('\n','').strip()
    print(answer)

    # URL-encode the form body so unexpected OCR characters (spaces, '&', '=')
    # cannot corrupt the POST data.
    post_data = urllib.urlencode([('answer', answer), ('submit', 'true')])
    req = urllib2.Request('http://www.enigmagroup.org/missions/captcha/2/image.php',post_data)
    req.add_header('Referer','http://www.enigmagroup.org/missions/captcha/2/image.php')
    # Placeholder cookie text ("your own cookie"); presumably to be replaced
    # with a real session cookie.
    req.add_header('Cookie','본인 쿠키')
    page = urllib2.urlopen(req).read()
    print(page)
    # Presumably the page contains 'Sorry' when the answer was rejected;
    # stop as soon as it does not.
    if 'Sorry' not in page:
        break

위 코드의 글자 색깔 보정 부분(if rgb[j,i] != 1: 로 시작하는 부분)을 적용한 결과 이미지.




인식이 틀리면 while 루프가 새 캡챠를 받아 다시 시도하므로, 계속 돌리다 보면 답이 나온다.