"""Template-matching OCR for short, fixed-font captcha images."""

import sys

from PIL import Image


def _binarize(image, threshold):
    """Reduce the RGB *image* to pure black and white, in place.

    A pixel becomes black when its red channel is at most *threshold* while
    its green and blue channels are both above it; every other pixel becomes
    white.
    """
    pixels = image.load()
    width, height = image.size
    for y in range(height):
        for x in range(width):
            red, green, blue = pixels[x, y][:3]
            # NOTE(review): this reproduces the original test
            # ``not (R > t) and (G > t) and (B > t)`` exactly, where ``not``
            # binds to the red comparison only.  The original comment
            # ("if color != black, then set to white") suggests a different
            # rule may have been intended -- confirm against real captchas
            # before changing it.
            if red <= threshold and green > threshold and blue > threshold:
                pixels[x, y] = (0, 0, 0)
            else:
                pixels[x, y] = (255, 255, 255)


def _glyph_spans(mask_image):
    """Return the ``(left, right)`` column span of every glyph in the mask.

    A column whose red channel is 0 in every row is a separator.  A run of
    adjacent separator columns counts as a single separator, so no
    zero-width glyph is ever produced (an empty glyph would match anything
    with a perfect score of 0).  ``right`` is exclusive.
    """
    pixels = mask_image.load()
    width, height = mask_image.size
    spans = []
    start = 0
    for x in range(width):
        if all(pixels[x, y][0] == 0 for y in range(height)):
            if x > start:
                spans.append((start, x))
            start = x + 1
    if start < width:
        # The last glyph runs to the right edge of the mask.
        spans.append((start, width))
    return spans


def _best_match(image, glyph):
    """Slide *glyph* horizontally across *image* and find the best fit.

    The score of a position is the sum of absolute red-channel differences
    over the overlapping area (lower is better).  Returns
    ``(score, offset)`` for the left-most best position, or ``None`` when
    the glyph is wider than the image and therefore fits nowhere.
    """
    image_px = image.load()
    glyph_px = glyph.load()
    glyph_width = glyph.size[0]
    rows = min(image.size[1], glyph.size[1])
    best = None
    # "+ 1" so that the right-most position, where the glyph touches the
    # right edge of the image, is tried as well.
    for offset in range(image.size[0] - glyph_width + 1):
        score = sum(
            abs(image_px[offset + i, j][0] - glyph_px[i, j][0])
            for i in range(glyph_width)
            for j in range(rows)
        )
        # Strict "<" keeps the left-most position on ties.
        if best is None or score < best[0]:
            best = (score, offset)
    return best


def ocr(im, threshold=200, mask="letters.bmp", alphabet="0123456789abcdef",
        box=(8, 8, 58, 18), length=5):
    """Read a captcha by matching it against a strip of reference glyphs.

    The captcha is cropped to *box* and reduced to black and white.  Each
    glyph of the mask strip is then slid across the result; the *length*
    glyphs with the lowest difference score are returned, ordered left to
    right by the position of their best match.

    Because every glyph contributes at most one candidate, a character can
    appear at most once in the returned string.

    Args:
        im: Path (or file object) of the captcha image.
        threshold: Channel value used when reducing the captcha to black
            and white.
        mask: Path (or file object) of the glyph strip.  Glyphs are laid
            out left to right and separated by columns whose red channel
            is 0 in every row.
        alphabet: Characters of the glyphs in the strip, in strip order.
        box: ``(left, upper, right, lower)`` region of the captcha that
            contains the text.
        length: Number of characters to return.

    Returns:
        The recognised text.

    Raises:
        IndexError: If the mask contains more glyphs than *alphabet* has
            characters.
    """
    # Context managers release the underlying files; convert() returns an
    # independent, fully loaded image, so nothing is read after closing.
    with Image.open(im) as source:
        img = source.convert("RGB").crop(box)
    # Converting the mask as well makes palette and grayscale bitmaps
    # indexable per channel, like true-colour ones.
    with Image.open(mask) as source:
        letters = source.convert("RGB")

    _binarize(img, threshold)

    rows = min(img.size[1], letters.size[1])
    candidates = []
    for index, (left, right) in enumerate(_glyph_spans(letters)):
        match = _best_match(img, letters.crop((left, 0, right, rows)))
        if match is not None:
            score, offset = match
            candidates.append((score, alphabet[index], offset))

    # Keep the best-scoring glyphs, then restore reading order.
    best = sorted(candidates)[:length]
    best.sort(key=lambda candidate: candidate[2])
    return "".join(char for _, char, _ in best)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: {} IMAGE".format(sys.argv[0]))
    print(ocr(sys.argv[1]))
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question