pytorch-captcha-recognition icon indicating copy to clipboard operation
pytorch-captcha-recognition copied to clipboard

one hot code 独热码代码优化

Open · asyncins opened this issue 6 years ago • 1 comment

原来的独热码编码时位置计算太依赖数字,这样每次改动图片大小时都有可能对应改动编码算法。而且解码也必须依赖数字。从这两个角度优化,新的独热码代码如下:

import numpy


# Maps every supported captcha character to its class id: the digits
# '0'-'9' take ids 0-9 and the uppercase letters 'A'-'Z' take ids 10-35.
CHARACTER = {
    symbol: class_id
    for class_id, symbol in enumerate('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
}

# Number of character positions reserved in an encoded captcha.
CAPTCHA_NUMBER = 6


def one_hot_encode(value: 'str | list') -> tuple:
    """Encode captcha text as a flat one-hot vector.

    The vector holds ``CAPTCHA_NUMBER`` consecutive groups of
    ``len(CHARACTER)`` slots, one group per character position. For the
    character at position ``k`` the slot ``k * len(CHARACTER) + class_id``
    is set to 1.0, where ``class_id`` comes from ``CHARACTER``.

    Args:
        value: The captcha text, either a string or another iterable of
            single characters. Input shorter than ``CAPTCHA_NUMBER``
            leaves the trailing groups all zero.

    Returns:
        A ``(vector, order)`` pair. ``vector`` is a 1-D float numpy array
        of length ``CAPTCHA_NUMBER * len(CHARACTER)``. ``order`` lists the
        flat indices that were set, in position order, and is the input
        expected by ``one_hot_decode``.

    Raises:
        IndexError: If ``value`` has more than ``CAPTCHA_NUMBER`` characters.
        ValueError: If ``value`` contains a character missing from
            ``CHARACTER``.
    """
    num_classes = len(CHARACTER)
    order = []
    vector = numpy.zeros(CAPTCHA_NUMBER * num_classes, dtype=float)
    for position, char in enumerate(value):
        if position >= CAPTCHA_NUMBER:
            # Same exception type numpy would raise on the out-of-range
            # write below, but with a message that names the real cause.
            raise IndexError(
                f"captcha has more than {CAPTCHA_NUMBER} characters"
            )
        try:
            class_id = CHARACTER[char]
        except KeyError:
            # dict.get() would hand back None here and the index arithmetic
            # would then fail with an unrelated-looking TypeError.
            raise ValueError(
                f"unsupported captcha character {char!r} at position {position}"
            ) from None
        index = position * num_classes + class_id
        vector[index] = 1.0
        order.append(index)
    return vector, order


def one_hot_decode(value: list) -> str:
    """Turn a list of flat one-hot indices back into captcha text.

    Each entry of ``value`` is interpreted by its position in the list:
    the entry at position ``p`` has ``p * len(CHARACTER)`` subtracted to
    recover a class id, which is then matched against the values of
    ``CHARACTER``. Entries whose class id matches no character are
    skipped silently, so the result may be shorter than ``value``.

    Args:
        value: Flat indices in position order, e.g. the ``order`` list
            returned by ``one_hot_encode``.

    Returns:
        The decoded characters joined into a single string.
    """
    decoded = []
    for position, flat_index in enumerate(value):
        # The first position needs no offset; later ones drop the slots
        # that belong to the preceding character groups.
        if position:
            class_id = flat_index - position * len(CHARACTER)
        else:
            class_id = flat_index
        found = next(
            (char for char, code in CHARACTER.items() if class_id == int(code)),
            None,
        )
        if found is not None:
            decoded.append(found)
    return "".join(decoded)


if __name__ == '__main__':
    # Demo round trip: encode a sample captcha, show the flat indices and
    # the one-hot vector, then decode the indices back into text.
    sample = '0A2JYD'
    encoded, positions = one_hot_encode(sample)
    print(positions)
    print(encoded)
    restored = one_hot_decode(positions)
    print(restored)

运行后,输出如下:

[0, 46, 74, 127, 178, 193]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
0A2JYD

asyncins avatar May 02 '19 10:05 asyncins

你好,我用这个类替换 one_hot_encoding.py 后,执行 python captcha_train.py 和 python captcha_test.py 时一直报错,提示返回值类型不一样了。我是新手,不知道该怎么改。

lxk696 avatar Jan 06 '20 15:01 lxk696