验证码识别captcha_trainer使用demo

站长

2024年04月01日 15:44 · 阅读数 49

验证码生成captcha

若想生成不带干扰元素的验证码，按住 Ctrl 键并用鼠标左键点击 ImageCaptcha()，进入 image.py 文件，找到下面两行并将其注释掉：
self.create_noise_dots(im, color)
self.create_noise_curve(im, color)

生成tfrecord格式文件

captcha_trainer需要两个参数input、label

from typing import List

import tensorflow as tf
import os

# def _bytes_feature(value):
#     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _bytes_feature(value):
    """Wrap a single ``str`` or ``bytes`` value in a ``tf.train.Feature``.

    A ``str`` is encoded as UTF-8 first; any other value is handed to
    ``tf.train.BytesList`` unchanged.
    """
    # isinstance (not an exact type comparison) so str subclasses are encoded too
    if isinstance(value, str):
        value = value.encode('utf-8')
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def bytes_list_feature(value: List[bytes]):
    """Wrap a list of byte strings in a ``tf.train.Feature`` backed by a ``BytesList``."""
    packed = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=packed)

def int64_feature(value: int):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    packed = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=packed)

def int64_list_feature(value: List[int]):
    """Wrap a list of integers in a ``tf.train.Feature`` backed by an ``Int64List``."""
    packed = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=packed)

def float_feature(value: float):
    """Wrap a single float in a ``tf.train.Feature`` backed by a ``FloatList``."""
    packed = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=packed)

def float_list_feature(value: List[float]):
    """Wrap a list of floats in a ``tf.train.Feature`` backed by a ``FloatList``."""
    packed = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=packed)

def create_tfrecords(directory, output_file, height=60, width=160, channels=3):
    """Pack every file found under ``directory`` into one TFRecord file.

    The directory tree is walked recursively. For each file, the part of its
    name before the first underscore is used as the label, and the raw file
    bytes are stored without decoding. Every record carries the features
    ``input``, ``height``, ``width``, ``channels`` and ``label``.

    Args:
        directory: Root directory to scan for files.
        output_file: Path of the TFRecord file to write.
        height: Value stored in the ``height`` feature of every record.
        width: Value stored in the ``width`` feature of every record.
        channels: Value stored in the ``channels`` feature of every record.
    """
    # The context manager closes the writer even if a file read or write fails.
    with tf.io.TFRecordWriter(output_file) as writer:
        for root, _dirs, files in os.walk(directory):
            for file in files:
                # The label is kept as a string, so it is not limited to digits.
                label = file.split('_')[0]

                # Read the raw file contents
                with open(os.path.join(root, file), 'rb') as f:
                    data = f.read()

                # captcha_trainer needs the two features "input" and "label"
                feature = {
                    # image data
                    "input": _bytes_feature(data),
                    # height, width, number of channels
                    "height": _int64_feature(height),
                    "width": _int64_feature(width),
                    "channels": _int64_feature(channels),
                    # label, stored as a UTF-8 encoded string
                    "label": _bytes_feature(label),
                }
                example = tf.train.Example(features=tf.train.Features(feature=feature))
                # Append the serialized example to the TFRecord file
                writer.write(example.SerializeToString())

# Example input directory and output file path
# (judging by the output file name, this pair produces the training set)
directory = 'D:/BaiduNetdiskDownload/test_captcha/img1/'
output_file = 'D:/BaiduNetdiskDownload/test_captcha/trains.0.tfrecords'

# Alternative pair of paths (the file name suggests the validation set);
# uncomment these instead of the two assignments above to generate it.
#directory = 'D:/BaiduNetdiskDownload/test_captcha/img2/'
#output_file = 'D:/BaiduNetdiskDownload/test_captcha/valids.0.tfrecords'
# Create the TFRecord file
create_tfrecords(directory, output_file)

captcha_trainer配置

拉取代码: git clone https://github.com/kerlomz/captcha_trainer

处理配置文件，根据readme说明处理就行了。

执行训练

python trains.py 项目名

生成模型测试

验证码识别captcha_trainer使用demo

muggle_ocr测试

pip 上已经没有这个包，无法直接安装，可以用下面的方式安装：

github安装
pip install git+https://github.com/litongjava/muggle_ocr.git
下载后安装
pip install 文件名.whl
python setup.py install

import muggle_ocr
from PIL import Image
import numpy as np
import  init_util
# Project-local helper; presumably configures GPU usage before the SDK is built.
init_util.set_GPU()

yaml_path = r'model.yaml'
image_test = r'3.jpg'

# Variant that omits conf_path:
#sdk = muggle_ocr.SDK(model_type=muggle_ocr.ModelType.Captcha)
sdk = muggle_ocr.SDK(
    model_type=muggle_ocr.ModelType.Captcha,
    conf_path=yaml_path,
)

# Read the raw image bytes, then run the prediction once the file is closed.
with open(image_test, "rb") as f:
    image_binary = f.read()

text = sdk.predict(image_bytes=image_binary)
print(text)

验证码识别captcha_trainer使用demo 用上面的方式是可以识别成功的；但我想自己加载 pb 文件来验证识别，没有成功（后面有时间了再研究下）。

注意两个输入、输出参数名


def grphPB():
    """Load the frozen ``.pb`` graph and print its prediction for one image.

    Reads ``MODEL_PATH + '/test_captcha_9000.pb'``, imports it into a dedicated
    graph, feeds the result of ``init_util.preprocess_image(IMAGE_PATH)`` to the
    ``input:0`` tensor and prints the value fetched from ``dense_decoded:0``.
    """
    # Parse the serialized GraphDef from disk
    with tf.io.gfile.GFile(MODEL_PATH+'/test_captcha_9000.pb', 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    # as_default() only takes effect as a context manager, so the import runs
    # inside the `with` block and targets this explicit graph.
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')

    # Bind the session to the graph that now holds the imported nodes
    with tf.compat.v1.Session(graph=graph) as sess:
        # Look up the input and output tensors by name
        input_node = graph.get_tensor_by_name('input:0')
        output_node = graph.get_tensor_by_name('dense_decoded:0')

        # Run the prediction
        image = init_util.preprocess_image(IMAGE_PATH)
        predictions = sess.run(output_node, feed_dict={input_node: image})
        print(predictions)

转载自:https://juejin.cn/post/7352387642845511720