Press "Enter" to skip to content

CASIA-HWDB脱机手写汉字数据集单字符文件解析

CASIA-HWDB首页

这次编写的程序用于解析单字符手写样本文件,也就是后缀名为 .gnt 的文件。

file
这些都是单个手写字体的数据集

废话不多说,上代码:

import struct
import numpy as np
from codecs import decode
from PIL import Image
import pandas as pd
import os

def decode_gnt(mpf_file, save_file, file):
    """Unpack every sample stored in a ``.gnt`` file into JPEG images.

    Each record is read as a 10-byte header followed by the pixel data:
    a 4-byte little-endian unsigned int (parsed but not used), a 2-byte
    label, then the width and the height as little-endian unsigned
    shorts, and finally ``width * height`` bytes, one byte per pixel in
    row-major order.

    Sample number ``i`` is written to ``<save_file>/<i>_<file>.jpg`` and
    the labels, decoded as GBK, are written in sample order to
    ``<save_file>/label.csv``.

    Args:
        mpf_file: Path of the ``.gnt`` file to read.
        save_file: Output directory; it is created when missing and
            reused when it already exists.
        file: Text appended to the sample index in each image file name.

    Raises:
        struct.error: If the input ends in the middle of a record.
    """
    # exist_ok lets the function be re-run on a directory it already
    # populated instead of failing on the second call.
    os.makedirs(save_file, exist_ok=True)

    header_format = struct.Struct("<I2sHH")
    labels = []
    # The context manager guarantees the handle is released even when a
    # record turns out to be malformed.
    with open(mpf_file, "rb") as stream:
        index = 0
        while True:
            header = stream.read(header_format.size)
            if not header:
                break
            # A partial header raises struct.error here.
            _record_size, raw_label, width, height = header_format.unpack(header)

            pixel_count = width * height
            bitmap = stream.read(pixel_count)
            if len(bitmap) != pixel_count:
                raise struct.error(
                    "truncated sample {}: expected {} pixel bytes, got {}".format(
                        index, pixel_count, len(bitmap)
                    )
                )

            # uint8 is required for Pillow to treat the array as an 8-bit
            # greyscale image; the platform default integer dtype is not.
            pixels = np.frombuffer(bitmap, dtype=np.uint8).reshape(height, width)
            image = Image.fromarray(pixels).convert('RGB')
            image.save(os.path.join(save_file, '{}_{}.jpg'.format(index, file)))
            index += 1
            labels.append(decode(raw_label, 'gbk'))

    pd.Series(labels).to_csv(os.path.join(save_file, 'label.csv'))

if __name__ == '__main__':
    # Decode every regular file directly inside the dataset directory;
    # each one gets an output directory named after the file's stem.
    path = "data/Gnt1.0TrainPart1/"
    for entry in os.listdir(path):
        source = path + entry
        if not os.path.isfile(source):
            # Sub-directories (including earlier output) are skipped.
            continue
        stem = os.path.splitext(entry)[0]
        decode_gnt(source, path + stem + '/', stem)
Subscribe
提醒
guest
0 评论
Inline Feedbacks
View all comments
0
喜欢聆听每一种不同的观点,欢迎评论。x
()
x