"""Filter a digit-bitmap text file down to selected digits and downsample it.

Each sample in the input is a run of bitmap rows (one character per pixel)
followed by a short label line.  Samples whose label is listed in
``digits_to_take`` are downsampled and written out as one line of digits:
the flattened image followed by the label.
"""

import numpy as np
import scipy.ndimage

# These are the digits we are considering to identify; samples carrying any
# other label are dropped.
digits_to_take = [0, 1, 2]

input_file_tra = 'data/optdigits-orig.tra'    # train data input file
output_file_tra = 'data/tra_processed.txt'    # train data output file
input_file_test = 'data/optdigits-orig.wdep'  # test data input file
output_file_test = 'data/test_processed.txt'  # test data output file


def _encode_sample(rows, label):
    """Return a 1 x (pixels + 1) array: the downsampled image, then the label.

    Args:
        rows: list of equal-length lists of ints, one inner list per bitmap row.
        label: integer class label appended after the pixel values.
    """
    # A plain ndarray is used instead of np.mat, which NumPy 2.0 removed;
    # zoom() converts its input to an ndarray anyway, so the result is the same.
    image = np.array(rows)
    # Scale each axis by 0.25 with order-1 (linear) spline interpolation;
    # per the original author's note this turns a 32x32 bitmap into 8x8.
    small = scipy.ndimage.zoom(image, 0.25, order=1)
    # np.append flattens its inputs, so the label lands after the last pixel.
    return np.append(small, [label]).reshape(1, -1)


def extract_digits_and_downsample(inputf, outputf):
    """Write the downsampled samples of the selected digits to ``outputf``.

    Lines shorter than 10 characters are treated as label lines, with the
    digit read from the second character; every other line is a bitmap row
    of single-digit pixels.  Blank lines are ignored.

    Args:
        inputf: path of the text file to read.
        outputf: path of the text file to create (overwritten if it exists).
    """
    rows = []  # bitmap rows collected since the previous label line
    # Both files are managed by the with-statement so the output is flushed
    # and closed even if parsing fails part-way through.
    with open(inputf) as in_file, open(outputf, 'w') as out_file:
        for line in in_file:
            if not line.strip():
                # A blank line (e.g. at end of file) is neither pixels nor a
                # label; skip it instead of failing on line[1].
                continue
            if len(line) < 10:  # label line rather than pixel data
                label = int(line[1])
                if label in digits_to_take:
                    np.savetxt(out_file, _encode_sample(rows, label),
                               delimiter='', fmt="%d")
                # The collected rows belong to this label whether or not it
                # was kept, so start afresh for the next sample.
                rows = []
            else:
                # Drop only the newline, so a final line without one keeps
                # its last pixel.
                rows.append([int(ch) for ch in line.rstrip('\n')])


def main():
    """Process the training file and the test file with the default paths."""
    extract_digits_and_downsample(input_file_tra, output_file_tra)
    extract_digits_and_downsample(input_file_test, output_file_test)


if __name__ == '__main__':
    main()