当前位置: 首页 > 后端技术 > Python

作业:手写数字识别-MNIST数据集-SoftMax回归

时间:2023-03-26 17:26:20 Python

自己跑的时候,正确率是0.89。

依赖库:numpy、PIL(仅在需要预测实际图片时使用)。

解压后,同级文件夹下有4个MNIST数据文件和4个py文件。运行后得到的权重W和偏移量b也保存在同级文件夹中:bin文件供程序读取,csv文件仅供查看。

所有训练和测试相关的函数都在文件softmax.py中:

```python
#!/usr/bin/python
import numpy as np

np.random.seed(0)


# 定义softmax函数
def SoftMax(z):
    if np.ndim(z) == 2:
        axisn = 1
    else:
        axisn = 0
    s = np.exp(z) / np.sum(np.exp(z), axis=axisn, keepdims=True)
    return s


# b = np.array([1, 2, 4, 5, 5, 6]).reshape(2, 3)
# print(SoftMax(b))


# 编码标签
def OneCode(y):
    r = len(y)
    c = len(np.unique(y))
    one_hot = np.zeros((r, c))
    one_hot[np.arange(len(y)), y.astype(int).T] = 1
    return one_hot


# 定义y_的计算函数
def CalcY_(x, w, b):
    # w*X.T 后面的 +b 是广播操作
    y_ = np.dot(w, x.T) + b
    return y_.T


# 定义损失函数-交叉熵
def cross_entropy(y, y_):
    loss = -(1 / len(y)) * np.sum(y * np.log(y_))
    return loss


# 定义训练函数
def train(tr_x, tr_y, N):
    ''''''
    # 模型
    # y = w1*x1 + w2*x2 + b
    W = np.random.rand(10, 784)
    b = np.random.rand(10, 1)
    losss = []
    losi = 0
    y = OneCode(tr_y)  # 将标签1、2、3、4转换为向量0001、0010、0100、1000
    for i in range(N):
        # 计算损失
        x = tr_x
        y_ = SoftMax(CalcY_(x, W, b))
        loss = cross_entropy(y, y_)
        losss.append(loss)
        # 计算梯度
        grad_w = (1 / len(x)) * np.dot((y_ - y).T, x)
        grad_b = (1 / len(x)) * np.sum((y_ - y))
        # 更新参数
        # 学习率×梯度
        W = W - 0.5 * grad_w
        b = b - 0.5 * grad_b
        delta = abs(losi - loss)
        print(i, loss, delta)
        # 损失值低于0.01,或其变化量低于0.0001时停止训练
        if (loss < 0.01 or delta < 0.0001):
            break
        losi = loss
    return W, b


# 定义测试函数
def check(te_x, te_y, W, b):
    # te_x, te_y = Iread('te')
    # te_x = te_x / 255
    # te_y = te_y
    # print(W)
    # print(b)
    y_ = SoftMax(CalcY_(te_x, W, b))
    l = np.argmax(y_, axis=1).reshape(10000, 1)
    right = np.sum(l == te_y.astype(int)) / 10000
    print('rightrate:', right)
    return right
```

Idata.py:文件读取函数

```python
#!/usr/bin/python
import numpy as np

filename_train_data = './train-images-idx3-ubyte'
filename_train_label = './train-labels-idx1-ubyte'
filename_test_data = './t10k-images-idx3-ubyte'
filename_test_label = './t10k-labels-idx1-ubyte'


def Iread_train_data():
    fp = open(filename_train_data, 'rb')
    fl = open(filename_train_label, 'rb')
    fp.read(4 * 4)
    fl.read(2 * 4)
    nstrs = np.zeros((60000, 28 * 28))
    l = np.zeros((60000, 1))
    for i in range(60000):
        fstr = fp.read(28 * 28)
        lstr = fl.read(1)
        l[i] = int.from_bytes(lstr, byteorder='big', signed=False)
        nstrs[i, :] = np.frombuffer(fstr, dtype=np.uint8)
    return nstrs, l


def Iread_test_data():
    fp = open(filename_test_data, 'rb')
    fl = open(filename_test_label, 'rb')
    fp.read(4 * 4)
    fl.read(2 * 4)
    nstrs = np.zeros((10000, 28 * 28))
    l = np.zeros((10000, 1))
    for i in range(10000):
        fstr = fp.read(28 * 28)
        lstr = fl.read(1)
        l[i] = int.from_bytes(lstr, byteorder='big', signed=False)
        nstrs[i, :] = np.frombuffer(fstr, dtype=np.uint8)
    return nstrs, l


def Iread(option):
    if (option == 'tr'):
        d, l = Iread_train_data()
        return d, l
    elif (option == 'te'):
        d, l = Iread_test_data()
        return d, l
    else:
        print('operr')
```

minst.py:完成训练和测试的脚本

```python
#!/usr/bin/python
import numpy as np
from Idata import Iread
from softmax import train, check

# 读取数据
tr_x, tr_y = Iread('tr')
te_x, te_y = Iread('te')

# 将训练集和测试集归一化为0-1区间
tr_x = tr_x / 255
te_x = te_x / 255
# print(tr_x.shape, tr_y.shape)
# print(te_x.shape, te_y.shape)

# train
W, b = train(tr_x, tr_y, 1000)

# 然后保存参数
W.tofile('w.bin')
b.tofile('b.bin')
np.savetxt('w.csv', W, fmt='%f', delimiter=',')
np.savetxt('b.csv', b, fmt='%f', delimiter=',')

# 读取参数
W = np.fromfile('W.bin').reshape(10, 784)
b = np.fromfile('b.bin').reshape(10, 1)

# Test
r = check(te_x, te_y, W, b)
```

predict.py:用于识别实际手写数字图像的脚本

```python
#!/usr/bin/python
import numpy as np
from softmax import SoftMax, CalcY_, cross_entropy, OneCode
from PIL import Image

# 图片文件必须是一张28×28的灰度图,像素值范围为0到255
fname = '4.bmp'
img = np.array(Image.open(fname))
te_x = img.reshape(1, 28 * 28)
te_x = te_x / 255
print(te_x)
W = np.fromfile('W.bin').reshape(10, 784)
b = np.fromfile('b.bin').reshape(10, 1)
y_ = SoftMax(CalcY_(te_x, W, b))
y_ = np.argmax(y_, axis=1)
print('pred:', y_)
```