1. debug fastai course 的重构
# https://nbviewer.org/github/fastai/course-v3/blob/master/nbs/dl1/lesson1-pets.ipynb
from fastai.vision import *
from fastai.metrics import error_rate

bs = 64
# `path` was used below without ever being assigned in this snippet; the
# linked lesson notebook obtains it by fetching the Oxford-IIIT Pet dataset.
path = untar_data(URLs.PETS)
path_anno = path / 'annotations'
path_img = path / 'images'
fnames = get_image_files(path_img)
np.random.seed(2)
# The class label is the part of the file name before the trailing
# "_<digits>.jpg"; the dot is escaped so it only matches a literal ".".
pat = r'/([^/]+)_\d+\.jpg$'
data = ImageDataBunch.from_name_re(
    path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=bs
).normalize(imagenet_stats)
data.show_batch(rows=3, figsize=(7, 6))
learn = cnn_learner(data, models.resnet34, metrics=error_rate)
learn.fit_one_cycle(4)
get_image_files:获取所有的文件路径。fnames,就是file of names。所有的绝对路径
ImageDataBunch
调用ImageDataBunch的构造函数和from_name_re
from_name_func
src=ImageList.split_by_rand_pct()————(ImageList(ItemList)
重新构建了一个ItemList的trainset和validset
马上调用ItemList的
2. Pets.ipynb debug fastai库的调用图
ImageDataBunch的调用图
① 先设计好要用的transform图像变换的顺序,区分train和valid
② 通过各种方式建立数据集,最后都走LabelLists的出口。中间要建立ImageList→ItemLists→LabelList→LabelLists→databunch
什么时候进行batch的transform呢?
如下面所示,__getattr__
中有self.process(),这样就去调用了y的Category,但是没有进行x的transform。只是标签提取
getattr函数是干什么的?
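getattr() 是python 中的一个内置函数,用来获取对象中的属性值;而 __getattr__ 是类里定义的特殊方法,两者不是一回事。
只有当正常的属性查找失败(实例、类及其基类中都找不到该属性)时,才会去调用 __getattr__;每次属性访问都会先经过的是 __getattribute__。下面的例子中,a.value 能正常找到,不会进入 __getattr__;a.name 不存在,才会进入。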
class A(object):
    """Demo class showing that ``__getattr__`` is only a fallback hook."""

    def __init__(self, value):
        self.value = value

    def __getattr__(self, item):
        # Reached only when the normal attribute lookup has failed to find
        # `item`; attributes that exist (such as `value`) never come here.
        print("into __getattr__")
        return "can not find"


a = A(10)
print(a.value)
# 10
print(a.name)
# into __getattr__
# can not find
下面的代码,在course中有讲到,也有重构。
在上面②下面的一行处按F11进入 return cls.create_from_ll(),
先进入了cls(ItemLists)的__getattr__里面去了。
应该是src的基类的函数中去了。
进入了self.process()
进入了LabelLists的process函数中了
# https://blog.csdn.net/winycg/article/details/78512300
# import sys
# sys.path.append(r'/dataset_zhr/course-v3/nbs/dl2/exp')
# The sys.path approach above failed; the `exp` package has to sit in the same folder as this file.
from exp .nb_08 import *
import numpy as np
# Dataset root. The trailing slash matters: the sub-paths below are built by plain string concatenation.
path = '/dataset_zhr/oxford-iiit-pet/'
path_anno = path + 'annotations'
path_img = path + 'images'
# Without the resize step the dataloader could not assemble a batch.
tfms = [make_rgb , ResizeFixed (128 ), to_byte_tensor , to_float_tensor ]
# Item list built from the files under path_img, with the transforms above attached.
il = ImageList .from_files (path_img , tfms = tfms )
np .random .seed (2 )
# Random train/validation split.
def split_by_rand_pct(valid_pct: float = 0.2, seed: int = None, len_list: int = None):
    """Return a boolean mask selecting a random share of indices for validation.

    Args:
        valid_pct: Fraction of the ``len_list`` indices to mark as validation.
            A value of 0 is replaced by 0.2.
        seed: When not None, NumPy's global random generator is re-seeded
            with it before drawing the permutation.
        len_list: Number of items to split. Required, even though the
            signature defaults it to None.

    Returns:
        A bool ndarray of length ``len_list``; True marks a validation index.
        ``int(valid_pct * len_list)`` entries are True.

    Raises:
        TypeError: If ``len_list`` is None.
    """
    if len_list is None:
        raise TypeError("split_by_rand_pct() requires len_list (the number of items to split)")
    if valid_pct == 0:
        valid_pct = 0.2
    if seed is not None:
        np.random.seed(seed)
    rand_idx = np.random.permutation(len_list)
    cut = int(valid_pct * len_list)
    # Indices that go to the validation set.
    mask = np.zeros(len_list, dtype=bool)
    mask[rand_idx[:cut]] = True
    return mask
# Random train/validation split: valid_pct=0.2 and seed=2 are bound positionally.
# NOTE(review): the one remaining positional parameter is len_list, so whatever
# SplitData.split_by_func passes to `splitter` must be an item count - confirm.
splitter = partial (split_by_rand_pct , 0.2 , 2 )
sd = SplitData .split_by_func (il , splitter )
# Label items with a regular expression.
def pat_labeler(pat, fn):
    """Return the first capture group of ``pat`` found in the path ``fn``.

    Args:
        pat: Regular expression (string or compiled pattern) with at least
            one capture group.
        fn: File name to label. A ``Path`` is converted with ``as_posix()``
            so the pattern always sees forward slashes; anything else goes
            through ``str()``.

    Returns:
        The text matched by group 1.

    Raises:
        AssertionError: If ``pat`` does not match ``fn``.
    """
    if isinstance(fn, Path):
        fn = fn.as_posix()
    res = re.compile(pat).search(str(fn))
    if res is None:
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        raise AssertionError(f'Failed to find "{pat}" in "{fn}"')
    return res.group(1)
# The class label is the part of the file name before the trailing
# "_<digits>.jpg"; the dot is escaped so it only matches a literal ".".
pat = r'/([^/]+)_\d+\.jpg$'
pat_label = partial(pat_labeler, pat)
ll = label_by_func(sd, pat_label, proc_y=CategoryProcessor())
# ll = label_by_func(sd, pat_label)
# ll.train.y

# Build the databunch.
bs = 128
# train_dl, valid_dl = get_dls(ll.train, ll.valid, bs, num_workers=4)
# x,y = next(iter(train_dl))
SplitData.to_databunch = databunchify
data = ll.to_databunch(bs, c_in=3, c_out=37)

# Two separate callbacks. The closing parenthesis used to sit after
# CudaCallback, which handed it to AvgStatsCallback as an extra argument and
# left no CUDA callback registered, although the normalisation below uses
# CUDA tensors.
cbfs = [partial(AvgStatsCallback, accuracy), CudaCallback]

# Idea: build the network by hand, see how it performs, and work out how to
# catch up with fastai's results. Start from scratch and improve step by
# step instead of trying to do everything at once.
# m,s = x.mean((0,2,3)).cuda(), x.std((0,2,3)).cuda()
# Statistics taken from a single batch.
_m = tensor([0.4734, 0.4456, 0.3943])
_s = tensor([0.2527, 0.2545, 0.2631])
norm_pets = partial(normalize_chan, mean=_m.cuda(), std=_s.cuda())
cbfs.append(partial(BatchTransformXCallback, norm_pets))

nfs = [64, 64, 128, 256]