fastai code notes

1. Debugging the refactored fastai course code

```python
# https://nbviewer.org/github/fastai/course-v3/blob/master/nbs/dl1/lesson1-pets.ipynb
from fastai.vision import *
from fastai.metrics import error_rate

bs = 64
path = untar_data(URLs.PETS)   # from the original notebook: download the dataset and define `path`
path_anno = path/'annotations'
path_img = path/'images'
fnames = get_image_files(path_img)

np.random.seed(2)
pat = r'/([^/]+)_\d+.jpg$'

data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=bs
                                  ).normalize(imagenet_stats)
data.show_batch(rows=3, figsize=(7,6))

learn = cnn_learner(data, models.resnet34, metrics=error_rate)
learn.fit_one_cycle(4)
```
- `get_image_files`: collects all the file paths; `fnames` ("file names") is the list of absolute paths
- `ImageDataBunch`
  - Calls the `ImageDataBunch` constructor and `from_name_re`
    - `from_name_func`
    - `src = ImageList.split_by_rand_pct()` (`ImageList(ItemList)`, i.e. `ImageList` subclasses `ItemList`)
      - This rebuilds an `ItemList` train set and valid set
  - It then immediately calls `ItemList`'s ...

- The `y` labels are tallied via the `vocab` list (a minimal sketch follows this list)
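
A minimal plain-Python sketch of that labelling step (illustrative only, not fastai's internals; the file paths below are made up): the regex pulls the class name out of each filename, and a vocab list maps the string labels to integer ids, which is what `CategoryProcessor` does for `y`.

```python
import re

pat = re.compile(r'/([^/]+)_\d+.jpg$')
fnames = ['/data/images/Abyssinian_1.jpg',
          '/data/images/yorkshire_terrier_12.jpg',
          '/data/images/Abyssinian_7.jpg']

labels = [pat.search(f).group(1) for f in fnames]   # ['Abyssinian', 'yorkshire_terrier', 'Abyssinian']

vocab = sorted(set(labels))                  # unique class names, like CategoryProcessor's vocab
o2i = {v: i for i, v in enumerate(vocab)}    # class name -> integer id
y = [o2i[l] for l in labels]                 # [0, 1, 0]
print(vocab, y)
```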

2. Pets.ipynb: debugging the fastai library call graph

- `ImageDataBunch` call graph
  - ① First set up the sequence of image transforms to apply, with separate pipelines for train and valid
  - ② Build the dataset through whichever entry point; every route exits through `LabelLists`. Along the way it builds `ImageList` → `ImageLists` → `LabelList` → `LabelLists` → databunch (the roughly equivalent data block pipeline is sketched after the `__getattr__` example below)
  - When does the per-batch transform actually happen?
    - As shown below, `__getattr__` contains `self.process()`, which runs the `Category` processing for `y`; it does not transform `x`, it only extracts the labels (see the delegation sketch at the end of this section)
- What does `__getattr__` do?
  - `getattr()` is a built-in Python function that fetches an attribute value from an object
  - `__getattr__`, on the other hand, is only called when normal attribute lookup fails; as the example below shows, `a.value` never reaches it, while the missing `a.name` does
```python
class A(object):
    def __init__(self, value):
        self.value = value

    def __getattr__(self, item):
        # only reached when normal attribute lookup fails
        print("into __getattr__")
        return "can not find"

a = A(10)
print(a.value)
# 10
print(a.name)
# into __getattr__
# can not find
```
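
The chain in ② maps onto fastai v1's data block API. Written out explicitly, the pets pipeline from section 1 is roughly the following (parameter values copied from that snippet; this is the standard fastai v1 chaining style, each arrow becoming one call):

```python
from fastai.vision import *

# ImageList -> split (ItemLists) -> label (LabelLists) -> transform -> databunch
data = (ImageList.from_folder(path_img)
        .split_by_rand_pct(0.2, seed=2)        # random train/valid split
        .label_from_re(r'/([^/]+)_\d+.jpg$')   # y from the filename regex
        .transform(get_transforms(), size=224)
        .databunch(bs=64)
        .normalize(imagenet_stats))
```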

- The code below is covered in the course and is also refactored there.
  - At the line just under ② above, stepping in (F11) on `return cls.create_from_ll(...)` first lands in the `__getattr__` of `cls` (`ItemLists`).
  - It presumably ends up in a function of `src`'s base class.

- It then enters `self.process()`
  - which goes into the `process` function of `LabelLists`

- ③ Enters the `create_from_ll` function
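
To make the stepping notes above concrete, here is a toy sketch of that delegation pattern (all class and method names are invented for illustration; this is not fastai's source): a container whose `__getattr__` forwards an undefined method call to its train and valid lists and then runs `self.process()`, which is why stepping into a labelling call lands in `__getattr__` first and why the `y` processing happens there.

```python
class DummyLabelList:
    def __init__(self, items): self.items = items
    def label_from_func(self, f):
        self.y = [f(o) for o in self.items]
        return self
    def process(self): print(f"processing labels {self.y}")

class DummyItemLists:
    def __init__(self, train, valid): self.train, self.valid = train, valid

    def __getattr__(self, k):
        # only reached for names not defined on DummyItemLists itself
        ft, fv = getattr(self.train, k), getattr(self.valid, k)
        def _inner(*args, **kwargs):
            self.train = ft(*args, **kwargs)
            self.valid = fv(*args, **kwargs)
            self.process()            # the self.process() seen while debugging
            return self
        return _inner

    def process(self):
        self.train.process(); self.valid.process()

ll = DummyItemLists(DummyLabelList(['a_1.jpg', 'b_2.jpg']),
                    DummyLabelList(['c_3.jpg']))
ll.label_from_func(lambda fn: fn.split('_')[0])   # goes through __getattr__
```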

3. Reproducing the part 1 results with the part 2 code

```python
# https://blog.csdn.net/winycg/article/details/78512300
# import sys
# sys.path.append(r'/dataset_zhr/course-v3/nbs/dl2/exp')


# The above failed; the exp folder has to sit in the same directory as this script
from exp.nb_08 import *
import numpy as np


path = '/dataset_zhr/oxford-iiit-pet/'
path_anno = path + 'annotations'
path_img = path + 'images'


# Without a resize, the dataloader later could not assemble a batch
tfms = [make_rgb, ResizeFixed(128), to_byte_tensor, to_float_tensor]
# item list of all the image files
il = ImageList.from_files(path_img, tfms=tfms)


np.random.seed(2)
# Split validation and training sets: random split
def split_by_rand_pct(valid_pct:float=0.2, seed:int=None, len_list:int=None):
    if valid_pct==0: valid_pct = 0.2
    if seed is not None: np.random.seed(seed)
    rand_idx = np.random.permutation(range(len_list))
    cut = int(valid_pct * len_list)
    # boolean mask: True marks a validation item
    mask = np.zeros(len_list, dtype=bool)
    mask[rand_idx[:cut]] = True
    return mask
# splitter with valid_pct=0.2 and seed=2 baked in
splitter = partial(split_by_rand_pct, 0.2, 2)
sd = SplitData.split_by_func(il, splitter)


# Label each file with a regular expression on its filename
def pat_labeler(pat, fn):
    if isinstance(fn, Path): fn = fn.as_posix()
    res = re.compile(pat).search(str(fn))
    assert res, f'Failed to find "{pat}" in "{fn}"'
    return res.group(1)


pat = r'/([^/]+)_\d+.jpg$'
pat_label = partial(pat_labeler, pat)
ll = label_by_func(sd, pat_label, proc_y=CategoryProcessor())
# ll = label_by_func(sd, pat_label)
# ll.train.y


# Get the databunch
bs = 128
# train_dl, valid_dl = get_dls(ll.train, ll.valid, bs, num_workers=4)
# x,y = next(iter(train_dl))


SplitData.to_databunch = databunchify
data = ll.to_databunch(bs, c_in=3, c_out=37)


# AvgStatsCallback takes only the metrics; CudaCallback is its own callback
cbfs = [partial(AvgStatsCallback, accuracy), CudaCallback]


# If we build the network ourselves, how good does it get, and how do we catch up
# with fastai's results? That is one path: build from scratch and improve gradually,
# rather than trying to do everything at once.
# m,s = x.mean((0,2,3)).cuda(), x.std((0,2,3)).cuda()
# mean/std taken from a single batch
_m = tensor([0.4734, 0.4456, 0.3943])
_s = tensor([0.2527, 0.2545, 0.2631])
norm_pets = partial(normalize_chan, mean=_m.cuda(), std=_s.cuda())


cbfs.append(partial(BatchTransformXCallback, norm_pets))


nfs = [64,64,128,256]
```
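
The notes stop before the training call. Assuming `exp.nb_08` re-exports the course's `get_learn_run` and `conv_layer` helpers and the Runner-style `fit` API (as the dl2 notebooks do for the Imagenette example), a plausible continuation would be:

```python
# Hedged continuation (not in the original notes): build the CNN and train it.
# get_learn_run / conv_layer / run.fit are assumed to come from the exported
# course notebooks; the learning rate and epoch count are illustrative values.
learn, run = get_learn_run(nfs, data, 0.4, conv_layer, cbs=cbfs)
run.fit(3, learn)
```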