update example dataloader for transformers 4.31.x (#1125)
Signed-off-by: Cheng, Zixuan <[email protected]>
violetch24 authored Aug 2, 2023
1 parent 9f80e61 commit 59371fe
Showing 10 changed files with 150 additions and 8 deletions.
@@ -536,6 +536,22 @@ def compute_metrics(eval_preds):
         else None,
     )

+    eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
+    batch_size = eval_dataloader.batch_size
+
     # Tune
     def eval_func_for_nc(model_tuned):
         trainer.model = model_tuned
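For context on the guard above: under transformers 4.31.x, trainer.get_eval_dataloader() returns an accelerate-prepared DataLoaderShard whose batch_size attribute can be None, and assigning the attribute triggers the ValueError quoted in the comment. A minimal repro sketch, not part of the commit (hypothetical batch size; assumes a 4.31.x Trainer instance named trainer):

    dl = trainer.get_eval_dataloader()
    print(dl.batch_size)   # None -- the shard tracks total_batch_size instead
    dl.batch_size = 8      # ValueError: batch_size attribute should not be set
                           # after DataLoaderShard is initialized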
@@ -564,7 +580,7 @@ def eval_func_for_nc(model_tuned):
         conf = PostTrainingQuantConfig(accuracy_criterion=accuracy_criterion)
         q_model = quantization.fit(model,
                                    conf,
-                                   calib_dataloader=trainer.get_eval_dataloader(),
+                                   calib_dataloader=eval_dataloader,
                                    eval_func=eval_func_for_nc)
         q_model.save(training_args.output_dir)
         exit(0)
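quantization.fit drives accuracy-aware tuning through eval_func: it hands each candidate model to the callback and expects a single accuracy score back, which it compares against the configured accuracy criterion. A hedged sketch of that contract as these scripts wire it up (the metric key is illustrative, not necessarily the one each script reads):

    def eval_func_for_nc(model_tuned):
        # reuse the existing HF Trainer evaluation loop for the candidate model
        trainer.model = model_tuned
        metrics = trainer.evaluate()
        return metrics["eval_accuracy"]  # illustrative key; task-specific in practice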
@@ -582,7 +598,7 @@ def eval_func_for_nc(model_tuned):
         from neural_compressor.config import BenchmarkConfig
         from neural_compressor import benchmark
         b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
-        benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
+        benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
     else:
         eval_func_for_nc(new_model)
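Passing the (possibly wrapped) eval_dataloader here instead of a fresh trainer.get_eval_dataloader() matters for the same reason as above: the benchmark path appears to rely on the same dataloader surface, iteration plus a batch_size attribute, which is exactly what the INCDataLoader wrapper restores. A quick sanity check one could run before benchmarking, not part of the commit:

    assert eval_dataloader.batch_size is not None   # wrapper restored the attribute
    first_batch = next(iter(eval_dataloader))       # iteration delegates to the
                                                    # underlying accelerate loader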

@@ -552,6 +552,22 @@ def compute_metrics(eval_preds):
         else None,
     )

+    eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
+    batch_size = eval_dataloader.batch_size
+
     # Tune
     def eval_func_for_nc(model_tuned):
         trainer.model = model_tuned
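A note on how the wrapper works: __iter__ = dataloader.__iter__ stores an already-bound method as a class attribute, and CPython's special-method lookup finds it on the class and calls it with no extra arguments, so iteration is delegated straight to the wrapped accelerate loader while batch_size is served from total_batch_size. The same pattern on a plain PyTorch DataLoader (self-contained sketch with toy data):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    inner = DataLoader(TensorDataset(torch.zeros(8, 4)), batch_size=2)

    class Wrapped:
        __iter__ = inner.__iter__   # bound method: iter(Wrapped()) calls it as-is
        def __init__(self):
            self.dataloader = inner
            self.batch_size = 2     # stands in for total_batch_size here

    assert len(list(Wrapped())) == 4  # 8 samples / batch size 2 -> 4 batches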
@@ -592,7 +608,7 @@ def eval_func_for_nc(model_tuned):
                                        op_type_dict=op_type_dict)
         q_model = quantization.fit(model,
                                    conf,
-                                   calib_dataloader=trainer.get_eval_dataloader(),
+                                   calib_dataloader=eval_dataloader,
                                    eval_func=eval_func_for_nc)
         q_model.save(training_args.output_dir)
         exit(0)
@@ -610,7 +626,7 @@ def eval_func_for_nc(model_tuned):
         from neural_compressor.config import BenchmarkConfig
         from neural_compressor import benchmark
         b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
-        benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
+        benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
     else:
         eval_func_for_nc(new_model)

@@ -605,6 +605,19 @@ def compute_metrics(p: EvalPrediction):
     )

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size
     metric_name = "eval_f1"

@@ -612,6 +612,19 @@ def compute_metrics(p: EvalPrediction):
     )

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size
     metric_name = "eval_f1"

@@ -588,6 +588,22 @@ def compute_metrics(eval_preds):
     )
     num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams

+    eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
+    batch_size = eval_dataloader.batch_size
+
     def eval_func_for_nc(model):
         trainer.model = model
         results = trainer.evaluate(
@@ -616,7 +632,7 @@ def eval_func_for_nc(model):
         conf = PostTrainingQuantConfig(approach="dynamic")
         q_model = quantization.fit(model,
                                    conf,
-                                   calib_dataloader=trainer.get_eval_dataloader(),
+                                   calib_dataloader=eval_dataloader,
                                    eval_func=eval_func_for_nc)
         q_model.save(training_args.output_dir)
         exit(0)
@@ -634,7 +650,7 @@ def eval_func_for_nc(model):
         from neural_compressor.config import BenchmarkConfig
         from neural_compressor import benchmark
         b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
-        benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
+        benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
     else:
         eval_func_for_nc(new_model)

@@ -494,6 +494,19 @@ def compute_metrics(p: EvalPrediction):
     )

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size

     def take_eval_steps(model, trainer, save_metrics=False):
@@ -479,6 +479,19 @@ def compute_metrics(p: EvalPrediction):
     )

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size

     def take_eval_steps(model, trainer, save_metrics=False):
@@ -479,6 +479,19 @@ def compute_metrics(p: EvalPrediction):
     )

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size

     def take_eval_steps(model, trainer, save_metrics=False):
@@ -506,6 +506,19 @@ def compute_metrics(p: EvalPrediction):
                                      early_stopping_threshold))

     eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
     batch_size = eval_dataloader.batch_size

     def eval_func(model):
@@ -543,6 +543,22 @@ def compute_metrics(eval_preds):
         compute_metrics=compute_metrics if training_args.predict_with_generate else None
     )

+    eval_dataloader = trainer.get_eval_dataloader()
+    # transformers issue #1
+    # for transformers 4.31.0: accelerate dataloader
+    # *** ValueError: batch_size attribute should not be set
+    #     after DataLoaderShard is initialized
+    if eval_dataloader.batch_size is None:
+        def _build_inc_dataloader(dataloader):
+            class INCDataLoader:
+                __iter__ = dataloader.__iter__
+                def __init__(self) -> None:
+                    self.dataloader = dataloader
+                    self.batch_size = dataloader.total_batch_size
+            return INCDataLoader()
+        eval_dataloader = _build_inc_dataloader(eval_dataloader)
+    batch_size = eval_dataloader.batch_size
+
     results = {}
     max_length = (
         training_args.generation_max_length
@@ -576,7 +592,7 @@ def eval_func_for_nc(model):
         conf = PostTrainingQuantConfig(approach="dynamic")
         q_model = quantization.fit(model,
                                    conf,
-                                   calib_dataloader=trainer.get_eval_dataloader(),
+                                   calib_dataloader=eval_dataloader,
                                    eval_func=eval_func_for_nc)
         q_model.save(training_args.output_dir)
         exit(0)
@@ -595,7 +611,7 @@ def eval_func_for_nc(model):
         from neural_compressor.config import BenchmarkConfig
         from neural_compressor import benchmark
         b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
-        benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
+        benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
     else:
         eval_func_for_nc(new_model)
     exit(0)