You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am trying to use custom data to train.
I converted the faces_webface_112x112 data and edited the pictures.
While trying, I encountered this error:
Traceback (most recent call last):
File "main.py", line 88, in <module>
main(args)
File "main.py", line 60, in main
trainer.fit(trainer_mod, data_mod)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 553, in fit
self._run(model)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 918, in _run
self._dispatch()
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 986, in _dispatch
self.accelerator.start_training(self)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 92, in start_training
self.training_type_plugin.start_training(trainer)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 161, in start_training
self._results = trainer.run_stage()
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 996, in run_stage
return self._run_train()
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1045, in _run_train
self.fit_loop.run()
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 200, in advance
epoch_output = self.epoch_loop.run(train_dataloader)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 130, in advance
batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 101, in run
super().run(batch, batch_idx, dataloader_idx)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 148, in advance
result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 202, in _run_optimization
self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 404, in _optimizer_step
using_lbfgs=is_lbfgs,
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1618, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 209, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 129, in __optimizer_step
trainer.accelerator.optimizer_step(optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 293, in optimizer_step
self.lightning_module, optimizer, opt_idx, lambda_closure, **kwargs
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/plugins/precision/native_amp.py", line 59, in pre_optimizer_step
result = lambda_closure()
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 236, in _training_step_and_backward_closure
result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 537, in training_step_and_backward
result = self._training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 307, in _training_step
training_step_output = self.trainer.accelerator.training_step(step_kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 193, in training_step
return self.training_type_plugin.training_step(*step_kwargs.values())
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/ddp.py", line 384, in training_step
return self.model(*args, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 1040, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 1000, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0])
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/pytorch_lightning/overrides/base.py", line 82, in forward
output = self.module.training_step(*inputs, **kwargs)
File "/home/ubuntu/AdaFace/train_val.py", line 51, in training_step
cos_thetas, norms, embeddings, labels = self.forward(images, labels)
File "/home/ubuntu/AdaFace/train_val.py", line 40, in forward
embeddings, norms = self.model(images)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ubuntu/AdaFace/net.py", line 328, in forward
x = self.output_layer(x)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/container.py", line 204, in forward
input = module(input)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x32768 and 25088x512)
The text was updated successfully, but these errors were encountered:
I have the same error: "RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x32768 and 25088x512)", and the mat1 size depends on the "--batch_size" value. So that number depends directly on the amount of VRAM (and hardware) available.
@rsj123 or @trnikon, did you find a way to train by modifying the batches, the data loader, or the "net.py" blocks?
I am trying to use custom data to train.
I converted the faces_webface_112x112 data and edited the pictures.
While trying, I encountered this error:
The text was updated successfully, but these errors were encountered: