never run out of data

This commit is contained in:
Patrick Esser 2022-06-13 10:59:50 +00:00 committed by root
parent 046410167f
commit 6f604acf0a

View file

@@ -153,7 +153,7 @@ class WebDataModuleFromConfig(pl.LightningDataModule):
tars,
nodesplitter=nodesplitter,
shardshuffle=shardshuffle,
-handler=wds.warn_and_continue).shuffle(shuffle)
+handler=wds.warn_and_continue).repeat().shuffle(shuffle)
print(f'Loading webdataset with {len(dset.pipeline[0].urls)} shards.')
dset = (dset