add aesthetic laion dataloader
This commit is contained in:
parent
89ac468ce8
commit
5b582ddfd8
1 changed file with 12 additions and 1 deletion
|
@@ -148,7 +148,18 @@ class WebDataModuleFromConfig(pl.LightningDataModule):
|
||||||
|
|
||||||
nodesplitter = wds.shardlists.split_by_node if self.multinode else wds.shardlists.single_node_only
|
nodesplitter = wds.shardlists.split_by_node if self.multinode else wds.shardlists.single_node_only
|
||||||
|
|
||||||
|
if self.tar_base == "__improvedaesthetic__":
|
||||||
|
print("## Warning, loading the same improved aesthetic dataset "
|
||||||
|
"for all splits and ignoring shards parameter.")
|
||||||
|
urls = []
|
||||||
|
for i in range(1, 65):
|
||||||
|
for j in range(512):
|
||||||
|
for k in range(5):
|
||||||
|
urls.append(f's3://s-laion/improved-aesthetics-laion-2B-en-subsets/aesthetics/{i:02d}/{j:03d}/{k:05d}.tar')
|
||||||
|
tars = [f'pipe:aws s3 cp {url} -' for url in urls]
|
||||||
|
else:
|
||||||
tars = os.path.join(self.tar_base, dataset_config.shards)
|
tars = os.path.join(self.tar_base, dataset_config.shards)
|
||||||
|
|
||||||
dset = wds.WebDataset(
|
dset = wds.WebDataset(
|
||||||
tars,
|
tars,
|
||||||
nodesplitter=nodesplitter,
|
nodesplitter=nodesplitter,
|
||||||
|
|
Loading…
Reference in a new issue