From 94e1c2c5acc55f81698833c7a3938918b0051c02 Mon Sep 17 00:00:00 2001 From: Eric Meier Date: Thu, 28 Sep 2023 18:18:41 -0700 Subject: [PATCH 1/2] Fix download script in laion-high-resolution docs The current script produces invalid URLs because the zero padding is not kept in the for loop --- dataset_examples/laion-high-resolution.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dataset_examples/laion-high-resolution.md b/dataset_examples/laion-high-resolution.md index e6613cd..b1be40d 100644 --- a/dataset_examples/laion-high-resolution.md +++ b/dataset_examples/laion-high-resolution.md @@ -11,8 +11,12 @@ A good use case is to train a superresolution model. Download from https://huggingface.co/datasets/laion/laion-high-resolution ``` -mkdir laion-high-resolution && cd laion-high-resolution -for i in {00000..00127}; do wget https://huggingface.co/datasets/laion/laion-high-resolution/resolve/main/part-$i-5d6701c4-b238-4c0a-84e4-fe8e9daea963-c000.snappy.parquet; done +mkdir -p laion-high-resolution && cd laion-high-resolution + +for i in $(seq 0 127); do + wget https://huggingface.co/datasets/laion/laion-high-resolution/resolve/main/part-$(printf "%05d" $i)-5d6701c4-b238-4c0a-84e4-fe8e9daea963-c000.snappy.parquet +done + cd .. 
``` From 80757f0b3a6fe004780b2495e5e4937066301dc0 Mon Sep 17 00:00:00 2001 From: Eric Meier Date: Thu, 28 Sep 2023 18:26:50 -0700 Subject: [PATCH 2/2] Cleanup --- dataset_examples/laion-high-resolution.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataset_examples/laion-high-resolution.md b/dataset_examples/laion-high-resolution.md index b1be40d..e77373e 100644 --- a/dataset_examples/laion-high-resolution.md +++ b/dataset_examples/laion-high-resolution.md @@ -13,8 +13,8 @@ Download from https://huggingface.co/datasets/laion/laion-high-resolution ``` mkdir -p laion-high-resolution && cd laion-high-resolution -for i in $(seq 0 127); do - wget https://huggingface.co/datasets/laion/laion-high-resolution/resolve/main/part-$(printf "%05d" $i)-5d6701c4-b238-4c0a-84e4-fe8e9daea963-c000.snappy.parquet +for i in {0..127}; do + wget "https://huggingface.co/datasets/laion/laion-high-resolution/resolve/main/part-$(printf "%05d" $i)-5d6701c4-b238-4c0a-84e4-fe8e9daea963-c000.snappy.parquet" done cd ..