Merge pull request #35 from JuliaML/permute

change SVHN2 to WH format + other fixes
JuliaML · Apr 1, 2020 · cd83d1d · cd83d1d · CarloLucibello · Apr 6, 2020
2 parents 09a0cc5 + 213fa68
commit cd83d1d
Show file tree

Hide file tree

Showing 29 changed files with 199 additions and 649 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,13 +1,18 @@
 language: julia
+
 os:
   - linux
   - osx
+  - windows
+
 julia:
   - 1.0
-  - 1.3
+  - 1
   - nightly
+
 notifications:
   email: false
+
 git:
   depth: 99999999
 
@@ -32,4 +37,4 @@ jobs:
       after_success: skip
 
 after_success:
-- julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(process_folder())'
+  - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(process_folder())'
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "MLDatasets"
 uuid = "eb30cadb-4394-5ae3-aed4-317e484a6458"
-version = "0.4.1"
+version = "0.5.0"
 
 [deps]
 BinDeps = "9e28174c-4ba2-5203-b857-d8d62c4213ee"
@@ -9,22 +9,23 @@ DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
 GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
+MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [compat]
-ColorTypes = "0.4, 0.5, 0.6, 0.7, 0.8, 0.9"
+BinDeps = "1"
+ColorTypes = "0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10"
 DataDeps = "0.3, 0.4, 0.5, 0.6, 0.7"
 FixedPointNumbers = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
-ImageCore = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
 GZip = "0.5"
+ImageCore = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
+MAT = "0.7"
 Requires = "1"
-BinDeps = "1"
 julia = "1"
 
 [extras]
 ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
-MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "MAT", "ImageCore"]
+test = ["Test", "ImageCore"]
diff --git a/docs/src/datasets/CIFAR10.md b/docs/src/datasets/CIFAR10.md
@@ -56,21 +56,8 @@ the CIFAR-10 dataset in Julia more convenient.
 
 Function | Description
 ---------|-------------
-[`convert2features(array)`](@ref CIFAR10.convert2features) | Convert the CIFAR-10 tensor to a flat feature matrix
 [`convert2image(array)`](@ref CIFAR10.convert2image) | Convert the CIFAR-10 tensor/matrix to a colorant array
 
-You can use the function
-[`convert2features`](@ref CIFAR10.convert2features) to convert
-the given CIFAR-10 tensor to a feature matrix (or feature vector
-in the case of a single image). The purpose of this function is
-to drop the spatial dimensions such that traditional ML
-algorithms can process the dataset.
-
-```julia
-julia> CIFAR10.convert2features(CIFAR10.traintensor()) # full training data
-3072×50000 Array{N0f8,2}:
-[...]
-```
 
 To visualize an image or a prediction we provide the function
 [`convert2image`](@ref CIFAR10.convert2image) to convert the
@@ -106,7 +93,6 @@ CIFAR10.testdata
 ```@docs
 CIFAR10.download
 CIFAR10.classnames
-CIFAR10.convert2features
 CIFAR10.convert2image
 ```
 

diff --git a/docs/src/datasets/CIFAR100.md b/docs/src/datasets/CIFAR100.md
@@ -60,21 +60,8 @@ the CIFAR-100 dataset in Julia more convenient.
 
 Function | Description
 ---------|-------------
-[`convert2features(array)`](@ref CIFAR10.convert2features) | Convert the CIFAR-100 tensor to a flat feature matrix
 [`convert2image(array)`](@ref CIFAR10.convert2image) | Convert the CIFAR-100 tensor/matrix to a colorant array
 
-You can use the function
-[`convert2features`](@ref CIFAR10.convert2features) to convert
-the given CIFAR-100 tensor to a feature matrix (or feature vector
-in the case of a single image). The purpose of this function is
-to drop the spatial dimensions such that traditional ML
-algorithms can process the dataset.
-
-```julia
-julia> CIFAR100.convert2features(CIFAR100.traintensor()) # full training data
-3072×50000 Array{N0f8,2}:
-[...]
-```
 
 To visualize an image or a prediction we provide the function
 [`convert2image`](@ref CIFAR10.convert2image) to convert the
@@ -107,15 +94,15 @@ CIFAR100.testdata
 
 ### Utilities
 
-See [`CIFAR10.convert2features`](@ref) and
-[`CIFAR10.convert2image`](@ref)
 
 ```@docs
 CIFAR100.download
 CIFAR100.classnames_coarse
 CIFAR100.classnames_fine
 ```
 
+See also [`CIFAR10.convert2image`](@ref).
+
 ## References
 
 - **Authors**: Alex Krizhevsky, Vinod Nair, Geoffrey Hinton

diff --git a/docs/src/datasets/FashionMNIST.md b/docs/src/datasets/FashionMNIST.md
@@ -56,22 +56,8 @@ the Fashion-MNIST dataset in Julia more convenient.
 
 Function | Description
 ---------|-------------
-[`convert2features(array)`](@ref MNIST.convert2features) | Convert the Fashion-MNIST tensor to a flat feature matrix
 [`convert2image(array)`](@ref MNIST.convert2image) | Convert the Fashion-MNIST tensor/matrix to a colorant array
 
-You can use the function
-[`convert2features`](@ref MNIST.convert2features) to
-convert the given Fashion-MNIST tensor to a feature matrix (or
-feature vector in the case of a single image). The purpose of
-this function is to drop the spatial dimensions such that
-traditional ML algorithms can process the dataset.
-
-```julia
-julia> FashionMNIST.convert2features(FashionMNIST.traintensor()) # full training data
-784×60000 Array{N0f8,2}:
-[...]
-```
-
 To visualize an image or a prediction we provide the function
 [`convert2image`](@ref MNIST.convert2image) to convert the
 given Fashion-MNIST horizontal-major tensor (or feature matrix)
@@ -114,8 +100,7 @@ FashionMNIST.download
 FashionMNIST.classnames
 ```
 
-Also, the `FashionMNIST` module is re-exporting [`convert2features`](@ref MNIST.convert2features)
-and [`convert2image`](@ref MNIST.convert2image) from the [`MNIST`](@ref) module.
+Also, the `FashionMNIST` module is re-exporting [`convert2image`](@ref MNIST.convert2image) from the [`MNIST`](@ref) module.
 
 ## References
 

diff --git a/docs/src/datasets/MNIST.md b/docs/src/datasets/MNIST.md
@@ -56,21 +56,8 @@ the MNIST dataset in Julia more convenient.
 
 Function | Description
 ---------|-------------
-[`convert2features(array)`](@ref MNIST.convert2features) | Convert the MNIST tensor to a flat feature matrix
 [`convert2image(array)`](@ref MNIST.convert2image) | Convert the MNIST tensor/matrix to a colorant array
 
-You can use the function [`convert2features`](@ref
-MNIST.convert2features) to convert the given MNIST tensor to a
-feature matrix (or feature vector in the case of a single image).
-The purpose of this function is to drop the spatial dimensions
-such that traditional ML algorithms can process the dataset.
-
-```julia
-julia> MNIST.convert2features(MNIST.traintensor()) # full training data
-784×60000 Array{N0f8,2}:
-[...]
-```
-
 To visualize an image or a prediction we provide the function
 [`convert2image`](@ref MNIST.convert2image) to convert the given
 MNIST horizontal-major tensor (or feature matrix) to a
@@ -110,7 +97,6 @@ MNIST.testdata
 
 ```@docs
 MNIST.download
-MNIST.convert2features
 MNIST.convert2image
 ```
 

diff --git a/docs/src/datasets/SVHN2.md b/docs/src/datasets/SVHN2.md
@@ -77,22 +77,8 @@ the SVHN (format 2) dataset in Julia more convenient.
 
 Function | Description
 ---------|-------------
-[`convert2features(array)`](@ref SVHN2.convert2features) | Convert the SVHN tensor to a flat feature matrix
 [`convert2image(array)`](@ref SVHN2.convert2image) | Convert the SVHN tensor/matrix to a colorant array
 
-You can use the function
-[`convert2features`](@ref SVHN2.convert2features) to convert
-the given SVHN tensor to a feature matrix (or feature vector
-in the case of a single image). The purpose of this function is
-to drop the spatial dimensions such that traditional ML
-algorithms can process the dataset.
-
-```julia
-julia> SVHN2.convert2features(SVHN2.traindata()[1]) # full training data
-3072×73257 Array{N0f8,2}:
-[...]
-```
-
 To visualize an image or a prediction we provide the function
 [`convert2image`](@ref SVHN2.convert2image) to convert the
 given SVHN2 horizontal-major tensor (or feature matrix) to a
@@ -139,7 +125,6 @@ SVHN2.extradata
 ```@docs
 SVHN2.download
 SVHN2.classnames
-SVHN2.convert2features
 SVHN2.convert2image
 ```
 

diff --git a/src/CIFAR10/CIFAR10.jl b/src/CIFAR10/CIFAR10.jl
@@ -21,10 +21,11 @@ module CIFAR10
         testdata,
 
         convert2image,
-        convert2features,
 
         download
 
+    @deprecate convert2features reshape
+
     const DEPNAME = "CIFAR10"
     const NCHUNKS = 5
 

diff --git a/src/CIFAR10/interface.jl b/src/CIFAR10/interface.jl
@@ -13,17 +13,16 @@ given `indices` as a multi-dimensional array of eltype `T`. If
 the corresponding labels are required as well, it is recommended
 to use [`CIFAR10.traindata`](@ref) instead.
 
-The image(s) is/are returned in the native horizontal-major
+The image(s) is/are returned in the horizontal-major
 memory layout as a single numeric array. If `T <: Integer`, then
 all values will be within `0` and `255`, otherwise the values are
 scaled to be between `0` and `1`.
 
 If the parameter `indices` is omitted or an `AbstractVector`, the
-images are returned as a 4D array (i.e. a `Array{T,4}`), in which
-the first dimension corresponds to the pixel *rows* (x) of the
-image, the second dimension to the pixel *columns* (y) of the
-image, the third dimension the RGB color channels, and the fourth
-dimension denotes the index of the image.
+images are returned as a 4D array (i.e. an `Array{T,4}`) in
+WHCN format (width, height, #channels, #images).
+If `indices` is an integer instead, a 3D array in WHC format is returned.
+
 
 ```julia-repl
 julia> CIFAR10.traintensor() # load all training images
@@ -35,23 +34,18 @@ julia> CIFAR10.traintensor(Float32, 1:3) # first three images as Float32
 [...]
 ```
 
-If `indices` is an `Integer`, the single image is returned as
-`Array{T,3}` in horizontal-major layout, which means that the
-first dimension denotes the pixel *rows* (x), the second
-dimension denotes the pixel *columns* (y), and the third
-dimension the RGB color channels of the image.
+If `indices` is an `Integer`, a single image is returned as
+an `Array{T,3}`.
 
 ```julia-repl
 julia> CIFAR10.traintensor(1) # load first training image
 32×32×3 Array{N0f8,3}:
 [...]
 ```
 
-As mentioned above, the images are returned in the native
-horizontal-major layout to preserve the original feature
-ordering. You can use the utility function
+You can use the utility function
 [`convert2image`](@ref) to convert a CIFAR-10 array into a
-vertical-major Julia image with the appropriate `RGB` eltype.
+horizontal-major Julia image with the appropriate `RGB` eltype.
 
 ```julia-repl
 julia> CIFAR10.convert2image(CIFAR10.traintensor(1)) # convert to column-major colorant array
@@ -78,17 +72,15 @@ Return the CIFAR-10 **test** images corresponding to the given
 corresponding labels are required as well, it is recommended to
 use [`CIFAR10.testdata`](@ref) instead.
 
-The image(s) is/are returned in the native horizontal-major
+Images are returned in horizontal-major
 memory layout as a single numeric array. If `T <: Integer`, then
 all values will be within `0` and `255`, otherwise the values are
 scaled to be between `0` and `1`.
 
 If the parameter `indices` is omitted or an `AbstractVector`, the
-images are returned as a 4D array (i.e. a `Array{T,4}`), in which
-the first dimension corresponds to the pixel *rows* (x) of the
-image, the second dimension to the pixel *columns* (y) of the
-image, the third dimension the RGB color channels, and the fourth
-dimension denotes the index of the image.
+images are returned as a 4D array (i.e. an `Array{T,4}`) in
+WHCN format (width, height, #channels, #images).
+If `indices` is an integer instead, a 3D array in WHC format is returned.
 
 ```julia-repl
 julia> CIFAR10.testtensor() # load all test images
@@ -100,23 +92,18 @@ julia> CIFAR10.testtensor(Float32, 1:3) # first three images as Float32
 [...]
 ```
 
-If `indices` is an `Integer`, the single image is returned as
-`Array{T,3}` in horizontal-major layout, which means that the
-first dimension denotes the pixel *rows* (x), the second
-dimension denotes the pixel *columns* (y), and the third
-dimension the RGB color channels of the image.
+If `indices` is an `Integer`, a single image is returned as
+an `Array{T,3}`.
 
 ```julia-repl
 julia> CIFAR10.testtensor(1) # load first test image
 32×32×3 Array{N0f8,3}:
 [...]
 ```
 
-As mentioned above, the images are returned in the native
-horizontal-major layout to preserve the original feature
-ordering. You can use the utility function
+You can use the utility function
 [`convert2image`](@ref) to convert a CIFAR-10 array into a
-vertical-major Julia image with the appropriate `RGB` eltype.
+horizontal-major HW Julia image with the appropriate `RGB` eltype.
 
 ```julia-repl
 julia> CIFAR10.convert2image(CIFAR10.testtensor(1)) # convert to column-major colorant array
@@ -217,7 +204,7 @@ full trainingset is returned. The first element of the return
 values will be the images as a multi-dimensional array, and the
 second element the corresponding labels as integers.
 
-The image(s) is/are returned in the native horizontal-major
+The image(s) is/are returned in horizontal-major
 memory layout as a single numeric array of eltype `T`. If `T <:
 Integer`, then all values will be within `0` and `255`, otherwise
 the values are scaled to be between `0` and `1`. The integer
@@ -322,7 +309,7 @@ full testset is returned. The first element of the return
 values will be the images as a multi-dimensional array, and the
 second element the corresponding labels as integers.
 
-The image(s) is/are returned in the native horizontal-major
+The image(s) is/are returned in the horizontal-major
 memory layout as a single numeric array of eltype `T`. If `T <:
 Integer`, then all values will be within `0` and `255`, otherwise
 the values are scaled to be between `0` and `1`. The integer