Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FasterAI #151

Merged
merged 18 commits into from
Aug 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] - 2020-03-08 – now
## [Unreleased] - 2020-07-31 – now

### Added
- `plotlrfind` to visualize results of `LRFinderPhase`
- High-level API "FasterAI"
- Find datasets and learning methods based on `Block`s: `finddataset`, `findlearningmethods`
- `loaddataset` for quickly loading data containers from configured recipes
- Data container recipes (`DatasetRecipe`, `loadrecipe`)

### Changed
- Documentation notebooks to reflect changes in API
### Changed
4 changes: 2 additions & 2 deletions src/datablock/wrappers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ end
Wrapper `Block` to attach a name to a block. Can be used in conjunction
with [`Only`](#) to apply encodings to specific blocks only.
"""
struct Named{Name, B<:Block} <: WrapperBlock
struct Named{Name, B<:AbstractBlock} <: WrapperBlock
block::B
end
Named(name::Symbol, block::B) where {B<:Block} = Named{name, B}(block)
Named(name::Symbol, block::B) where {B<:AbstractBlock} = Named{name, B}(block)


# the name is preserved through encodings and decodings
Expand Down
3 changes: 2 additions & 1 deletion src/datasets/fastairegistry.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@

const FASTAI_DATA_RECIPES = Dict{String, Vector{DatasetRecipe}}(
# Image classification datasets
[name => [ImageClassificationFolders()] for name in (
[name => [ImageFolders()] for name in (
"imagenette", "imagenette-160", "imagenette-320",
"imagenette2", "imagenette2-160", "imagenette2-320",
"imagewoof", "imagewoof-160", "imagewoof-320",
"imagewoof2", "imagewoof2-160", "imagewoof2-320",
)]...,

"camvid_tiny" => [ImageSegmentationFolders()],
"pascal_2007" => [ImageTableMultiLabel()],
)


Expand Down
44 changes: 39 additions & 5 deletions src/datasets/recipes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ recipeblocks(::R) where {R<:DatasetRecipe} = recipeblocks(R)
# ImageFolders

"""
ImageClfFolders(; labelfn = parentname, split = false)
ImageFolders(; labelfn = parentname, split = false)

Recipe for loading a single-label image classification dataset
stored in a hierarchical folder format. If `split == true`, split
Expand All @@ -56,16 +56,16 @@ defaults to the name of the parent folder but a custom function can
be passed as `labelfn`.

```julia
julia> recipeblocks(ImageClassificationFolders)
julia> recipeblocks(ImageFolders)
Tuple{Image{2}, Label}
```
"""
Base.@kwdef struct ImageClassificationFolders <: DatasetRecipe
Base.@kwdef struct ImageFolders <: DatasetRecipe
labelfn = parentname
split::Bool = false
end

function loadrecipe(recipe::ImageClassificationFolders, path)
function loadrecipe(recipe::ImageFolders, path)
isdir(path) || error("$path is not a directory")
data = loadfolderdata(
path,
Expand All @@ -81,7 +81,7 @@ function loadrecipe(recipe::ImageClassificationFolders, path)
return data, blocks
end

recipeblocks(::Type{ImageClassificationFolders}) = Tuple{Image{2}, Label}
recipeblocks(::Type{ImageFolders}) = Tuple{Image{2}, Label}


# ImageSegmentationFolders
Expand Down Expand Up @@ -124,3 +124,37 @@ function loadrecipe(recipe::ImageSegmentationFolders, path)
end

# Block types reported for the `ImageSegmentationFolders` recipe:
# an `Image{2}` block paired with a `Mask{2}` block.
function recipeblocks(::Type{ImageSegmentationFolders})
    return Tuple{Image{2}, Mask{2}}
end

# ImageTableMultiLabel

"""
    ImageTableMultiLabel(; kwargs...)

Recipe for loading a multi-label image classification dataset that is
described by a CSV table: one column holds image file names and another
holds the labels of each image as a single delimited string.

# Keywords

- `csvfile = "train.csv"`: name of the CSV file, relative to the dataset path.
- `imagefolder = "train"`: folder, relative to the dataset path, that the
  file names in `filecol` are resolved against.
- `filecol = :fname`: table column with the image file names.
- `labelcol = :labels`: table column with the delimited label strings.
- `split = false`: if `true`, `loadrecipe` returns a `Dict` with `"train"`
  and `"valid"` subsets instead of a single data container.
- `splitcol = :is_valid`: table column used as the mask that partitions the
  observations when `split == true`.
- `labelsep = " "`: separator used to split each entry of `labelcol` into
  individual labels.
"""
Base.@kwdef struct ImageTableMultiLabel <: DatasetRecipe
csvfile::String = "train.csv"
imagefolder::String = "train"
filecol::Symbol = :fname
labelcol::Symbol = :labels
split::Bool = false
splitcol::Symbol = :is_valid
labelsep::String = " "
end


"""
    loadrecipe(recipe::ImageTableMultiLabel, path)

Load a multi-label image dataset from the folder `path` as configured by
`recipe`.

The table `joinpath(path, recipe.csvfile)` is read with `loadfile`. Images are
loaded lazily from `joinpath(path, recipe.imagefolder, fname)` for every entry
of column `recipe.filecol`, and each entry of column `recipe.labelcol` is split
on `recipe.labelsep` into that observation's labels.

Returns `(data, blocks)` where `blocks` is `(Image{2}(), LabelMulti(classes))`
and `classes` are the unique labels found in the table. `data` is the tuple
`(images, labels)`, or, if `recipe.split` is `true`, a `Dict` mapping `"train"`
and `"valid"` to the corresponding subsets of that tuple.

Throws an `ErrorException` if the CSV file does not exist.
"""
function loadrecipe(recipe::ImageTableMultiLabel, path)
    csvpath = joinpath(path, recipe.csvfile)
    isfile(csvpath) || error("File $csvpath does not exist")
    df = loadfile(csvpath)
    images = mapobs(
        f -> loadfile(joinpath(path, recipe.imagefolder, f)),
        df[:, recipe.filecol],
    )
    labels = map(str -> split(str, recipe.labelsep), df[:, recipe.labelcol])
    data = (images, labels)
    blocks = Image{2}(), LabelMulti(unique(Iterators.flatten(labels)))
    if recipe.split
        idxs = 1:nobs(data)
        # The split column (default `:is_valid`) flags the rows that belong to
        # the validation set, so rows where it is `true` go to "valid" and the
        # remaining rows go to "train".
        isvalid = df[:, recipe.splitcol]
        data = Dict(
            "train" => datasubset(data, idxs[(!).(isvalid)]),
            "valid" => datasubset(data, idxs[isvalid]),
        )
    end
    return data, blocks
end

# Block types reported for the `ImageTableMultiLabel` recipe:
# an `Image{2}` block paired with a `LabelMulti` block.
function recipeblocks(::Type{ImageTableMultiLabel})
    return Tuple{Image{2}, LabelMulti}
end
2 changes: 1 addition & 1 deletion src/datasets/transformations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ struct MappedData{F, D}
data::D
end

Base.show(io::IO, data::MappedData) = print(io, "mapobs($(data.f), $(data.data))")
Base.show(io::IO, data::MappedData) = print(io, "mapobs($(data.f), $(summary(data.data)))")
Base.show(io::IO, data::MappedData{F, <:AbstractArray}) where F = print(io, "mapobs($(data.f), $(ShowLimit(data.data, limit=80)))")
LearnBase.nobs(data::MappedData) = nobs(data.data)
LearnBase.getobs(data::MappedData, idx::Int) = data.f(getobs(data.data, idx))
Expand Down
17 changes: 3 additions & 14 deletions test/datablock.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
#include("imports.jl")
using FastAI
import FastAI: Block, Encoding, encode, decode, checkblock, encodedblock, decodedblock
using FastAI: Label, LabelMulti, Mask, Image, ImageTensor, testencoding
using FastAI: OneHot
using Test
using StaticArrays
using Images
using FastAI: grabbounds
using Images

##
include("imports.jl")


struct ABlock <: Block
end
checkblock(::ABlock, ::Int) = true
Expand Down Expand Up @@ -69,9 +60,7 @@ end
testencoding(enc, block, image)
@testset "randstate is shared" begin
im1, im2 = encode(enc, Training(), (block, block), (image, image))
im3 = encode(enc, Training(), block, image)
@test im1 ≈ im2
@test !(im1 == im3)
end

@testset "don't transform data that doesn't need to be resized" begin
Expand Down
10 changes: 5 additions & 5 deletions test/datasets/recipes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,31 @@ function testrecipe(recipe::Datasets.DatasetRecipe, data, blocks)
end


@testset ExtendedTestSet "ImageClassificationFolders" begin
@testset ExtendedTestSet "ImageFolders" begin
path = joinpath(datasetpath("mnist_var_size_tiny"), "train")

@testset ExtendedTestSet "Basic configuration" begin
recipe = Datasets.ImageClassificationFolders()
recipe = Datasets.ImageFolders()
data, blocks = loadrecipe(recipe, path)
testrecipe(recipe, data, blocks)
@test blocks[1] isa Image
@test blocks[2].classes == ["3", "7"]
end

@testset ExtendedTestSet "Split configuration" begin
recipe = Datasets.ImageClassificationFolders(split=true)
recipe = Datasets.ImageFolders(split=true)
data, blocks = loadrecipe(recipe, path)
testrecipe(recipe, data["train"], blocks)
end

@testset ExtendedTestSet "Error cases" begin
@testset ExtendedTestSet "Empty directory" begin
recipe = Datasets.ImageClassificationFolders(split=true)
recipe = Datasets.ImageFolders(split=true)
@test_throws ErrorException loadrecipe(recipe, mktempdir())
end

@testset ExtendedTestSet "Only one label" begin
recipe = Datasets.ImageClassificationFolders(labelfn=x -> "1")
recipe = Datasets.ImageFolders(labelfn=x -> "1")
@test_throws ErrorException loadrecipe(recipe, path)
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/datasets/registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ include("../imports.jl")

@testset ExtendedTestSet "registerrecipe!" begin
@test_nowarn Datasets.registerrecipe!(
reg, "mnist_var_size_tiny", Datasets.ImageClassificationFolders())
reg, "mnist_var_size_tiny", Datasets.ImageFolders())
end

@testset ExtendedTestSet "finddatasets" begin
Expand Down
14 changes: 14 additions & 0 deletions test/fasterai.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
include("imports.jl")



# Smoke tests for the high-level discovery functions: listing data sources,
# finding datasets by block types and finding learning methods by block types.
@testset ExtendedTestSet "FasterAI" begin
    @test length(listdatasources()) > 10

    # At least one dataset must be found for each of these block combinations.
    for blocktypes in ((Image, Label), (Image, LabelMulti), (Image, Mask))
        @test !isempty(finddatasets(blocks=blocktypes))
    end

    @test ImageClassificationSingle ∈ findlearningmethods((Image, Label))
    @test ImageClassificationMulti ∈ findlearningmethods((Image, LabelMulti))
end
4 changes: 2 additions & 2 deletions test/imports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
using Colors: RGB, N0f8, Gray
using FastAI
using FastAI: ParamGroups, IndexGrouper, getgroup, DiscriminativeLRs, decay_optim
using FastAI: Image, Keypoints, Mask, testencoding, Label, OneHot, ProjectiveTransforms,
encodedblock, decodedblock, encode, decode, mockblock
import FastAI: Image, Keypoints, Mask, testencoding, Label, OneHot, ProjectiveTransforms,
encodedblock, decodedblock, encode, decode, mockblock, checkblock, Block, Encoding
using FilePathsBase
using FastAI.Datasets
using DLPipelines
Expand Down
8 changes: 8 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ include("imports.jl")
##

@testset ExtendedTestSet "FastAI.jl" begin
@testset ExtendedTestSet "datablock.jl" begin
include("datablock.jl")
end

@testset ExtendedTestSet "fasterai.jl" begin
include("fasterai.jl")
end

@testset ExtendedTestSet "encodings/" begin
@testset ExtendedTestSet "projective.jl" begin
include("encodings/projective.jl")
Expand Down