diff --git a/docs/Project.toml b/docs/Project.toml index a727dcb04..44993247a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -27,24 +27,24 @@ StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" [compat] BSON = "0.3" -BenchmarkTools = "1.3" +BenchmarkTools = "1.5" CSV = "0.10" DataFrames = "1.6" -DifferentialEquations = "7.9" +DifferentialEquations = "7.14" Distributions = "0.25" -Documenter = "0.27" +Documenter = "1.7" Flux = "0.14" -GLM = "1.8" -GLPK = "1.1" -GR = "0.72" +GLM = "1.9" +GLPK = "1.2" +GR = "0.73" HypothesisTests = "0.11" -Ipopt = "1.4" -JuMP = "1.14" +Ipopt = "1.6" +JuMP = "1.23" MLDatasets = "0.7" -Plots = "1.39" -ProgressMeter = "1.8" +Plots = "1.40" +ProgressMeter = "1.10" Query = "1.0" RDatasets = "0.7" -SpecialFunctions = "2.3" +SpecialFunctions = "2.4" StatsPlots = "0.15" -julia = "1.9" \ No newline at end of file +julia = "1.10" diff --git a/docs/make.jl b/docs/make.jl index 443c7ffd0..927847e82 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -113,7 +113,6 @@ makedocs(; collapselevel=1, assets=[ "assets/favicon.ico", - "assets/ctustyle.css", ], ansicolor=true ), @@ -134,7 +133,7 @@ makedocs(; "11: Neural networks II." 
=> lecture_11, "12: Statistics" => lecture_12, "13: Ordinary differential equations" => lecture_13, - ] + ], ) deploydocs(; diff --git a/docs/src/assets/ctustyle.css b/docs/src/assets/ctustyle.css deleted file mode 100644 index 5e49a5d19..000000000 --- a/docs/src/assets/ctustyle.css +++ /dev/null @@ -1,215 +0,0 @@ -/* CSS style from https: //github.com/joshday/OnlineStats.jl */ -@import url('https://rsms.me/inter/inter.css'); - -html { - font-family: 'Inter', sans-serif; -} - -@supports (font-variation-settings: normal) { - html { - font-family: 'Inter var', sans-serif; - } -} - - -body { - font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", "Helvetica", "Arial", sans-serif !important; -} - -ul.internal .tocitem::before { - content: "" !important; - margin: 0px !important; - /* margin-right: 0.4em; */ -} - -/* Indentation of TOC level 2 */ -ul.internal .tocitem { - margin-left: 16px; -} - -.tocitem { - font-weight: normal !important; -} - -img, -svg { - border-radius: 4px; - margin-left: 6px; - margin-right: 6px; - margin-bottom: 6px; -} - -.content pre { - border: 1px; - border-radius: 4px; -} - -ul.internal { - border: 0px !important; -} - -#documenter>nav>ul>li.is-active { - padding-bottom: 4px; -} - -.admonition { - border-width: 0px !important; -} - -.admonition-header { - font-weight: normal !important; - border-radius: 4px 4px 0px 0px; -} - -.docstring { - border-radius: 4px !important; -} - -.docstring>header { - border-radius: 4px 4px 0px 0px !important; - border-bottom: 0px !important; -} - -#documenter-search-query { - border: 1px; -} - -.docs-menu { - border-top: 1px; - margin-top: 16px !important; - overflow-x: hidden; -} - -#documenter>nav>ul>li>a { - margin-left: 16px !important; -} - -h1, -h2, -h3, -h4, -h5, -h6 { - font-weight: normal !important; -} - -#documenter-search-results { - list-style-type: decimal !important; - padding-bottom: 16px; -} - -.docs-version-selector { - border-top: 0px !important; -} 
- -/* Custom admonitions */ -/* Exercise*/ -.admonition.is-category-exercise { - background-color: #9beeb8; - border-color: #22c35b; -} - -.admonition.is-category-exercise>.admonition-header { - background-color: #22c35b; - color: #fff; -} - -html.theme--documenter-dark .admonition.is-category-exercise { - background-color: #282f2f; - border-color: #008438; -} - -html.theme--documenter-dark .admonition.is-category-exercise>.admonition-header { - background-color: #008438; - color: #fff; -} - -/* Theorem*/ -.admonition.is-category-theorem { - background-color: #f5b7b1; - border-color: #e74c3c; -} - -.admonition.is-category-theorem>.admonition-header { - background-color: #e74c3c; - color: #fff; -} - -html.theme--documenter-dark .admonition.is-category-theorem { - background-color: #282f2f; - border-color: #b03a2e; -} - -html.theme--documenter-dark .admonition.is-category-theorem>.admonition-header { - background-color: #b03a2e; - color: #fff; -} - -/* Homework class */ -.admonition.is-category-homework { - background-color: #d7bde2; - border-color: #8e44ad; -} - -.admonition.is-category-homework>.admonition-header { - background-color: #8e44ad; - color: #fff; -} - -html.theme--documenter-dark .admonition.is-category-homework { - background-color: #282f2f; - border-color: #6c3483; -} - -html.theme--documenter-dark .admonition.is-category-homework>.admonition-header { - background-color: #6c3483; - color: #fff; -} - -/* Bonus class */ -.admonition.is-category-bonus { - background-color: #99e6f0; - border-color: #1db5c9; -} - -.admonition.is-category-bonus>.admonition-header { - background-color: #1db5c9; - color: #fff; -} - -html.theme--documenter-dark .admonition.is-category-bonus { - background-color: #282f2f; - border-color: #137886; -} - -html.theme--documenter-dark .admonition.is-category-bonus>.admonition-header { - background-color: #137886; - color: #fff; -} - -/* Solution*/ -.admonition.is-category-solution { - background-color: #ffCC99; - border-color: 
#ff9933; -} - -.admonition.is-category-solution>.admonition-header { - background-color: #ff9933; - color: #fff; - border-radius: 4px; -} - -.admonition.is-category-solution>.admonition-header:before { - content: none; -} - -html.theme--documenter-dark .admonition.is-category-solution { - background-color: #282f2f; - border-color: #FF8000; -} - -html.theme--documenter-dark .admonition.is-category-solution>.admonition-header { - background-color: #FF8000; - color: #fff; - border-radius: 4px; -} \ No newline at end of file diff --git a/docs/src/howto.md b/docs/src/howto.md deleted file mode 100644 index 7c36a54ac..000000000 --- a/docs/src/howto.md +++ /dev/null @@ -1,67 +0,0 @@ -# How to use special html elements - -## Exercise - -```@raw html -
-
Exercise:
-
-``` - -Text... - -```@raw html -
-
-Solution: -
-``` - -Solution... - -```@raw html -
-``` - -## Homework - -```@raw html -
-
Homework:
-
-``` - -Text... - -```@raw html -
-``` - -## Theorem - -```@raw html -
-
Theorem:
-
-``` - -Text... - -```@raw html -
-``` - - -## Bonus - -```@raw html -
-
Bonus:
-
-``` - -Text... - -```@raw html -
-``` \ No newline at end of file diff --git a/docs/src/lecture_01/operators.md b/docs/src/lecture_01/operators.md index c57c882d2..c3ee31d31 100644 --- a/docs/src/lecture_01/operators.md +++ b/docs/src/lecture_01/operators.md @@ -46,74 +46,59 @@ julia> 2(3 + 4) # equivalent to 2*(3 + 4) 14 ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Determine the value and type of `y` given by the following expression -Determine the value and type of `y` given by the following expression + ```math + y = \frac{(x + 2)^2 - 4}{(x - 2)^{p - 2}}, + ``` -```math -y = \frac{(x + 2)^2 - 4}{(x - 2)^{p - 2}}, -``` + where `x = 4` and `p = 5`. -where `x = 4` and `p = 5`. +!!! details "Solution:" + First, we define variables `x` and `p` -```@raw html -
-
-Solution: -
-``` + ```jldoctest ex1 + julia> x = 4 + 4 -First, we define variables `x` and `p` + julia> p = 5 + 5 + ``` -```jldoctest ex1 -julia> x = 4 -4 + then we can use the combination of basic arithmetic operators to compute the value of `y` -julia> p = 5 -5 -``` + ```jldoctest ex1 + julia> y = ((x + 2)^2 - 4)/(x - 2)^(p - 2) + 4.0 + ``` -then we can use the combination of basic arithmetic operators to compute the value of `y` + The type of `y` can be determined using the `typeof` function -```jldoctest ex1 -julia> y = ((x + 2)^2 - 4)/(x - 2)^(p - 2) -4.0 -``` + ```jldoctest ex1 + julia> typeof(y) + Float64 + ``` -The type of `y` can be determined using the `typeof` function + Note that the resulting type of `y` is `Float64` even though the result can be represented as an integer. The reason is that we divide two integers -```jldoctest ex1 -julia> typeof(y) -Float64 -``` + ```jldoctest ex1 + julia> typeof((x + 2)^2 - 4) + Int64 -Note that the resulting type of `y` is `Float64` even though the result can be represented as an integer. The reason is that we divide two integers + julia> typeof((x - 2)^(p - 2)) + Int64 + ``` -```jldoctest ex1 -julia> typeof((x + 2)^2 - 4) -Int64 + Because this operation generally does not result in an integer, dividing two integers always returns a floating-point number. If we want to get an integer, we can use the integer division operator `÷` (can be typed as `\div`) -julia> typeof((x - 2)^(p - 2)) -Int64 -``` - -Because this operation generally does not result in an integer, dividing two integers always returns a floating-point number. If we want to get an integer, we can use the integer division operator `÷` (can be typed as `\div`) - -```jldoctest ex1 -julia> y_int = ((x + 2)^2 - 4)÷(x - 2)^(p - 2) -4 + ```jldoctest ex1 + julia> y_int = ((x + 2)^2 - 4)÷(x - 2)^(p - 2) + 4 -julia> typeof(y_int) -Int64 -``` - -```@raw html -
-``` + julia> typeof(y_int) + Int64 + ``` ## Promotion system @@ -176,51 +161,36 @@ julia> typeof(z) Float32 ``` -```@raw html -
-
Exercise:
-
-``` - -All of these values represent number ``1``. Determine the smallest type which can represent them. +!!! warning "Exercise:" + All of these values represent number ``1``. Determine the smallest type which can represent them. -```jldoctest promotion3; output = false -x = 1 -y = 1f0 -z = true -w = Int32(1) - -# output -1 -``` - -```@raw html -
-
-Solution: -
-``` + ```jldoctest promotion3; output = false + x = 1 + y = 1f0 + z = true + w = Int32(1) -To get the correct promotion type, we can use a combination of the `promote` and `typeof` functions + # output + 1 + ``` -```jldoctest promotion3 -julia> xp, yp, zp, wp = promote(x, y, z, w) -(1.0f0, 1.0f0, 1.0f0, 1.0f0) +!!! details "Solution:" + To get the correct promotion type, we can use a combination of the `promote` and `typeof` functions -julia> typeof(xp) -Float32 -``` + ```jldoctest promotion3 + julia> xp, yp, zp, wp = promote(x, y, z, w) + (1.0f0, 1.0f0, 1.0f0, 1.0f0) -or the `promote_type` and `typeof` functions + julia> typeof(xp) + Float32 + ``` -```jldoctest promotion3 -julia> promote_type(typeof(x), typeof(y), typeof(z), typeof(w)) -Float32 -``` + or the `promote_type` and `typeof` functions -```@raw html -
-``` + ```jldoctest promotion3 + julia> promote_type(typeof(x), typeof(y), typeof(z), typeof(w)) + Float32 + ``` ## Updating operators @@ -243,57 +213,42 @@ julia> x \= 16 # x = x \ 16 = 16 / x 2.0 ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Compute the value of `y` given by the following expression -Compute the value of `y` given by the following expression + ```math + y = \frac{(x + 4)^{\frac{3}{2}}}{(x + 1)^{p - 1}}, + ``` -```math -y = \frac{(x + 4)^{\frac{3}{2}}}{(x + 1)^{p - 1}}, -``` - -where `x = 5` and `p = 3`. Then multiply the result by `8`, add `3`, divide by `3`, and subtract `1`. What are all the intermediate results and the final result? + where `x = 5` and `p = 3`. Then multiply the result by `8`, add `3`, divide by `3`, and subtract `1`. What are all the intermediate results and the final result? -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + First, we calculate the value of `y` -First, we calculate the value of `y` + ```jldoctest ex2 + julia> x = 5; -```jldoctest ex2 -julia> x = 5; + julia> p = 3; -julia> p = 3; + julia> y = (x + 4)^(3/2)/(x + 1)^(p - 1) + 0.75 + ``` -julia> y = (x + 4)^(3/2)/(x + 1)^(p - 1) -0.75 -``` + Then we can use the update operators to get all the intermediate results as well as the final result -Then we can use the update operators to get all the intermediate results as well as the final result + ```jldoctest ex2 + julia> y *= 8 + 6.0 -```jldoctest ex2 -julia> y *= 8 -6.0 + julia> y += 3 + 9.0 -julia> y += 3 -9.0 - -julia> y /= 3 -3.0 - -julia> y -= 1 -2.0 -``` + julia> y /= 3 + 3.0 -```@raw html -
-``` + julia> y -= 1 + 2.0 + ``` ## Numeric comparison @@ -466,59 +421,44 @@ julia> round(x; sigdigits = 3) 3140.0 ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Use rounding functions to solve the following tasks: + - Round `1252.1518` to the nearest larger integer and convert the resulting value to `Int64`. + - Round `1252.1518` to the nearest smaller integer and convert the resulting value to `Int16`. + - Round `1252.1518` to `2` digits after the decimal point. + - Round `1252.1518` to `3` significant digits. -Use rounding functions to solve the following tasks: -- Round `1252.1518` to the nearest larger integer and convert the resulting value to `Int64`. -- Round `1252.1518` to the nearest smaller integer and convert the resulting value to `Int16`. -- Round `1252.1518` to `2` digits after the decimal point. -- Round `1252.1518` to `3` significant digits. - -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + The `ceil` function rounds numbers to the nearest larger value, and since we want the result to be of type `Int64`, we have to pass this type as a first argument -The `ceil` function rounds numbers to the nearest larger value, and since we want the result to be of type `Int64`, we have to pass this type as a first argument - -```jldoctest rounding_ex -julia> x = 1252.1518 -1252.1518 - -julia> ceil(Int64, x) -1253 -``` + ```jldoctest rounding_ex + julia> x = 1252.1518 + 1252.1518 -Similarly, the floor function rounds numbers to the nearest smaller value + julia> ceil(Int64, x) + 1253 + ``` -```jldoctest rounding_ex -julia> floor(Int16, x) -1252 -``` + Similarly, the floor function rounds numbers to the nearest smaller value -The number of digits after the decimal point can be controlled using the `digits` keyword + ```jldoctest rounding_ex + julia> floor(Int16, x) + 1252 + ``` -```jldoctest rounding_ex -julia> round(x; digits = 2) -1252.15 -``` + The number of digits after the decimal point can be controlled using the `digits` keyword -and the number of significant digits using the `sigdigits` keyword + ```jldoctest rounding_ex + julia> round(x; digits = 2) + 1252.15 + ``` -```jldoctest rounding_ex -julia> round(x; sigdigits = 3) -1250.0 -``` + and the number of significant digits using the `sigdigits` keyword -```@raw html -
-``` + ```jldoctest rounding_ex + julia> round(x; sigdigits = 3) + 1250.0 + ``` ## Numerical conversions @@ -553,45 +493,30 @@ ERROR: InexactError: Int64(1.234) Conversion to other types works in a similar way. -```@raw html -
-
Exercise:
-
-``` - -Use the proper numeric conversion to get the correct result (not approximate) of summing the following two numbers +!!! warning "Exercise:" + Use the proper numeric conversion to get the correct result (not approximate) of summing the following two numbers -```jldoctest conversion_ex; output = false -x = 1//3 -y = 0.5 + ```jldoctest conversion_ex; output = false + x = 1//3 + y = 0.5 -# output -0.5 -``` - -**Hint:** rational numbers can be summed without approximation. + # output + 0.5 + ``` -```@raw html -
-
-Solution: -
-``` + **Hint:** rational numbers can be summed without approximation. -Firstly, we can try just to sum the given numbers +!!! details "Solution:" + Firstly, we can try just to sum the given numbers -```jldoctest conversion_ex -julia> x + y -0.8333333333333333 -``` + ```jldoctest conversion_ex + julia> x + y + 0.8333333333333333 + ``` -The result of this operation is a floating-point number. However, in this specific case, we have a rational number and a floating-point number that can also be represented as a rational number. The exact result can be obtained by converting the variable `y` to a rational number + The result of this operation is a floating-point number. However, in this specific case, we have a rational number and a floating-point number that can also be represented as a rational number. The exact result can be obtained by converting the variable `y` to a rational number -```jldoctest conversion_ex -julia> x + Rational(y) -5//6 -``` - -```@raw html -
-``` + ```jldoctest conversion_ex + julia> x + Rational(y) + 5//6 + ``` \ No newline at end of file diff --git a/docs/src/lecture_01/strings.md b/docs/src/lecture_01/strings.md index 4f145521c..2d36aa4a8 100644 --- a/docs/src/lecture_01/strings.md +++ b/docs/src/lecture_01/strings.md @@ -74,7 +74,11 @@ julia> str2 = "\$\$\$ dollars everywhere \$\$\$" ```jldoctest strings julia> "The $ will be fine." -ERROR: syntax: invalid interpolation syntax: "$ " +ERROR: ParseError: +# Error @ none:1:7 +"The $ will be fine." +# └ ── identifier or parenthesized expression expected after $ in string +[...] ``` No, they won't. If used incorrectly, Julia will throw an error. @@ -114,54 +118,39 @@ julia> print(str) world. ``` -```@raw html -
-
Exercise:
-
-``` - -Create a string with the following text -> Quotation is the repetition or copy of someone else's statement or thoughts. \ -> Quotation marks are punctuation marks used in text to indicate a quotation. \ -> Both of these words are sometimes abbreviated as "quote(s)". -and print it into the REPL. The printed string should look the same as the text above, i.e., each sentence should be on a separate line. Use an indent of length 4 for each sentence. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Create a string with the following text + > Quotation is the repetition or copy of someone else's statement or thoughts. \ + > Quotation marks are punctuation marks used in text to indicate a quotation. \ + > Both of these words are sometimes abbreviated as "quote(s)". + and print it into the REPL. The printed string should look the same as the text above, i.e., each sentence should be on a separate line. Use an indent of length 4 for each sentence. -There are two basic ways to get the right result. The first is to use a multi-line string and write the message in the correct form. +!!! details "Solution:" + There are two basic ways to get the right result. The first is to use a multi-line string and write the message in the correct form. -```jldoctest -julia> str = """ - Quotation is the repetition or copy of someone else's statement or thoughts. - Quotation marks are punctuation marks used in text to indicate a quotation. - Both of these words are sometimes abbreviated as "quote(s)". - """; - -julia> println(str) - Quotation is the repetition or copy of someone else's statement or thoughts. - Quotation marks are punctuation marks used in text to indicate a quotation. - Both of these words are sometimes abbreviated as "quote(s)". -``` + ```jldoctest + julia> str = """ + Quotation is the repetition or copy of someone else's statement or thoughts. + Quotation marks are punctuation marks used in text to indicate a quotation. + Both of these words are sometimes abbreviated as "quote(s)". + """; -We do not have to add backslashes to escape quotation marks in the text. The second way is to use a regular string and the new line symbol `\n`. In this case, it is necessary to use backslashes to escape quotation marks. Also, we have to add four spaces before each sentence to get a proper indentation. + julia> println(str) + Quotation is the repetition or copy of someone else's statement or thoughts. 
+ Quotation marks are punctuation marks used in text to indicate a quotation. + Both of these words are sometimes abbreviated as "quote(s)". + ``` -```jldoctest -julia> str = " Quotation is the repetition or copy of someone else's statement or thoughts.\n Quotation marks are punctuation marks used in text to indicate a quotation.\n Both of these words are sometimes abbreviated as \"quote(s)\"."; + We do not have to add backslashes to escape quotation marks in the text. The second way is to use a regular string and the new line symbol `\n`. In this case, it is necessary to use backslashes to escape quotation marks. Also, we have to add four spaces before each sentence to get a proper indentation. -julia> println(str) - Quotation is the repetition or copy of someone else's statement or thoughts. - Quotation marks are punctuation marks used in text to indicate a quotation. - Both of these words are sometimes abbreviated as "quote(s)". -``` + ```jldoctest + julia> str = " Quotation is the repetition or copy of someone else's statement or thoughts.\n Quotation marks are punctuation marks used in text to indicate a quotation.\n Both of these words are sometimes abbreviated as \"quote(s)\"."; -```@raw html -
-``` + julia> println(str) + Quotation is the repetition or copy of someone else's statement or thoughts. + Quotation marks are punctuation marks used in text to indicate a quotation. + Both of these words are sometimes abbreviated as "quote(s)". + ``` ## String concatenation and interpolation @@ -258,55 +247,40 @@ julia> "tuple: $(t)" "tuple: (1, 2, 3)" ``` -```@raw html -
-
Exercise:
-
-``` - -Print the following message for a given vector -> " is a vector of length with elements of type " -where `` is the string representation of the given vector, `` is the actual length of the given vector, and `` is the type of its elements. Use the following two vectors. - -```julia -a = [1,2,3] -b = [:a, :b, :c, :d] -``` - -**Hint:** use the `length` and `eltype` functions. +!!! warning "Exercise:" + Print the following message for a given vector + > " is a vector of length with elements of type " + where `` is the string representation of the given vector, `` is the actual length of the given vector, and `` is the type of its elements. Use the following two vectors. -```@raw html -
-
-Solution: -
-``` + ```julia + a = [1,2,3] + b = [:a, :b, :c, :d] + ``` -We will show two ways how to solve this exercise. The first way is to use the `string` function in combination with the `length` function to get the length of the vector, and the `eltype` function to get the type of its elements. + **Hint:** use the `length` and `eltype` functions. -```jldoctest -julia> a = [1,2,3]; +!!! details "Solution:" + We will show two ways how to solve this exercise. The first way is to use the `string` function in combination with the `length` function to get the length of the vector, and the `eltype` function to get the type of its elements. -julia> str = string(a, " is a vector of length ", length(a), " with elements of type ", eltype(a)); + ```jldoctest + julia> a = [1,2,3]; -julia> println(str) -[1, 2, 3] is a vector of length 3 with elements of type Int64 -``` + julia> str = string(a, " is a vector of length ", length(a), " with elements of type ", eltype(a)); -The second way is to use string interpolation. + julia> println(str) + [1, 2, 3] is a vector of length 3 with elements of type Int64 + ``` -```jldoctest -julia> b = [:a, :b, :c, :d]; + The second way is to use string interpolation. -julia> str = "$(b) is a vector of length $(length(b)) with elements of type $(eltype(b))"; + ```jldoctest + julia> b = [:a, :b, :c, :d]; -julia> println(str) -[:a, :b, :c, :d] is a vector of length 4 with elements of type Symbol -``` + julia> str = "$(b) is a vector of length $(length(b)) with elements of type $(eltype(b))"; -```@raw html -
-``` + julia> println(str) + [:a, :b, :c, :d] is a vector of length 4 with elements of type Symbol + ``` ## Useful functions @@ -403,37 +377,22 @@ julia> replace("Sherlock Holmes", "Holmes" => "Homeless") "Sherlock Homeless" ``` -```@raw html -
-
Exercise:
-
-``` - -Use the `split` function to split the following string -> "Julia!" -into a vector of single-character strings. - -**Hint:** we can say that an empty string `""` separates the characters in the string. - -```@raw html -
-
-Solution: -
-``` - -To separate a string into separate single-character strings, we can use the `split` function and an empty string (`""`) as a delimiter. -```jldoctest -julia> split("Julia!", "") -6-element Vector{SubString{String}}: - "J" - "u" - "l" - "i" - "a" - "!" -``` - -```@raw html -
-``` +!!! warning "Exercise:" + Use the `split` function to split the following string + > "Julia!" + into a vector of single-character strings. + + **Hint:** we can say that an empty string `""` separates the characters in the string. + +!!! details "Solution:" + To separate a string into separate single-character strings, we can use the `split` function and an empty string (`""`) as a delimiter. + ```jldoctest + julia> split("Julia!", "") + 6-element Vector{SubString{String}}: + "J" + "u" + "l" + "i" + "a" + "!" + ``` \ No newline at end of file diff --git a/docs/src/lecture_01/variables.md b/docs/src/lecture_01/variables.md index 898bbc39a..a1c340754 100644 --- a/docs/src/lecture_01/variables.md +++ b/docs/src/lecture_01/variables.md @@ -43,54 +43,39 @@ Float64 In this case, the variable `x` is of type `Float64`, which is a type that represents floating-point numbers. -```@raw html -
-
Exercise:
-
-``` - -Create the following three variables: -1. Variable `x` with value `1.234`. -2. Variable `y` with value `1//2`. -3. Variable `z` with value `x + y*im`. -What are the types of these three variables? - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Create the following three variables: + 1. Variable `x` with value `1.234`. + 2. Variable `y` with value `1//2`. + 3. Variable `z` with value `x + y*im`. + What are the types of these three variables? -All three variables can be declared simply by assigning the value to the given variable name +!!! details "Solution:" + All three variables can be declared simply by assigning the value to the given variable name -```jldoctest var_types -julia> x = 1.234 -1.234 + ```jldoctest var_types + julia> x = 1.234 + 1.234 -julia> y = 1//2 -1//2 - -julia> z = x + y*im -1.234 + 0.5im -``` + julia> y = 1//2 + 1//2 -and types can be checked using the `typeof` function + julia> z = x + y*im + 1.234 + 0.5im + ``` -```jldoctest var_types -julia> typeof(x) -Float64 + and types can be checked using the `typeof` function -julia> typeof(y) -Rational{Int64} + ```jldoctest var_types + julia> typeof(x) + Float64 -julia> typeof(z) -ComplexF64 (alias for Complex{Float64}) -``` + julia> typeof(y) + Rational{Int64} -```@raw html -
-``` + julia> typeof(z) + ComplexF64 (alias for Complex{Float64}) + ``` ## Primitive numeric types @@ -150,7 +135,7 @@ julia> ℯ ℯ = 2.7182818284590... julia> ℯ = 2 -ERROR: cannot assign a value to imported variable MathConstants.ℯ from module Main +ERROR: cannot assign a value to imported variable Base.ℯ from module Main [...] ``` @@ -166,7 +151,10 @@ The only explicitly disallowed names for variables are the names of built-in res ```jldoctest julia> struct = 3 -ERROR: syntax: unexpected "=" +ERROR: ParseError: +# Error @ none:1:8 +struct = 3 +# ╙ ── unexpected `=` [...] ``` diff --git a/docs/src/lecture_02/arrays.md b/docs/src/lecture_02/arrays.md index 4a176591c..3f8b838b7 100644 --- a/docs/src/lecture_02/arrays.md +++ b/docs/src/lecture_02/arrays.md @@ -166,6 +166,7 @@ julia> append!(v, 3.0) julia> append!(v, 3.1415) ERROR: InexactError: Int64(3.1415) +[...] ``` In the first case, it is possible to append a floating-point number since it can be represented as an integer. We can use the `isinteger` function to test whether the number is numerically equal to some integer. @@ -229,76 +230,61 @@ julia> v 11 ``` -```@raw html -
-
Exercise:
-
-``` - -Create a vector of positive integers that contains all odd numbers smaller than `10`. Then change the first element to `4` and the last two elements to `1`. +!!! warning "Exercise:" + Create a vector of positive integers that contains all odd numbers smaller than `10`. Then change the first element to `4` and the last two elements to `1`. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + Such a vector can be either created manually by -Such a vector can be either created manually by - -```jldoctest vectors_ex -julia> v = [1,3,5,7,9] -5-element Vector{Int64}: - 1 - 3 - 5 - 7 - 9 -``` - -or we can use the `range` function to create a range with given properties and then use the `collect` function to create a vector. Another possibility is to use the `Vector` type to convert the range into a vector. - -```jldoctest vectors_ex -julia> collect(1:2:9) -5-element Vector{Int64}: - 1 - 3 - 5 - 7 - 9 + ```jldoctest vectors_ex + julia> v = [1,3,5,7,9] + 5-element Vector{Int64}: + 1 + 3 + 5 + 7 + 9 + ``` -julia> Vector(1:2:9) -5-element Vector{Int64}: - 1 - 3 - 5 - 7 - 9 -``` + or we can use the `range` function to create a range with given properties and then use the `collect` function to create a vector. Another possibility is to use the `Vector` type to convert the range into a vector. + + ```jldoctest vectors_ex + julia> collect(1:2:9) + 5-element Vector{Int64}: + 1 + 3 + 5 + 7 + 9 + + julia> Vector(1:2:9) + 5-element Vector{Int64}: + 1 + 3 + 5 + 7 + 9 + ``` -The values stored in the vector can be changed using the `.=` sign and proper indices. Do not forget to add the dot before the `=` sign to perform the element-wise operation. + The values stored in the vector can be changed using the `.=` sign and proper indices. Do not forget to add the dot before the `=` sign to perform the element-wise operation. -```jldoctest vectors_ex -julia> v[1] = 4 -4 + ```jldoctest vectors_ex + julia> v[1] = 4 + 4 -julia> v[end-1:end] .= 1 -2-element view(::Vector{Int64}, 4:5) with eltype Int64: - 1 - 1 + julia> v[end-1:end] .= 1 + 2-element view(::Vector{Int64}, 4:5) with eltype Int64: + 1 + 1 -julia> v -5-element Vector{Int64}: - 4 - 3 - 5 - 1 - 1 -``` - -```@raw html -
-``` + julia> v + 5-element Vector{Int64}: + 4 + 3 + 5 + 1 + 1 + ``` ## Matrices @@ -438,77 +424,62 @@ julia> hcat(M, v) 5 6 7 8 12 julia> vcat(M, v) -ERROR: ArgumentError: number of columns of each array must match (got (4, 1)) +ERROR: DimensionMismatch: number of columns of each array must match (got (4, 1)) [...] ``` -```@raw html -
-
Exercise:
-
-``` - -Create two vectors: vector of all odd positive integers smaller than `10` and vector of all even positive integers smaller than or equal to `10`. Then concatenate these two vectors horizontally and fill the third row with `4`. - -```@raw html -
-
-Solution: -
-``` - -First, we have to create the two vectors. We can do it manually, or we can use ranges and the `collect` function as in the exercise in the previous section. - -```jldoctest matrices_ex -julia> v1 = collect(1:2:9) -5-element Vector{Int64}: - 1 - 3 - 5 - 7 - 9 - -julia> v2 = collect(2:2:10) -5-element Vector{Int64}: - 2 - 4 - 6 - 8 - 10 -``` - -Then we use the `hcat` function to concatenate these two vectors horizontally. - -```jldoctest matrices_ex -julia> M = hcat(v1, v2) -5×2 Matrix{Int64}: - 1 2 - 3 4 - 5 6 - 7 8 - 9 10 -``` +!!! warning "Exercise:" + Create two vectors: vector of all odd positive integers smaller than `10` and vector of all even positive integers smaller than or equal to `10`. Then concatenate these two vectors horizontally and fill the third row with `4`. + +!!! details "Solution:" + First, we have to create the two vectors. We can do it manually, or we can use ranges and the `collect` function as in the exercise in the previous section. + + ```jldoctest matrices_ex + julia> v1 = collect(1:2:9) + 5-element Vector{Int64}: + 1 + 3 + 5 + 7 + 9 + + julia> v2 = collect(2:2:10) + 5-element Vector{Int64}: + 2 + 4 + 6 + 8 + 10 + ``` -Finally, we select all elements in the third row and assign the new value to them. + Then we use the `hcat` function to concatenate these two vectors horizontally. -```jldoctest matrices_ex -julia> M[3,:] .= 4 -2-element view(::Matrix{Int64}, 3, :) with eltype Int64: - 4 - 4 - -julia> M -5×2 Matrix{Int64}: - 1 2 - 3 4 - 4 4 - 7 8 - 9 10 -``` + ```jldoctest matrices_ex + julia> M = hcat(v1, v2) + 5×2 Matrix{Int64}: + 1 2 + 3 4 + 5 6 + 7 8 + 9 10 + ``` -```@raw html -
-``` + Finally, we select all elements in the third row and assign the new value to them. + + ```jldoctest matrices_ex + julia> M[3,:] .= 4 + 2-element view(::Matrix{Int64}, 3, :) with eltype Int64: + 4 + 4 + + julia> M + 5×2 Matrix{Int64}: + 1 2 + 3 4 + 4 4 + 7 8 + 9 10 + ``` ## `N`-dimensional arrays @@ -640,68 +611,53 @@ julia> fill(1.234, 2, 3, 1) 1.234 1.234 1.234 ``` -```@raw html -
-
Exercise:
-
-``` - -Create three matrices with the following properties: -- Matrix `A` is of size `2x3`, and all its elements equal 0. -- Matrix `B` is of size `2x3x1`, and all its elements equal 1. -- Matrix `C` is of size `2x3`, and all its elements equal 2. -Concatenate these three matrices along the third dimension. - -**Hint:** use the `cat` function and the keyword `dims`. +!!! warning "Exercise:" + Create three matrices with the following properties: + - Matrix `A` is of size `2x3`, and all its elements equal 0. + - Matrix `B` is of size `2x3x1`, and all its elements equal 1. + - Matrix `C` is of size `2x3`, and all its elements equal 2. + Concatenate these three matrices along the third dimension. -```@raw html -
-
-Solution: -
-``` + **Hint:** use the `cat` function and the keyword `dims`. -Matrix `A` can be created using the `zeros` function, and similarly, matrix `B` using the `ones` function. To create a matrix `C`, we can use the `fill` function. +!!! details "Solution:" + Matrix `A` can be created using the `zeros` function, and similarly, matrix `B` using the `ones` function. To create a matrix `C`, we can use the `fill` function. -```jldoctest arrays_ex -julia> A = zeros(2, 3) -2×3 Matrix{Float64}: - 0.0 0.0 0.0 - 0.0 0.0 0.0 + ```jldoctest arrays_ex + julia> A = zeros(2, 3) + 2×3 Matrix{Float64}: + 0.0 0.0 0.0 + 0.0 0.0 0.0 -julia> B = ones(2, 3, 1) -2×3×1 Array{Float64, 3}: -[:, :, 1] = - 1.0 1.0 1.0 - 1.0 1.0 1.0 - -julia> C = fill(2, 2, 3) -2×3 Matrix{Int64}: - 2 2 2 - 2 2 2 -``` + julia> B = ones(2, 3, 1) + 2×3×1 Array{Float64, 3}: + [:, :, 1] = + 1.0 1.0 1.0 + 1.0 1.0 1.0 -Now we can use the `cat` function with `dims = 3` to concatenate the matrices along the third dimension. + julia> C = fill(2, 2, 3) + 2×3 Matrix{Int64}: + 2 2 2 + 2 2 2 + ``` -```jldoctest arrays_ex -julia> cat(A, B, C; dims = 3) -2×3×3 Array{Float64, 3}: -[:, :, 1] = - 0.0 0.0 0.0 - 0.0 0.0 0.0 + Now we can use the `cat` function with `dims = 3` to concatenate the matrices along the third dimension. -[:, :, 2] = - 1.0 1.0 1.0 - 1.0 1.0 1.0 + ```jldoctest arrays_ex + julia> cat(A, B, C; dims = 3) + 2×3×3 Array{Float64, 3}: + [:, :, 1] = + 0.0 0.0 0.0 + 0.0 0.0 0.0 -[:, :, 3] = - 2.0 2.0 2.0 - 2.0 2.0 2.0 -``` + [:, :, 2] = + 1.0 1.0 1.0 + 1.0 1.0 1.0 -```@raw html -
-``` + [:, :, 3] = + 2.0 2.0 2.0 + 2.0 2.0 2.0 + ``` ## Broadcasting @@ -802,75 +758,60 @@ julia> a .* b 18 ``` -```@raw html -
-
Exercise:
-
-``` - -Construct a matrix whose elements are given by the following formula - -```math -A_{i, j} = \frac{1}{2}\exp\{(B_{i, j} + 1)^2\}, \quad i \in \{1, 2\}, \; j \in \{1, 2, 3\} -``` - -where the matrix `B` is defined by +!!! warning "Exercise:" + Construct a matrix whose elements are given by the following formula -```jldoctest broadcasting_ex; output=false -B = [ - -1 0 2; - 2 -3 1; -] + ```math + A_{i, j} = \frac{1}{2}\exp\{(B_{i, j} + 1)^2\}, \quad i \in \{1, 2\}, \; j \in \{1, 2, 3\} + ``` -# output -2×3 Matrix{Int64}: - -1 0 2 - 2 -3 1 -``` + where the matrix `B` is defined by -```@raw html -
-
-Solution: -
-``` + ```jldoctest broadcasting_ex; output=false + B = [ + -1 0 2; + 2 -3 1; + ] -Each element of the matrix `A` depends on only one element of the matrix `B`. In other words, matrix `A` can be created in an element-wise manner from matrix `B`, i.e. we can use broadcasting. + # output + 2×3 Matrix{Int64}: + -1 0 2 + 2 -3 1 + ``` -```jldoctest broadcasting_ex -julia> A = exp.((B .+ 1) .^ 2) ./ 2 -2×3 Matrix{Float64}: - 0.5 1.35914 4051.54 - 4051.54 27.2991 27.2991 -``` +!!! details "Solution:" + Each element of the matrix `A` depends on only one element of the matrix `B`. In other words, matrix `A` can be created in an element-wise manner from matrix `B`, i.e. we can use broadcasting. -We use a dot before each operation since we want to perform all operations element-wise. In this case, we can use the `@.` macro, which automatically adds a dot before each operator and each function. + ```jldoctest broadcasting_ex + julia> A = exp.((B .+ 1) .^ 2) ./ 2 + 2×3 Matrix{Float64}: + 0.5 1.35914 4051.54 + 4051.54 27.2991 27.2991 + ``` -```jldoctest broadcasting_ex -julia> A = @. exp((B + 1) ^ 2) / 2 -2×3 Matrix{Float64}: - 0.5 1.35914 4051.54 - 4051.54 27.2991 27.2991 -``` + We use a dot before each operation since we want to perform all operations element-wise. In this case, we can use the `@.` macro, which automatically adds a dot before each operator and each function. -Just for the comparison, the same matrix can be created as follows using [`for` loop](@ref for-and-while-loops). + ```jldoctest broadcasting_ex + julia> A = @. exp((B + 1) ^ 2) / 2 + 2×3 Matrix{Float64}: + 0.5 1.35914 4051.54 + 4051.54 27.2991 27.2991 + ``` -```jldoctest broadcasting_ex -julia> A = zeros(2, 3); + Just for the comparison, the same matrix can be created as follows using [`for` loop](@ref for-and-while-loops). 
-julia> for i in 1:length(A) - A[i] = exp((B[i] + 1)^2)/2 - end + ```jldoctest broadcasting_ex + julia> A = zeros(2, 3); -julia> A -2×3 Matrix{Float64}: - 0.5 1.35914 4051.54 - 4051.54 27.2991 27.2991 -``` + julia> for i in 1:length(A) + A[i] = exp((B[i] + 1)^2)/2 + end -```@raw html -
-``` + julia> A + 2×3 Matrix{Float64}: + 0.5 1.35914 4051.54 + 4051.54 27.2991 27.2991 + ``` ## Views diff --git a/docs/src/lecture_02/dictionaries.md b/docs/src/lecture_02/dictionaries.md index 4167044c3..b93a7e741 100644 --- a/docs/src/lecture_02/dictionaries.md +++ b/docs/src/lecture_02/dictionaries.md @@ -33,6 +33,7 @@ If the key does not exist in the dictionary, an error will occur if we try to ac ```jldoctest dicts julia> d[:c] ERROR: KeyError: key :c not found +[...] julia> haskey(d, :c) false diff --git a/docs/src/lecture_02/tuples.md b/docs/src/lecture_02/tuples.md index 5fe83eb7f..7cdf0e641 100644 --- a/docs/src/lecture_02/tuples.md +++ b/docs/src/lecture_02/tuples.md @@ -48,38 +48,23 @@ The values stored in the tuple are: 1, 2.0 and 3 Arrays can be unpacked similarly. However, tuples are usually used for storing a small number of values, while arrays are typically large. Recall that while tuples are immutable, arrays are mutable. -```@raw html -
-
Exercise:
-
-``` - -Create a tuple that contains the first four letters of the alphabet (these letters should be of type `String`). Then unpack this tuple into four variables `a`, `b`, `c` and `d`. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Create a tuple that contains the first four letters of the alphabet (these letters should be of type `String`). Then unpack this tuple into four variables `a`, `b`, `c` and `d`. -Such a tuple can be created easily using the standard syntax: +!!! details "Solution:" + Such a tuple can be created easily using the standard syntax: -```jldoctest tuples_ex -julia> t = ("a", "b", "c", "d") -("a", "b", "c", "d") -``` - -We can use the four variables and the `=` sign to unpack the tuple. + ```jldoctest tuples_ex + julia> t = ("a", "b", "c", "d") + ("a", "b", "c", "d") + ``` -```jldoctest tuples_ex -julia> a, b, c, d = t -("a", "b", "c", "d") -``` + We can use the four variables and the `=` sign to unpack the tuple. -```@raw html -
-``` + ```jldoctest tuples_ex + julia> a, b, c, d = t + ("a", "b", "c", "d") + ``` ## Named tuples @@ -113,7 +98,7 @@ julia> t[end] # the last element "3" julia> t[1:2] # error -ERROR: MethodError: no method matching getindex(::NamedTuple{(:a, :b, :c), Tuple{Int64, Float64, String}}, ::UnitRange{Int64}) +ERROR: MethodError: no method matching getindex(::@NamedTuple{a::Int64, b::Float64, c::String}, ::UnitRange{Int64}) [...] ``` diff --git a/docs/src/lecture_03/conditions.md b/docs/src/lecture_03/conditions.md index 61303b58d..ae79db906 100644 --- a/docs/src/lecture_03/conditions.md +++ b/docs/src/lecture_03/conditions.md @@ -70,6 +70,7 @@ julia> if 1 println("Hello") end ERROR: TypeError: non-boolean (Int64) used in boolean context +[...] ``` The `if` blocks do not introduce a [local scope](https://docs.julialang.org/en/v1/manual/variables-and-scoping/), i.e., it is possible to introduce a new variable inside the `if` block and use this variable outside the block. @@ -147,70 +148,57 @@ julia> compare(2.3, 2.3) 2.3 ``` -```@raw html -
-
Exercise:
-
-``` - -Write the `fact(n)` function that computes the factorial of `n`. Use the following function declaration: - -```julia -function fact(n) - # some code -end -``` - -Make sure that the input argument is a non-negative integer. For negative input arguments and for arguments that can not be represented as an integer, the function should throw an error. - -**Hint:** use recursion, the `isinteger` function and the `error` function. The or operator is written by `|`. - -```@raw html -
-
-Solution: -
-```
+!!! warning "Exercise:"
+    Write the `fact(n)` function that computes the factorial of `n`. Use the following function declaration:

-We split the solution into three cases:
-1. If `n` is smaller than zero or not an integer, we throw an error.
-2. If `n` is equal to zero, the function returns `1`.
-3. If `n` is a positive integer, we use recursion.
-
-```jldoctest conditions_ex; output = false
-function fact(n)
-    return if n < 0 | !isinteger(n)
-        error("argument must be non-negative integer")
-    elseif n == 0
-        1
-    else
-        n * fact(n - 1)
+    ```julia
+    function fact(n)
+        # some code
     end
-end
+    ```

-# output
-fact (generic function with 1 method)
-```
+    Make sure that the input argument is a non-negative integer. For negative input arguments and for arguments that can not be represented as an integer, the function should throw an error.
+
+    **Hint:** use recursion, the `isinteger` function and the `error` function. The or operator is written as `|`.
+
+!!! details "Solution:"
+    We split the solution into three cases:
+    1. If `n` is smaller than zero or not an integer, we throw an error.
+    2. If `n` is equal to zero, the function returns `1`.
+    3. If `n` is a positive integer, we use recursion.
+
+    ```jldoctest conditions_ex; output = false
+    function fact(n)
+        return if (n < 0) | !isinteger(n)
+            error("argument must be non-negative integer")
+        elseif n == 0
+            1
+        else
+            n * fact(n - 1)
+        end
+    end

-Since the `if` block returns a value from the latest evaluated expression, it is possible to use it after the `return` keyword to define the function output. However, it is also possible to omit the `return` keyword since functions return the last evaluated expression if the `return` keyword is not used.

+    # output
+    fact (generic function with 1 method)
+    ```

-```jldoctest conditions_ex
-julia> fact(4)
-24
+    Since the `if` block returns a value from the last evaluated expression, it is possible to use it after the `return` keyword to define the function output. 
However, it is also possible to omit the `return` keyword since functions return the last evaluated expression if the `return` keyword is not used. -julia> fact(0) -1 + ```jldoctest conditions_ex + julia> fact(4) + 24 -julia> fact(-5) -ERROR: argument must be non-negative integer + julia> fact(0) + 1 -julia> fact(1.4) -ERROR: argument must be non-negative integer -``` + julia> fact(-5) + ERROR: argument must be non-negative integer + [...] -```@raw html -
-``` + julia> fact(1.4) + ERROR: argument must be non-negative integer + [...] + ``` ## Ternary operator @@ -270,29 +258,19 @@ julia> f(1) || println(2) # both expressions are evaluated 2 ``` -```@raw html -
-
Short-circuit evaluation vs. bitwise boolean operators:
-
-``` +!!! info "Short-circuit evaluation vs. bitwise boolean operators:" + Boolean operations without short-circuit evaluation can be done with the bitwise boolean operators `&` and `|` introduced in [previous lecture](@ref Numeric-comparison). These are normal functions, which happen to support infix operator syntax, but always evaluate their arguments. -Boolean operations without short-circuit evaluation can be done with the bitwise boolean operators `&` and `|` introduced in [previous lecture](@ref Numeric-comparison). These are normal functions, which happen to support infix operator syntax, but always evaluate their arguments. - -```jldoctest shortcirc -julia> f(1) & t(2) -1 -2 -false - -julia> f(1) && t(2) -1 -false -``` - -```@raw html -
-``` + ```jldoctest shortcirc + julia> f(1) & t(2) + 1 + 2 + false + julia> f(1) && t(2) + 1 + false + ``` When multiple `&&` and `||` are chained together, `&&` has a higher precedence than `||`. For example, `a || b && c && d || e` is equivalent to `a || (b && c && d) || e`. @@ -334,59 +312,46 @@ julia> t(1) || f(2) && println(3) # the first expression is evaluated true ``` -```@raw html -
-
Exercise:
-
-``` - -Rewrite the factorial function from the exercises above. Use the short-circuit evaluation to check if the given number is a non-negative integer and the ternary operator for recursion. +!!! warning "Exercise:" + Rewrite the factorial function from the exercises above. Use the short-circuit evaluation to check if the given number is a non-negative integer and the ternary operator for recursion. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + Since we want to check if the input number is a non-negative integer, we need to check two conditions. It can be done separately by the short-circuit evaluation. -Since we want to check if the input number is a non-negative integer, we need to check two conditions. It can be done separately by the short-circuit evaluation. - -```julia -function fact(n) - isinteger(n) || error("argument must be non-negative integer") - n >= 0 || error("argument must be non-negative integer") - return n == 0 ? 1 : n * fact(n - 1) -end -``` - -This can be further simplified by combining the `&&` and `||` operators. + ```julia + function fact(n) + isinteger(n) || error("argument must be non-negative integer") + n >= 0 || error("argument must be non-negative integer") + return n == 0 ? 1 : n * fact(n - 1) + end + ``` -```jldoctest shortcirc_ex; output = false -function fact(n) - isinteger(n) && n >= 0 || error("argument must be non-negative integer") - return n == 0 ? 1 : n * fact(n - 1) -end + This can be further simplified by combining the `&&` and `||` operators. -# output -fact (generic function with 1 method) -``` + ```jldoctest shortcirc_ex; output = false + function fact(n) + isinteger(n) && n >= 0 || error("argument must be non-negative integer") + return n == 0 ? 1 : n * fact(n - 1) + end -Since `&&` has higher precedence than `||`, the error function is evaluated only if `isinteger(n) && n >= 0` is violated. We can then check that this function works the same as the `fact` function from above. + # output + fact (generic function with 1 method) + ``` -```jldoctest shortcirc_ex -julia> fact(4) -24 + Since `&&` has higher precedence than `||`, the error function is evaluated only if `isinteger(n) && n >= 0` is violated. We can then check that this function works the same as the `fact` function from above. 
-julia> fact(0) -1 + ```jldoctest shortcirc_ex + julia> fact(4) + 24 -julia> fact(-5) -ERROR: argument must be non-negative integer + julia> fact(0) + 1 -julia> fact(1.4) -ERROR: argument must be non-negative integer -``` + julia> fact(-5) + ERROR: argument must be non-negative integer + [...] -```@raw html -
-``` + julia> fact(1.4) + ERROR: argument must be non-negative integer + [...] + ``` \ No newline at end of file diff --git a/docs/src/lecture_03/exercises.md b/docs/src/lecture_03/exercises.md index 626fd457e..981cbce93 100644 --- a/docs/src/lecture_03/exercises.md +++ b/docs/src/lecture_03/exercises.md @@ -22,196 +22,160 @@ savefig("sin.svg") # hide There will be a whole [section](@ref Plots.jl) dedicated to the Plots package. However, we need some basic functionality to visualize the outputs of the following exercises. -```@raw html -
-
Exercise 1:
-
-``` - -Every programmer should be able to rewrite pseudocode to actual code. The goal of this exercise is to rewrite the following pseudocode: +!!! warning "Exercise 1:" + Every programmer should be able to rewrite pseudocode to actual code. The goal of this exercise is to rewrite the following pseudocode: -![](juliasetalg.png) + ![](juliasetalg.png) -This pseudocode describes how to compute the [Julia set](https://en.wikipedia.org/wiki/Julia_set) for the following function + This pseudocode describes how to compute the [Julia set](https://en.wikipedia.org/wiki/Julia_set) for the following function -```math -f_c(z) = z^2 + c, -``` + ```math + f_c(z) = z^2 + c, + ``` -where ``c \in \mathbb{C}`` is a complex parameter. To test the resulting code, try the following settings of input parameters -- ``x`` is a vector of 1500 evenly spaced numbers from `-1.5` to `1.5`. -- ``y`` is a vector of 1000 evenly spaced numbers from `-1` to `1`. -- ``c = - 0.4 + 0.61 \cdot i`` -- ``R = 2`` -- ``N = 1000`` + where ``c \in \mathbb{C}`` is a complex parameter. To test the resulting code, try the following settings of input parameters + - ``x`` is a vector of 1500 evenly spaced numbers from `-1.5` to `1.5`. + - ``y`` is a vector of 1000 evenly spaced numbers from `-1` to `1`. + - ``c = - 0.4 + 0.61 \cdot i`` + - ``R = 2`` + - ``N = 1000`` -Use this code given below to plot the heatmap of the matrix ``A``. + Use this code given below to plot the heatmap of the matrix ``A``. -```julia -using Plots -heatmap(A; - c=:viridis, - clims=(0, 0.15), - cbar=:none, - axis=:none, - ticks=:none -) -``` - -```@raw html -
-
-Solution: -
-``` + ```julia + using Plots + heatmap(A; + c=:viridis, + clims=(0, 0.15), + cbar=:none, + axis=:none, + ticks=:none + ) + ``` -Firstly, we have to define all input parameters. +!!! details "Solution:" + Firstly, we have to define all input parameters. -```julia -c = - 0.4 + 0.61im -R = 2 -N = 1000 -L = 1500 -K = 1000 -``` + ```julia + c = - 0.4 + 0.61im + R = 2 + N = 1000 + L = 1500 + K = 1000 + ``` -The second step is to define the vectors `x` and `y`. Since we know that these vectors contain evenly spaced numbers, and we also know the starting point, the stopping point, and the length of the vectors, we can use the `range` function. + The second step is to define the vectors `x` and `y`. Since we know that these vectors contain evenly spaced numbers, and we also know the starting point, the stopping point, and the length of the vectors, we can use the `range` function. -```julia -x = range(-1.5, 1.5; length = L) -y = range(-1.0, 1.0; length = K) -``` + ```julia + x = range(-1.5, 1.5; length = L) + y = range(-1.0, 1.0; length = K) + ``` -The next step is to define the `A` matrix of zeros by the `zeros` function. + The next step is to define the `A` matrix of zeros by the `zeros` function. -```julia -A = zeros(K, L) -``` + ```julia + A = zeros(K, L) + ``` -Now, we rewrite the for loops from the pseudocode. It is possible to rewrite the pseudocode in an almost identical way. However, in many cases, the code can be simplified. For example, we can use the shorter syntax for writing nested `for` loops. + Now, we rewrite the for loops from the pseudocode. It is possible to rewrite the pseudocode in an almost identical way. However, in many cases, the code can be simplified. For example, we can use the shorter syntax for writing nested `for` loops. 
-```julia -for k in 1:K, l in 1:L - z = x[l] + y[k]*im - for n in 0:N - if abs(z) > R^2 - R - A[k, l] = n/N - break + ```julia + for k in 1:K, l in 1:L + z = x[l] + y[k]*im + for n in 0:N + if abs(z) > R^2 - R + A[k, l] = n/N + break + end + z = z^2 + c end - z = z^2 + c end -end -``` + ``` -Finally, we visualize the heatmap of the matrix `A`. + Finally, we visualize the heatmap of the matrix `A`. -```julia -using Plots -heatmap(A; - c = :viridis, - clims = (0, 0.15), - cbar = :none, - axis = :none, - ticks = :none, -) -``` - -```@raw html -
-``` + ```julia + using Plots + heatmap(A; + c = :viridis, + clims = (0, 0.15), + cbar = :none, + axis = :none, + ticks = :none, + ) + ``` ![](juliaset.svg) -```@raw html -
-
Exercise 2:
-
-``` +!!! warning "Exercise 2:" + In the previous exercise, we rewrote pseudocode to an actual Julia code. This exercise will improve the central part of the code: the inner loop. Write a function which replaces the inner loop in the code from the exercise above. Use the following function definition -In the previous exercise, we rewrote pseudocode to an actual Julia code. This exercise will improve the central part of the code: the inner loop. Write a function which replaces the inner loop in the code from the exercise above. Use the following function definition + ```julia + function juliaset(z, c, R, N) + ??? + return ??? + end + ``` -```julia -function juliaset(z, c, R, N) - ??? - return ??? -end -``` + where ``z, c \in \mathbb{C}``, ``R \in \mathbb{R}`` and ``N \in \mathbb{N}``. Use the `while` loop to replace the `for` loop in the original pseudocode. Visualize the resulting matrix by the same code as in the previous exercise. -where ``z, c \in \mathbb{C}``, ``R \in \mathbb{R}`` and ``N \in \mathbb{N}``. Use the `while` loop to replace the `for` loop in the original pseudocode. Visualize the resulting matrix by the same code as in the previous exercise. + **Hint:** recall that the function should return `0` if `n > N` and `n/N` otherwise. -**Hint:** recall that the function should return `0` if `n > N` and `n/N` otherwise. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + As suggested in the exercise description, we will use the `while` loop. Using the `while` loop, we have to define a stopping condition. In this case, we have two conditions: + 1. maximal number of iterations is `N + 1`, + 2. the absolute value of `z` needs to be smaller or equal to `R^2 - R`. + These two conditions can be merged into `n <= N && abs(z) <= R^2 - R`. Inside the `while` loop, we only have to update `n` and `z`. -As suggested in the exercise description, we will use the `while` loop. Using the `while` loop, we have to define a stopping condition. In this case, we have two conditions: -1. maximal number of iterations is `N + 1`, -2. the absolute value of `z` needs to be smaller or equal to `R^2 - R`. -These two conditions can be merged into `n <= N && abs(z) <= R^2 - R`. Inside the `while` loop, we only have to update `n` and `z`. - -```julia -function juliaset(z, c, R, N) - n = 0 - while n <= N && abs(z) <= R^2 - R - n += 1 - z = z^2 + c + ```julia + function juliaset(z, c, R, N) + n = 0 + while n <= N && abs(z) <= R^2 - R + n += 1 + z = z^2 + c + end + return n > N ? 0 : n/N end - return n > N ? 0 : n/N -end -``` - -We use the ternary operator to decide which value is returned. Now we need to define all input parameters as in the previous exercise. + ``` -```julia -c = - 0.4 + 0.61im -R = 2 -N = 1000 -x = range(-1.5, 1.5; length = 1500) -y = range(-1.0, 1.0; length = 1000) -``` + We use the ternary operator to decide which value is returned. Now we need to define all input parameters as in the previous exercise. -We can use a nested `for` loops to create `A`. However, a simpler way is to use the list comprehension or broadcasting to vectorize the `juliaset` function. 
+    ```julia
+    c = - 0.4 + 0.61im
+    R = 2
+    N = 1000
+    x = range(-1.5, 1.5; length = 1500)
+    y = range(-1.0, 1.0; length = 1000)
+    ```

-```julia
-A1 = [juliaset(xl + yk*im, c, R, N) for yk in y, xl in x]
-A2 = juliaset.(x' .+ y .* im, c, R, N)
-```

+    We can use nested `for` loops to create `A`. However, a simpler way is to use the list comprehension or broadcasting to vectorize the `juliaset` function.

-Both `A1` and `A2` are the same. In the second case, we have to pay attention to use the correct form of the input. We use the transposition of `x`. Finally, we can call the same code to create the same plot.

+    ```julia
+    A1 = [juliaset(xl + yk*im, c, R, N) for yk in y, xl in x]
+    A2 = juliaset.(x' .+ y .* im, c, R, N)
+    ```

-```julia
-using Plots
-heatmap(A1;
-    c = :viridis,
-    clims = (0, 0.15),
-    cbar = :none,
-    axis = :none,
-    ticks = :none,
-    size = (800, 600),
-)
-```

+    Both `A1` and `A2` are the same. In the second case, we have to pay attention to use the correct form of the input. We use the transposition of `x`. Finally, we can call the same code to create the same plot.

-```@raw html
-
-``` + ```julia + using Plots + heatmap(A1; + c = :viridis, + clims = (0, 0.15), + cbar = :none, + axis = :none, + ticks = :none, + size = (800, 600), + ) + ``` ![](juliaset_ex2.svg) -```@raw html -
-
Exercise 3:
-
-``` -Try different values of variable `c` to create different plots. For inspiration, check the Wikipedia page about [Julia set](https://en.wikipedia.org/wiki/Julia_set). -```@raw html -
-``` +!!! warning "Exercise 3:" + Try different values of variable `c` to create different plots. For inspiration, check the Wikipedia page about [Julia set](https://en.wikipedia.org/wiki/Julia_set). - ``c = 0.285 + 0.01 \cdot i`` ![](juliaset_ex3_1.svg) @@ -227,7 +191,7 @@ Try different values of variable `c` to create different plots. For inspiration, ## Animation -!!! warning "Warning:" +!!! danger "Warning:" It takes a lot of time to create the animation below, especially when using the default [GR](https://github.com/jheinen/GR.jl) backend for the Plots package. The plotting time can be reduced by using a different backend such as the [PyPlot](https://github.com/JuliaPy/PyPlot.jl) backend. ```julia diff --git a/docs/src/lecture_03/loops.md b/docs/src/lecture_03/loops.md index 91066fbda..745d0fa59 100644 --- a/docs/src/lecture_03/loops.md +++ b/docs/src/lecture_03/loops.md @@ -46,30 +46,21 @@ i = 4 i = 5 ``` -```@raw html -
-
An alternative notation for for loops:
-
-``` - -There are two alternative notations for the `for` loop. It is possible to use the `=` or `∈` symbol instead of the `in` keyword. +!!! info "An alternative notation for for loops:" + There are two alternative notations for the `for` loop. It is possible to use the `=` or `∈` symbol instead of the `in` keyword. -```jldoctest -julia> for i = 1:5 - @show i - end -i = 1 -i = 2 -i = 3 -i = 4 -i = 5 -``` - -However, it is better to use the `in` keyword to improve code readability. Regardless of which notation is used, it is essential to be consistent and use the same notation in all `for` loops. + ```jldoctest + julia> for i = 1:5 + @show i + end + i = 1 + i = 2 + i = 3 + i = 4 + i = 5 + ``` -```@raw html -
-``` + However, it is better to use the `in` keyword to improve code readability. Regardless of which notation is used, it is essential to be consistent and use the same notation in all `for` loops. In Julia (similarly to Python), it is possible to loop not only over ranges but over any iterable object such as arrays or tuples. This is advantageous because it allows getting elements of iterable objects directly without using indices. @@ -99,77 +90,62 @@ Hi, my name is Daniel and I am 34 old. Hi, my name is Bob and I am 23 old. ``` -```@raw html -
-
Exercise:
-
-``` - -Use `for` or `while` loop to print all integers between `1` and `100` which can be divided by both `3` and `7`. +!!! warning "Exercise:" + Use `for` or `while` loop to print all integers between `1` and `100` which can be divided by both `3` and `7`. -**Hint:** use the `mod` function. + **Hint:** use the `mod` function. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + First, we need to check if a given integer is divisible by both `3` and `7`. This can be performed using the `mod` function in combination with the `if-else` statement as follows: -First, we need to check if a given integer is divisible by both `3` and `7`. This can be performed using the `mod` function in combination with the `if-else` statement as follows: + ```jldoctest + julia> i = 21 + 21 -```jldoctest -julia> i = 21 -21 - -julia> if mod(i, 3) == 0 && mod(i, 7) == 0 - println("$(i) is divisible by 3 and 7") - end -21 is divisible by 3 and 7 -``` - -or using the short-circuit evaluation + julia> if mod(i, 3) == 0 && mod(i, 7) == 0 + println("$(i) is divisible by 3 and 7") + end + 21 is divisible by 3 and 7 + ``` -```jldoctest -julia> i = 21 -21 + or using the short-circuit evaluation -julia> mod(i, 3) == mod(i, 7) == 0 && println("$(i) is divisible by 3 and 7") -21 is divisible by 3 and 7 -``` + ```jldoctest + julia> i = 21 + 21 -When we know how to check the conditions, it is easy to write a `for` loop to iterate over integers from `1` to `100`. + julia> mod(i, 3) == mod(i, 7) == 0 && println("$(i) is divisible by 3 and 7") + 21 is divisible by 3 and 7 + ``` -```jldoctest -julia> for i in 1:100 - mod(i, 3) == mod(i, 7) == 0 && @show i - end -i = 21 -i = 42 -i = 63 -i = 84 -``` + When we know how to check the conditions, it is easy to write a `for` loop to iterate over integers from `1` to `100`. -A `while` loop can be created in a similar way + ```jldoctest + julia> for i in 1:100 + mod(i, 3) == mod(i, 7) == 0 && @show i + end + i = 21 + i = 42 + i = 63 + i = 84 + ``` -```jldoctest -julia> i = 0; + A `while` loop can be created in a similar way -julia> while i <= 100 - i += 1 - mod(i, 3) == mod(i, 7) == 0 && @show i - end -i = 21 -i = 42 -i = 63 -i = 84 -``` + ```jldoctest + julia> i = 0; -The `for` loop should be used here because the range is known before-hand and unlike the `while` loop, it does not require to initialize `i`. 
+ julia> while i <= 100 + i += 1 + mod(i, 3) == mod(i, 7) == 0 && @show i + end + i = 21 + i = 42 + i = 63 + i = 84 + ``` -```@raw html -
-``` + The `for` loop should be used here because the range is known before-hand and unlike the `while` loop, it does not require to initialize `i`. ### `break` and `continue` @@ -201,43 +177,28 @@ i = 10 The code after the `continue` keyword is not evaluated. -```@raw html -
-
Exercise:
-
-``` - -Rewrite the code from the exercise above. Use a combination of the `while` loop and the keyword `continue` to print all integers between `1` and `100` divisible by both `3` and `7`. In the declaration of the `while` loop use the `true` value instead of a condition. Use the `break` keyword and a proper condition to terminate the loop. - -```@raw html -
-
-Solution: -
-``` - -The `true` value creates an infinite loop, i.e., it is necessary to end the loop with the `break` keyword. Because the variable `i` represents an integer and we want to iterate over integers between 1 and 100, the correct termination condition is `i > 100`. +!!! warning "Exercise:" + Rewrite the code from the exercise above. Use a combination of the `while` loop and the keyword `continue` to print all integers between `1` and `100` divisible by both `3` and `7`. In the declaration of the `while` loop use the `true` value instead of a condition. Use the `break` keyword and a proper condition to terminate the loop. -```jldoctest -julia> i = 0; +!!! details "Solution:" + The `true` value creates an infinite loop, i.e., it is necessary to end the loop with the `break` keyword. Because the variable `i` represents an integer and we want to iterate over integers between 1 and 100, the correct termination condition is `i > 100`. -julia> while true - i += 1 - i > 100 && break - mod(i, 3) == mod(i, 7) == 0 || continue - @show i - end -i = 21 -i = 42 -i = 63 -i = 84 -``` + ```jldoctest + julia> i = 0; -We used the short-circuit evaluation to break the loop. To check that the integer is divisible, we use the same condition as before. However, we must use `||` instead of `&&` because we want to use the `continue` keyword. + julia> while true + i += 1 + i > 100 && break + mod(i, 3) == mod(i, 7) == 0 || continue + @show i + end + i = 21 + i = 42 + i = 63 + i = 84 + ``` -```@raw html -
-``` + We used the short-circuit evaluation to break the loop. To check that the integer is divisible, we use the same condition as before. However, we must use `||` instead of `&&` because we want to use the `continue` keyword. ### Nested loops @@ -298,78 +259,63 @@ julia> for i in 1:3, j in i:10 There are other limitations of the shorter syntax, such as the impossibility to perform any operation outside the inner loop. Nevertheless, it is a useful syntax in many cases. -```@raw html -
-
Exercise:
-
-``` - -Use nested loops to create a matrix with elements given by the formula - -```math -A_{i, j} = \frac{1}{2}\exp\left\{\frac{1}{2} (x_{i}^2 - y_{j}^2) \right\} \quad i \in \{1, 2, 3\}, \quad j \in \{1, 2, 3, 4\}, -``` - -where ``x \in \{0.4, 2.3, 4.6\}`` and ``y \in \{1.4, -3.1, 2.4, 5.2\}``. +!!! warning "Exercise:" + Use nested loops to create a matrix with elements given by the formula -**Bonus:** try to create the same matrix in a more effective way. + ```math + A_{i, j} = \frac{1}{2}\exp\left\{\frac{1}{2} (x_{i}^2 - y_{j}^2) \right\} \quad i \in \{1, 2, 3\}, \quad j \in \{1, 2, 3, 4\}, + ``` -```@raw html -
-
-Solution: -
-``` + where ``x \in \{0.4, 2.3, 4.6\}`` and ``y \in \{1.4, -3.1, 2.4, 5.2\}``. -First, we have to define vectors `x` and `y`, and an empty array of the proper size and element type to use in nested loops. + **Bonus:** try to create the same matrix in a more effective way. -```jldoctest nestedloops_ex; output = false -x = [0.4, 2.3, 4.6] -y = [1.4, -3.1, 2.4, 5.2] -A = zeros(Float64, length(x), length(y)) +!!! details "Solution:" + First, we have to define vectors `x` and `y`, and an empty array of the proper size and element type to use in nested loops. -# output -3×4 Matrix{Float64}: - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 -``` + ```jldoctest nestedloops_ex; output = false + x = [0.4, 2.3, 4.6] + y = [1.4, -3.1, 2.4, 5.2] + A = zeros(Float64, length(x), length(y)) -The element type specification can be omitted since the default value type is `Float64`. Now we have to use proper indices to fill `A`. In this case, we use the indices `1:length(x)` for `x` and `1:length(y)` for `y`. + # output + 3×4 Matrix{Float64}: + 0.0 0.0 0.0 0.0 + 0.0 0.0 0.0 0.0 + 0.0 0.0 0.0 0.0 + ``` -```jldoctest nestedloops_ex -julia> for i in 1:length(x), j in 1:length(y) - A[i, j] = exp((x[i]^2 - y[j]^2)/2)/2 - end + The element type specification can be omitted since the default value type is `Float64`. Now we have to use proper indices to fill `A`. In this case, we use the indices `1:length(x)` for `x` and `1:length(y)` for `y`. -julia> A -3×4 Matrix{Float64}: - 0.203285 0.00443536 0.030405 7.27867e-7 - 2.64284 0.0576626 0.395285 9.46275e-6 - 7382.39 161.072 1104.17 0.0264329 -``` + ```jldoctest nestedloops_ex + julia> for i in 1:length(x), j in 1:length(y) + A[i, j] = exp((x[i]^2 - y[j]^2)/2)/2 + end -There are more efficient ways to create this array. The one way is to use broadcasting. 
+ julia> A + 3×4 Matrix{Float64}: + 0.203285 0.00443536 0.030405 7.27867e-7 + 2.64284 0.0576626 0.395285 9.46275e-6 + 7382.39 161.072 1104.17 0.0264329 + ``` -```jldoctest nestedloops_ex -julia> y_row = y' -1×4 adjoint(::Vector{Float64}) with eltype Float64: - 1.4 -3.1 2.4 5.2 + There are more efficient ways to create this array. The one way is to use broadcasting. -julia> A = @. exp((x^2 - y_row^2)/2)/2 -3×4 Matrix{Float64}: - 0.203285 0.00443536 0.030405 7.27867e-7 - 2.64284 0.0576626 0.395285 9.46275e-6 - 7382.39 161.072 1104.17 0.0264329 -``` + ```jldoctest nestedloops_ex + julia> y_row = y' + 1×4 adjoint(::Vector{Float64}) with eltype Float64: + 1.4 -3.1 2.4 5.2 -We use the `@ .` macro to perform all operations elementwise. Since `x` is a column vector and `y_row` is a row vector, `x - y_row` uses broadcasting to create a matrix. + julia> A = @. exp((x^2 - y_row^2)/2)/2 + 3×4 Matrix{Float64}: + 0.203285 0.00443536 0.030405 7.27867e-7 + 2.64284 0.0576626 0.395285 9.46275e-6 + 7382.39 161.072 1104.17 0.0264329 + ``` -The third way to create this matrix is to use list comprehension. Due to its importance, we dedicate a whole section to it. + We use the `@ .` macro to perform all operations elementwise. Since `x` is a column vector and `y_row` is a row vector, `x - y_row` uses broadcasting to create a matrix. -```@raw html -
-``` + The third way to create this matrix is to use list comprehension. Due to its importance, we dedicate a whole section to it. ## List comprehension @@ -416,42 +362,27 @@ julia> [(x, y, x + y) for x in 1:10, y in 1:10 if x + y < 5] (1, 3, 4) ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Use the list comprehension to create a vector of all integers from `1` to `100` divisible by `3` and `7` simultaneously. What is the sum of all these integers? -Use the list comprehension to create a vector of all integers from `1` to `100` divisible by `3` and `7` simultaneously. What is the sum of all these integers? +!!! details "Solution:" + We can use list comprehension with the same condition that we used in the exercise in the first section. -```@raw html -
-
-Solution: -
-``` - -We can use list comprehension with the same condition that we used in the exercise in the first section. + ```jldoctest compheresions_ex + julia> v = [i for i in 1:100 if mod(i, 3) == mod(i, 7) == 0] + 4-element Vector{Int64}: + 21 + 42 + 63 + 84 + ``` -```jldoctest compheresions_ex -julia> v = [i for i in 1:100 if mod(i, 3) == mod(i, 7) == 0] -4-element Vector{Int64}: - 21 - 42 - 63 - 84 -``` + Then we can use the `sum` function to get their sum. -Then we can use the `sum` function to get their sum. - -```jldoctest compheresions_ex -julia> sum(v) -210 -``` - -```@raw html -
-``` + ```jldoctest compheresions_ex + julia> sum(v) + 210 + ``` ## Generator expressions @@ -511,43 +442,28 @@ julia> collect(gen) (3, 1) ``` -```@raw html -
-
Exercise:
-
-``` - -Use a generator to sum the square of all integers from `1` to `100`, which are divisible by `3` and `7` simultaneously. +!!! warning "Exercise:" + Use a generator to sum the square of all integers from `1` to `100`, which are divisible by `3` and `7` simultaneously. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + There are two ways how to solve this exercise. The first one creates a generator and then uses the `sum` function. -There are two ways how to solve this exercise. The first one creates a generator and then uses the `sum` function. + ```julia + julia> gen = (i^2 for i in 1:100 if mod(i, 3) == mod(i, 7) == 0); -```julia -julia> gen = (i^2 for i in 1:100 if mod(i, 3) == mod(i, 7) == 0); + julia> typeof(gen) + Base.Generator{Base.Iterators.Filter{var"#2#4",UnitRange{Int64}},var"#1#3"} -julia> typeof(gen) -Base.Generator{Base.Iterators.Filter{var"#2#4",UnitRange{Int64}},var"#1#3"} + julia> sum(gen) + 13230 + ``` -julia> sum(gen) -13230 -``` + It is worth noting that `gen` is a `Generator` object and not an array. The second way uses the shorter syntax that allows us to write a generator inside the `sum` function. -It is worth noting that `gen` is a `Generator` object and not an array. The second way uses the shorter syntax that allows us to write a generator inside the `sum` function. - -```jldoctest compheresions_ex -julia> sum(i^2 for i in 1:100 if mod(i, 3) == mod(i, 7) == 0) -13230 -``` - -```@raw html -
-``` + ```jldoctest compheresions_ex + julia> sum(i^2 for i in 1:100 if mod(i, 3) == mod(i, 7) == 0) + 13230 + ``` ## Iterators @@ -612,69 +528,54 @@ julia> for (i, vals) in enumerate(zip([1, 4, 2, 5], 2:12, (:a, :b, :c))) (i, vals) = (3, (2, 4, :c)) ``` -```@raw html -
-
Exercise:
-
-``` - -Create a matrix with elements given by the following formula - -```math -A_{i, j} = \frac{1}{2}\exp\left\{\frac{1}{2} (x_{i}^2 - y_{j}^2) \right\} \quad i \in \{1, 2, 3\}, \; j \in \{1, 2, 3, 4\} -``` - -where ``x \in \{0.4, 2.3, 4.6\}``, ``y \in \{1.4, -3.1, 2.4, 5.2\}``. Compute the sum of all elements in each row and print the following message: -> *Sum of all elements in a row `i` is `i_sum`* -where `i` represents row's number and `i_sum` the sum of all elements in this row. Do the same for each column and print the following message: -> *Sum of all elements in a column `i` is `i_sum`* +!!! warning "Exercise:" + Create a matrix with elements given by the following formula -**Hint:** use iterators `eachcol` and `eachrow`. + ```math + A_{i, j} = \frac{1}{2}\exp\left\{\frac{1}{2} (x_{i}^2 - y_{j}^2) \right\} \quad i \in \{1, 2, 3\}, \; j \in \{1, 2, 3, 4\} + ``` -```@raw html -
-
-Solution: -
-``` + where ``x \in \{0.4, 2.3, 4.6\}``, ``y \in \{1.4, -3.1, 2.4, 5.2\}``. Compute the sum of all elements in each row and print the following message: + > *Sum of all elements in a row `i` is `i_sum`* + where `i` represents row's number and `i_sum` the sum of all elements in this row. Do the same for each column and print the following message: + > *Sum of all elements in a column `i` is `i_sum`* -First, we have to generate the matrix `A`. It can be done using list comprehension as follows: + **Hint:** use iterators `eachcol` and `eachrow`. -```jldoctest iterators_ex; output = false -X = [0.4, 2.3, 4.6] -Y = [1.4, -3.1, 2.4, 5.2] -A = [exp((x^2 - y^2)/2)/2 for x in X, y in Y] +!!! details "Solution:" + First, we have to generate the matrix `A`. It can be done using list comprehension as follows: -# output -3×4 Matrix{Float64}: - 0.203285 0.00443536 0.030405 7.27867e-7 - 2.64284 0.0576626 0.395285 9.46275e-6 - 7382.39 161.072 1104.17 0.0264329 -``` + ```jldoctest iterators_ex; output = false + X = [0.4, 2.3, 4.6] + Y = [1.4, -3.1, 2.4, 5.2] + A = [exp((x^2 - y^2)/2)/2 for x in X, y in Y] -To compute the sum of each row and print the appropriate message, we use the combination of `enumerate` and `eachrow` functions. + # output + 3×4 Matrix{Float64}: + 0.203285 0.00443536 0.030405 7.27867e-7 + 2.64284 0.0576626 0.395285 9.46275e-6 + 7382.39 161.072 1104.17 0.0264329 + ``` -```jldoctest iterators_ex -julia> for (i, row) in enumerate(eachrow(A)) - println("Sum of all elements in a row $(i) is $(sum(row))") - end -Sum of all elements in a row 1 is 0.2381259460051036 -Sum of all elements in a row 2 is 3.0957940729669864 -Sum of all elements in a row 3 is 8647.66342895583 -``` + To compute the sum of each row and print the appropriate message, we use the combination of `enumerate` and `eachrow` functions. -Similarly, to compute the sum of each column and print the appropriate message, we use the combination of `enumerate` and `eachcol` functions. 
+ ```jldoctest iterators_ex + julia> for (i, row) in enumerate(eachrow(A)) + println("Sum of all elements in a row $(i) is $(sum(row))") + end + Sum of all elements in a row 1 is 0.2381259460051036 + Sum of all elements in a row 2 is 3.0957940729669864 + Sum of all elements in a row 3 is 8647.66342895583 + ``` -```jldoctest iterators_ex -julia> for (i, row) in enumerate(eachcol(A)) - println("Sum of all elements in a column $(i) is $(sum(row))") - end -Sum of all elements in a column 1 is 7385.236904243371 -Sum of all elements in a column 2 is 161.13431527671185 -Sum of all elements in a column 3 is 1104.5996863997295 -Sum of all elements in a column 4 is 0.026443054989612996 -``` + Similarly, to compute the sum of each column and print the appropriate message, we use the combination of `enumerate` and `eachcol` functions. -```@raw html -
-``` + ```jldoctest iterators_ex + julia> for (i, row) in enumerate(eachcol(A)) + println("Sum of all elements in a column $(i) is $(sum(row))") + end + Sum of all elements in a column 1 is 7385.236904243371 + Sum of all elements in a column 2 is 161.13431527671185 + Sum of all elements in a column 3 is 1104.5996863997295 + Sum of all elements in a column 4 is 0.026443054989612996 + ``` diff --git a/docs/src/lecture_04/exceptions.md b/docs/src/lecture_04/exceptions.md index ddd4f60db..ec2d326e8 100644 --- a/docs/src/lecture_04/exceptions.md +++ b/docs/src/lecture_04/exceptions.md @@ -19,9 +19,11 @@ We use the `error` function, which throws the `ErrorException` if the input argu ```jldoctest expections julia> fact(1.4) ERROR: argument must be non-negative integer +[...] julia> fact(-5) ERROR: argument must be non-negative integer +[...] ``` However, it is better to use error messages as descriptive as possible. In the case above, the error message can also include the argument value. Julia provides several predefined types of exceptions that can be used to create more descriptive error messages. In our example, we want to check whether the argument is a non-negative integer. The more specific `DomainError` can do this. @@ -42,10 +44,12 @@ We must use the `throw` function because the `DomainError(x, msg)` function only julia> fact(1.4) ERROR: DomainError with 1.4: argument must be non-negative integer +[...] julia> fact(-5) ERROR: DomainError with -5: argument must be non-negative integer +[...] ``` The error message now contains a short description, the input value, and the type of exception. Now imagine that due to an error, the `fact` function is used to calculate the factorial from a string. diff --git a/docs/src/lecture_04/exercises.md b/docs/src/lecture_04/exercises.md index 8ce3d89f2..b12392469 100644 --- a/docs/src/lecture_04/exercises.md +++ b/docs/src/lecture_04/exercises.md @@ -12,171 +12,109 @@ The first generation must be initialized. 
Every new generation is created by app The following few exercises will implement the Game of Life. We will consider finite universe with periodic boundary conditions. -```@raw html -
-
Exercise:
-
-``` - -Write a function `neighbours` that return the number of live neighbours of a cell. The function should accept the `world` matrix of boolean values representing the state of all cells (`true` if the cell is alive and `false` otherwise) and index of the row and column of the cell. - -**Hint:** use the following properties of the `mod1` function to implement periodic boundaries. +!!! warning "Exercise:" + Write a function `neighbours` that return the number of live neighbours of a cell. The function should accept the `world` matrix of boolean values representing the state of all cells (`true` if the cell is alive and `false` otherwise) and index of the row and column of the cell. -```@repl -mod1(1, 4) -mod1(4, 4) -mod1(5, 4) -``` + **Hint:** use the following properties of the `mod1` function to implement periodic boundaries. -**Bonus:** implement a more general function which computes the number of alive cells in a neighbourhood of given size. + ```@repl + mod1(1, 4) + mod1(4, 4) + mod1(5, 4) + ``` -```@raw html -
-
-Solution: -
-``` + **Bonus:** implement a more general function which computes the number of alive cells in a neighbourhood of given size. -One way to define the `neighbours` function is to check all neighbours manually. +!!! details "Solution:" + One way to define the `neighbours` function is to check all neighbours manually. -```julia -function neighbours(world, row, col) - n, m = size(world) - - # this implements periodic boundaries - down = mod1(row + 1, n) - up = mod1(row - 1, n) - left = mod1(col - 1, m) - right = mod1(col + 1, m) - - return ( world[up, left] + world[up, col] + world[up, right] - + world[row, left] + + world[row, right] - + world[down, left] + world[down, col] + world[down, right]) -end -``` + ```julia + function neighbours(world, row, col) + n, m = size(world) -The approach above can not define a general version of the `neighbours` function. In this case, we can use nested loops. First, we compute proper row indices by `range` combined with the `mod1` function. + # this implements periodic boundaries + down = mod1(row + 1, n) + up = mod1(row - 1, n) + left = mod1(col - 1, m) + right = mod1(col + 1, m) -```julia -rows = mod1.(row .+ (-r:r), size(world, 1)) -``` + return ( world[up, left] + world[up, col] + world[up, right] + + world[row, left] + + world[row, right] + + world[down, left] + world[down, col] + world[down, right]) + end + ``` -Column indexes can be computed similarly. Then we use nested loops to iterate through both rows and columns. Since the iteration includes the middle cell, we need to subtract its state. + The approach above can not define a general version of the `neighbours` function. In this case, we can use nested loops. First, we compute proper row indices by `range` combined with the `mod1` function. 
-```julia -function neighbours(world, row, col; r = 1) + ```julia rows = mod1.(row .+ (-r:r), size(world, 1)) - cols = mod1.(col .+ (-r:r), size(world, 2)) + ``` - return sum(world[i, j] for i in rows, j in cols) - world[row, col] -end -``` - -```@raw html -
-``` - - -```@raw html -
-
Exercise:
-
-``` - -Add a new method to the `neighbours` function that for the `world` matrix returns a matrix containing numbers of living neighbours. - -```@raw html -
-
-Solution: -
-``` + Column indexes can be computed similarly. Then we use nested loops to iterate through both rows and columns. Since the iteration includes the middle cell, we need to subtract its state. -We created a function that computes the number of living neighbours in the exercise above. One way how to create a matrix with numbers of living neighbours is: + ```julia + function neighbours(world, row, col; r = 1) + rows = mod1.(row .+ (-r:r), size(world, 1)) + cols = mod1.(col .+ (-r:r), size(world, 2)) -```julia -function neighbours(world) - n, m = size(world) - return [neighbours(world, row, col) for row in 1:n, col in 1:m] -end -``` - -This is an example of multiple dispatch. The function `neighbours` can have both one and three input arguments. - -```@raw html -
-``` - -```@raw html -
-
Exercise:
-
-``` - -Write a function `willsurvive` that returns `true` if the cell will survive based on the conditions described at the beginning of the section and `false` otherwise. This function should accept two arguments: state of the cell (`true`/`false`) and the number of living neighbours. + return sum(world[i, j] for i in rows, j in cols) - world[row, col] + end + ``` -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Add a new method to the `neighbours` function that for the `world` matrix returns a matrix containing numbers of living neighbours. -This function can be written using the `if-elseif-else` statement. Since `cell` is a boolean value, we do not need to compare with one as in `cell == 1`. +!!! details "Solution:" + We created a function that computes the number of living neighbours in the exercise above. One way how to create a matrix with numbers of living neighbours is: -```julia -function willsurvive(cell, k) - if k == 3 - return true - elseif k == 2 && cell - return true - else - return false + ```julia + function neighbours(world) + n, m = size(world) + return [neighbours(world, row, col) for row in 1:n, col in 1:m] end -end -``` + ``` -We can write this function in a simpler form. We first realize that the short-circuit evaluation can merge the first two conditions. Since the function returns only `true` or `false`, we can write the function on one line. + This is an example of multiple dispatch. The function `neighbours` can have both one and three input arguments. -```julia -willsurvive(cell, k) = k == 3 || k == 2 && cell -``` +!!! warning "Exercise:" + Write a function `willsurvive` that returns `true` if the cell will survive based on the conditions described at the beginning of the section and `false` otherwise. This function should accept two arguments: state of the cell (`true`/`false`) and the number of living neighbours. -```@raw html -
-``` +!!! details "Solution:" + This function can be written using the `if-elseif-else` statement. Since `cell` is a boolean value, we do not need to compare with one as in `cell == 1`. + ```julia + function willsurvive(cell, k) + if k == 3 + return true + elseif k == 2 && cell + return true + else + return false + end + end + ``` -```@raw html -
-
Exercise:
-
-``` + We can write this function in a simpler form. We first realize that the short-circuit evaluation can merge the first two conditions. Since the function returns only `true` or `false`, we can write the function on one line. -Combine these functions to write a function `evolve!` that evolves the given `world` matrix into a new generation. + ```julia + willsurvive(cell, k) = k == 3 || k == 2 && cell + ``` -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Combine these functions to write a function `evolve!` that evolves the given `world` matrix into a new generation. -We first compute the matrix with the numbers of living neighbours. Then we iterate over all elements of the `world` matrix and compute new states of all elements with the `willsurvive` function. Since we computed the number of living neighbours before iterating, we can rewrite the `world` matrix. +!!! details "Solution:" + We first compute the matrix with the numbers of living neighbours. Then we iterate over all elements of the `world` matrix and compute new states of all elements with the `willsurvive` function. Since we computed the number of living neighbours before iterating, we can rewrite the `world` matrix. -```julia -function evolve!(world) - ks = neighbours(world) - for i in eachindex(world) - world[i] = willsurvive(world[i], ks[i]) + ```julia + function evolve!(world) + ks = neighbours(world) + for i in eachindex(world) + world[i] = willsurvive(world[i], ks[i]) + end + return end - return -end -``` - -```@raw html -
-``` + ``` In the four exercises above, we defined functions sufficient to animate the Game of Life. Use the following code to initialize the `world`. diff --git a/docs/src/lecture_04/functions.md b/docs/src/lecture_04/functions.md index d52209031..9b5fffd40 100644 --- a/docs/src/lecture_04/functions.md +++ b/docs/src/lecture_04/functions.md @@ -85,74 +85,59 @@ julia> x3 8 ``` -```@raw html -
-
Exercise:
-
-``` - -Write function `power(x::Real, p::Integer)` that for a number ``x`` and a (possibly negative) integer ``p`` computes ``x^p`` without using the `^` operator. Use only basic arithmetic operators `+`, `-`, `*`, `/` and the `if` condition. The annotation `p::Integer` ensures that the input `p` is always an integer. - -**Hint:** use recursion. - -```@raw html -
-
-Solution: -
-``` - -To use recursion, we have to split the computation into three parts: -- `p = 0`: the function should return `1`. -- `p > 0`: the function should be called recursively with arguments `x`, `p - 1` and the result should be multiplied by `x`. -- `p < 0`: then it is equivalent to call the power function with arguments `1/x`, `-p`. - -These three cases can be defined using the `if-elseif` as follows: - -```jldoctest functions_ex; output = false -function power(x::Real, p::Integer) - if p == 0 - return 1 - elseif p > 0 - return x * power(x, p - 1) - else - return power(1/x, -p) +!!! warning "Exercise:" + Write function `power(x::Real, p::Integer)` that for a number ``x`` and a (possibly negative) integer ``p`` computes ``x^p`` without using the `^` operator. Use only basic arithmetic operators `+`, `-`, `*`, `/` and the `if` condition. The annotation `p::Integer` ensures that the input `p` is always an integer. + + **Hint:** use recursion. + +!!! details "Solution:" + To use recursion, we have to split the computation into three parts: + - `p = 0`: the function should return `1`. + - `p > 0`: the function should be called recursively with arguments `x`, `p - 1` and the result should be multiplied by `x`. + - `p < 0`: then it is equivalent to call the power function with arguments `1/x`, `-p`. + + These three cases can be defined using the `if-elseif` as follows: + + ```jldoctest functions_ex; output = false + function power(x::Real, p::Integer) + if p == 0 + return 1 + elseif p > 0 + return x * power(x, p - 1) + else + return power(1/x, -p) + end end -end -# output -power (generic function with 1 method) -``` + # output + power (generic function with 1 method) + ``` -We use type annotation for function arguments to ensure that the input arguments are always of the proper type. In the example above, the first argument must be a real number, and the second argument must be an integer. 
+ We use type annotation for function arguments to ensure that the input arguments are always of the proper type. In the example above, the first argument must be a real number, and the second argument must be an integer. -```jldoctest functions_ex -julia> power(2, 5) -32 + ```jldoctest functions_ex + julia> power(2, 5) + 32 -julia> power(2, -2) -0.25 + julia> power(2, -2) + 0.25 -julia> power(2, 5) ≈ 2^5 -true + julia> power(2, 5) ≈ 2^5 + true -julia> power(5, -3) ≈ 5^(-3) -true -``` + julia> power(5, -3) ≈ 5^(-3) + true + ``` -If we call the function with arguments of wrong types, an error will occur. + If we call the function with arguments of wrong types, an error will occur. -```jldoctest functions_ex -julia> power(2, 2.5) -ERROR: MethodError: no method matching power(::Int64, ::Float64) -[...] -``` + ```jldoctest functions_ex + julia> power(2, 2.5) + ERROR: MethodError: no method matching power(::Int64, ::Float64) + [...] + ``` -We will discuss type annotation later in the section about [methods](@ref Methods). - -```@raw html -
-``` + We will discuss type annotation later in the section about [methods](@ref Methods). ## One-line functions @@ -208,51 +193,36 @@ julia> g(3) However, for better code readability, the traditional multiline syntax is preferred for more complex functions. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Write a one-line function that returns `true` if the input argument is an even number and `false` otherwise. -Write a one-line function that returns `true` if the input argument is an even number and `false` otherwise. + **Hint:** use modulo function and [ternary operator](@ref Ternary-operator) `?`. -**Hint:** use modulo function and [ternary operator](@ref Ternary-operator) `?`. +!!! details "Solution:" + From the section about the ternary operator, we know that the syntax -```@raw html -
-
-Solution: -
-``` + ```julia + a ? b : c + ``` -From the section about the ternary operator, we know that the syntax + means: *if `a` is true, evaluate `b`; otherwise, evaluate `c`*. Since even numbers are divisible by 2, we can check it by the modulo function `mod(x, 2) == 0`. This results in the following function. -```julia -a ? b : c -``` + ```jldoctest functions; output = false + even(x::Integer) = mod(x, 2) == 0 ? true : false -means: *if `a` is true, evaluate `b`; otherwise, evaluate `c`*. Since even numbers are divisible by 2, we can check it by the modulo function `mod(x, 2) == 0`. This results in the following function. + # output + even (generic function with 1 method) + ``` -```jldoctest functions; output = false -even(x::Integer) = mod(x, 2) == 0 ? true : false + We again used type annotation to ensure that the argument is an integer. -# output -even (generic function with 1 method) -``` - -We again used type annotation to ensure that the argument is an integer. - -```jldoctest functions -julia> even(11) -false + ```jldoctest functions + julia> even(11) + false -julia> even(14) -true -``` - -```@raw html -
-``` + julia> even(14) + true + ``` ## Optional arguments @@ -319,59 +289,44 @@ ERROR: UndefVarError: `y` not defined [...] ``` -```@raw html -
-
Exercise:
-
-``` - -Write a function which computes the value of the following quadratic form +!!! warning "Exercise:" + Write a function which computes the value of the following quadratic form -```math -q_{a,b,c}(x,y) = ax^2 + bxy + cy^2, -``` - -where ``a, b, c, x \in \mathbb{R}``. Use optional arguments to set default values for parameters + ```math + q_{a,b,c}(x,y) = ax^2 + bxy + cy^2, + ``` -```math -a = 1, \quad b = 2a, \quad c = 3(a + b). -``` + where ``a, b, c, x \in \mathbb{R}``. Use optional arguments to set default values for parameters -What is the function value at point ``(4, 2)`` for default parameters? What is the function value at the same point if we use ``c = 3``? + ```math + a = 1, \quad b = 2a, \quad c = 3(a + b). + ``` -```@raw html -
-
-Solution: -
-``` + What is the function value at point ``(4, 2)`` for default parameters? What is the function value at the same point if we use ``c = 3``? -The quadratic form can be implemented as follows: +!!! details "Solution:" + The quadratic form can be implemented as follows: -```jldoctest opt_args_ex; output = false -q(x, y, a = 1, b = 2*a, c = 3*(a + b)) = a*x^2 + b*x*y + c*y^2 + ```jldoctest opt_args_ex; output = false + q(x, y, a = 1, b = 2*a, c = 3*(a + b)) = a*x^2 + b*x*y + c*y^2 -# output -q (generic function with 4 methods) -``` + # output + q (generic function with 4 methods) + ``` -Since we want to evaluate ``q`` at ``(4, 2)`` with default parameters, we can use only the first two arguments. + Since we want to evaluate ``q`` at ``(4, 2)`` with default parameters, we can use only the first two arguments. -```jldoctest opt_args_ex -julia> q(4, 2) -68 -``` + ```jldoctest opt_args_ex + julia> q(4, 2) + 68 + ``` -In the second case, we want to evaluate the function at the same point with ``c = 3``. However, it is not possible to set only the last optional argument. We have to set all previous optional arguments too. For the first two optional arguments, we use the default values, i.e., `a = 1` and `b = 2*a = 2`. + In the second case, we want to evaluate the function at the same point with ``c = 3``. However, it is not possible to set only the last optional argument. We have to set all previous optional arguments too. For the first two optional arguments, we use the default values, i.e., `a = 1` and `b = 2*a = 2`. -```jldoctest opt_args_ex -julia> q(4, 2, 1, 2, 3) -44 -``` - -```@raw html -
-``` + ```jldoctest opt_args_ex + julia> q(4, 2, 1, 2, 3) + 44 + ``` ## Keyword arguments @@ -430,95 +385,80 @@ julia> linear(2; a, b) 8 ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Write a probability density function for the [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution) -Write a probability density function for the [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution) - -```math -f_{\mu, \sigma}(x) = \frac{1}{\sigma \sqrt{ 2\pi }} \exp\left\{ -\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right) ^2 \right\}, -``` + ```math + f_{\mu, \sigma}(x) = \frac{1}{\sigma \sqrt{ 2\pi }} \exp\left\{ -\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right) ^2 \right\}, + ``` -where ``\mu \in \mathbb{R}`` and ``\sigma^2 > 0``. Use keyword arguments to obtain the standardized normal distribution (``\mu = 0`` and ``\sigma = 1``). Check that the inputs are correct. + where ``\mu \in \mathbb{R}`` and ``\sigma^2 > 0``. Use keyword arguments to obtain the standardized normal distribution (``\mu = 0`` and ``\sigma = 1``). Check that the inputs are correct. **Bonus:** verify that this function is a probability density function, i.e., its integral equals 1. -```@raw html -
-
-Solution: -
-``` - -The probability density function for the Gaussian distribution equals to - -```jldoctest key_args_ex; output = false -function gauss(x::Real; μ::Real = 0, σ::Real = 1) - σ^2 > 0 || error("the variance `σ^2` must be positive") - return exp(-1/2 * ((x - μ)/σ)^2)/(σ * sqrt(2*π)) -end +!!! details "Solution:" + The probability density function for the Gaussian distribution equals to -# output -gauss (generic function with 1 method) -``` + ```jldoctest key_args_ex; output = false + function gauss(x::Real; μ::Real = 0, σ::Real = 1) + σ^2 > 0 || error("the variance `σ^2` must be positive") + return exp(-1/2 * ((x - μ)/σ)^2)/(σ * sqrt(2*π)) + end -We used type annotation to ensure that all input arguments are real numbers. We also checked whether the standard deviation ``\sigma`` is positive. + # output + gauss (generic function with 1 method) + ``` -```jldoctest key_args_ex -julia> gauss(0) -0.3989422804014327 + We used type annotation to ensure that all input arguments are real numbers. We also checked whether the standard deviation ``\sigma`` is positive. -julia> gauss(0.1; μ = 1, σ = 1) -0.2660852498987548 -``` + ```jldoctest key_args_ex + julia> gauss(0) + 0.3989422804014327 -The integral of the probability density function over all real numbers should equal one. We can check it numerically by discretizing the integral into a finite sum. + julia> gauss(0.1; μ = 1, σ = 1) + 0.2660852498987548 + ``` -```jldoctest key_args_ex -julia> step = 0.01 -0.01 + The integral of the probability density function over all real numbers should equal one. We can check it numerically by discretizing the integral into a finite sum. 
-julia> x = -100:step:100; + ```jldoctest key_args_ex + julia> step = 0.01 + 0.01 -julia> sum(gauss, x) * step -1.0000000000000002 + julia> x = -100:step:100; -julia> g(x) = gauss(x; μ = -1, σ = 1.4); + julia> sum(gauss, x) * step + 1.0000000000000002 -julia> sum(g, x) * step -1.0000000000000007 -``` + julia> g(x) = gauss(x; μ = -1, σ = 1.4); -We use the `sum` function, which can accept a function as the first argument and apply it to each value before summation. The result is the same as `sum(gauss.(x))`. The difference is that the former, similarly to generators, does not allocate an array. The summation is then multiplied by the stepsize `0.01` to approximate the continuous interval `[-100, 100]`. + julia> sum(g, x) * step + 1.0000000000000007 + ``` -We can also visualize the probability density functions with the [Plots.jl](https://github.com/JuliaPlots/Plots.jl) package. + We use the `sum` function, which can accept a function as the first argument and apply it to each value before summation. The result is the same as `sum(gauss.(x))`. The difference is that the former, similarly to generators, does not allocate an array. The summation is then multiplied by the stepsize `0.01` to approximate the continuous interval `[-100, 100]`. -```@setup plots -using Plots -function gauss(x::Real; μ::Real = 0, σ::Real = 1) - σ^2 > 0 || error("the variance `σ^2` must be positive") - return exp(-1/2 * ((x - μ)/σ)^2)/(σ * sqrt(2*π)) -end -``` + We can also visualize the probability density functions with the [Plots.jl](https://github.com/JuliaPlots/Plots.jl) package. 
-```@example plots -using Plots -x = -15:0.1:15 + ```@setup plots + using Plots + function gauss(x::Real; μ::Real = 0, σ::Real = 1) + σ^2 > 0 || error("the variance `σ^2` must be positive") + return exp(-1/2 * ((x - μ)/σ)^2)/(σ * sqrt(2*π)) + end + ``` -plot(x, gauss.(x); label = "μ = 0, σ = 1", linewidth = 2, xlabel = "x", ylabel = "f(x)"); -plot!(x, gauss.(x; μ = 4, σ = 2); label = "μ = 4, σ = 2", linewidth = 2); -plot!(x, gauss.(x; μ = -3, σ = 2); label = "μ = -3, σ = 2", linewidth = 2); -savefig("gauss.svg") # hide -``` + ```@example plots + using Plots + x = -15:0.1:15 -![](gauss.svg) + plot(x, gauss.(x); label = "μ = 0, σ = 1", linewidth = 2, xlabel = "x", ylabel = "f(x)"); + plot!(x, gauss.(x; μ = 4, σ = 2); label = "μ = 4, σ = 2", linewidth = 2); + plot!(x, gauss.(x; μ = -3, σ = 2); label = "μ = -3, σ = 2", linewidth = 2); + savefig("gauss.svg") # hide + ``` -```@raw html -
-``` + ![](gauss.svg) ## Variable number of arguments @@ -605,107 +545,92 @@ julia> roundmod(12.529, 5; sigdigits = 2) This construction is beneficial whenever there are multiple chained functions, and only the deepest ones need keyword arguments. -```@raw html -
-
Exercise:
-
-``` - -Write a function `wrapper`, that accepts a number and applies one of `round`, `ceil` or `floor` functions based on the keyword argument `type`. Use the function to solve the following tasks: -- Round `1252.1518` to the nearest larger integer and convert the resulting value to `Int64`. -- Round `1252.1518` to the nearest smaller integer and convert the resulting value to `Int16`. -- Round `1252.1518` to `2` digits after the decimal point. -- Round `1252.1518` to `3` significant digits. - -```@raw html -
-
-Solution: -
-``` - -The one way to define this function is the `if-elseif-else` statement. - -```jldoctest varargs_ex; output = false -function wrapper(x...; type = :round, kwargs...) - if type == :ceil - return ceil(x...; kwargs...) - elseif type == :floor - return floor(x...; kwargs...) - else - return round(x...; kwargs...) +!!! warning "Exercise:" + Write a function `wrapper`, that accepts a number and applies one of `round`, `ceil` or `floor` functions based on the keyword argument `type`. Use the function to solve the following tasks: + - Round `1252.1518` to the nearest larger integer and convert the resulting value to `Int64`. + - Round `1252.1518` to the nearest smaller integer and convert the resulting value to `Int16`. + - Round `1252.1518` to `2` digits after the decimal point. + - Round `1252.1518` to `3` significant digits. + +!!! details "Solution:" + The one way to define this function is the `if-elseif-else` statement. + + ```jldoctest varargs_ex; output = false + function wrapper(x...; type = :round, kwargs...) + if type == :ceil + return ceil(x...; kwargs...) + elseif type == :floor + return floor(x...; kwargs...) + else + return round(x...; kwargs...) + end end -end - -# output -wrapper (generic function with 1 method) -``` -The `type` keyword argument is used to determine which function should be used. We use an optional number of arguments as well as an optional number of keyword arguments. + # output + wrapper (generic function with 1 method) + ``` -```jldoctest varargs_ex -julia> x = 1252.1518 -1252.1518 + The `type` keyword argument is used to determine which function should be used. We use an optional number of arguments as well as an optional number of keyword arguments. 
-julia> wrapper(Int64, x; type = :ceil) -1253 + ```jldoctest varargs_ex + julia> x = 1252.1518 + 1252.1518 -julia> wrapper(Int16, x; type = :floor) -1252 + julia> wrapper(Int64, x; type = :ceil) + 1253 -julia> wrapper(x; digits = 2) -1252.15 + julia> wrapper(Int16, x; type = :floor) + 1252 -julia> wrapper(x; sigdigits = 3) -1250.0 -``` + julia> wrapper(x; digits = 2) + 1252.15 -The second way to solve this exercise is to use the fact that it is possible to pass functions as arguments. We can omit the `if` conditions and directly pass the appropriate function. + julia> wrapper(x; sigdigits = 3) + 1250.0 + ``` -```jldoctest varargs_ex; output = false -wrapper_new(x...; type = round, kwargs...) = type(x...; kwargs...) + The second way to solve this exercise is to use the fact that it is possible to pass functions as arguments. We can omit the `if` conditions and directly pass the appropriate function. -# output -wrapper_new (generic function with 1 method) -``` + ```jldoctest varargs_ex; output = false + wrapper_new(x...; type = round, kwargs...) = type(x...; kwargs...) -In the function definition, we use the `type` keyword argument as a function and not as a symbol. + # output + wrapper_new (generic function with 1 method) + ``` -```jldoctest varargs_ex -julia> wrapper_new(1.123; type = ceil) -2.0 -``` + In the function definition, we use the `type` keyword argument as a function and not as a symbol. -If we use, for example, a `Symbol` instead of a function, an error will occur. + ```jldoctest varargs_ex + julia> wrapper_new(1.123; type = ceil) + 2.0 + ``` -```jldoctest varargs_ex -julia> wrapper_new(1.123; type = :ceil) -ERROR: MethodError: objects of type Symbol are not callable -[...] -``` + If we use, for example, a `Symbol` instead of a function, an error will occur. -Finally, we can test the `wrapper_new` function with the same arguments as for the `wrapper` function. 
+ ```jldoctest varargs_ex + julia> wrapper_new(1.123; type = :ceil) + ERROR: MethodError: objects of type Symbol are not callable + [...] + ``` -```jldoctest varargs_ex -julia> x = 1252.1518 -1252.1518 + Finally, we can test the `wrapper_new` function with the same arguments as for the `wrapper` function. -julia> wrapper_new(Int64, x; type = ceil) -1253 + ```jldoctest varargs_ex + julia> x = 1252.1518 + 1252.1518 -julia> wrapper_new(Int16, x; type = floor) -1252 + julia> wrapper_new(Int64, x; type = ceil) + 1253 -julia> wrapper_new(x; digits = 2) -1252.15 + julia> wrapper_new(Int16, x; type = floor) + 1252 -julia> wrapper_new(x; sigdigits = 3) -1250.0 -``` + julia> wrapper_new(x; digits = 2) + 1252.15 -```@raw html -
-``` + julia> wrapper_new(x; sigdigits = 3) + 1250.0 + ``` ## Anonymous functions diff --git a/docs/src/lecture_04/methods.md b/docs/src/lecture_04/methods.md index 7ca86d5f2..cfcb4e190 100644 --- a/docs/src/lecture_04/methods.md +++ b/docs/src/lecture_04/methods.md @@ -72,9 +72,11 @@ julia> product(1, 4.5) julia> product(:a, :b) ERROR: ArgumentError: product is defined for numbers only. +[...] julia> product("a", "b") ERROR: ArgumentError: product is defined for numbers only. +[...] ``` ## Type hierarchy @@ -90,52 +92,37 @@ AbstractFloat The problem with the `supertype` function is that it does not return the whole supertype hierarchy, but only the closest *larger* supertype. For `Float64` the closest larger supertype is `AbstractFloat`. However, as in the example above, we do not want to use this supertype, since then the function will only work for floating point numbers. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Create a function `supertypes_tree` which prints the whole tree of all supertypes. If the input type `T` satisfies the following condition `T === Any`, then the function should do nothing. Use the following function declaration: -Create a function `supertypes_tree` which prints the whole tree of all supertypes. If the input type `T` satisfies the following condition `T === Any`, then the function should do nothing. Use the following function declaration: + ```julia + function supertypes_tree(T::Type, level::Int = 0) + # code + end + ``` -```julia -function supertypes_tree(T::Type, level::Int = 0) - # code -end -``` + The optional argument `level` sets the printing indentation level. -The optional argument `level` sets the printing indentation level. + **Hints:** + - Use the `supertype` function in combination with recursion. + - Use the `repeat` function and string with white space `" "` to create a proper indentation. -**Hints:** -- Use the `supertype` function in combination with recursion. -- Use the `repeat` function and string with white space `" "` to create a proper indentation. +!!! details "Solution:" + The `supertypes_tree` function can be defined by: -```@raw html -
-
-Solution: -
-``` + ```jldoctest methods; output = false + function supertypes_tree(T::Type, level::Int = 0) + isequal(T, Any) && return + println(repeat(" ", level), T) + supertypes_tree(supertype(T), level + 1) + return + end -The `supertypes_tree` function can be defined by: + # output + supertypes_tree (generic function with 2 methods) + ``` -```jldoctest methods; output = false -function supertypes_tree(T::Type, level::Int = 0) - isequal(T, Any) && return - println(repeat(" ", level), T) - supertypes_tree(supertype(T), level + 1) - return -end - -# output -supertypes_tree (generic function with 2 methods) -``` - -The first line checks if the given input type is `Any`. If yes, then the function returns nothing. Otherwise, the function prints the type with a proper indentation provided by `repeat(" ", level)`, i.e., four white-spaces repeated `level`-times. The third line calls the `supertypes_tree` function recursively for the supertype of the input type `T` and the level of indentation `level + 1`. - -```@raw html -
-``` + The first line checks if the given input type is `Any`. If yes, then the function returns nothing. Otherwise, the function prints the type with a proper indentation provided by `repeat(" ", level)`, i.e., four white-spaces repeated `level`-times. The third line calls the `supertypes_tree` function recursively for the supertype of the input type `T` and the level of indentation `level + 1`. Now we can use the `supertypes_tree` function to get the whole supertype hierarchy for `Float64`. @@ -167,55 +154,40 @@ julia> subtypes(Number) This function suffers from a similar disadvantage as the `supertype` function: It is impossible to get the whole hierarchy of all subtypes using only this function. -```@raw html -
-
Exercise:
-
-``` - -Create a function `subtypes_tree` which prints the whole tree of all subtypes for the given type. Use the following function declaration: - -```@meta -DocTestSetup = quote - using InteractiveUtils: subtypes -end -``` - -```julia -function subtypes_tree(T::Type, level::Int = 0) - # code -end -``` +!!! warning "Exercise:" + Create a function `subtypes_tree` which prints the whole tree of all subtypes for the given type. Use the following function declaration: -The optional argument `level` sets the printing indentation level. + ```@meta + DocTestSetup = quote + using InteractiveUtils: subtypes + end + ``` -**Hints:** -- Use the `subtypes` function in combination with recursion. -- Use the `repeat` function and string with white space `" "` to create a proper indentation. + ```julia + function subtypes_tree(T::Type, level::Int = 0) + # code + end + ``` -```@raw html -
-
-Solution: -
-``` + The optional argument `level` sets the printing indentation level. -The `subtypes_tree` function is similar to `supertypes_tree`. The only differences are that we do not need to check for the top level of `Any`, and that we need to call the vectorized version `subtypes_tree.` because `subtypes(T)` returns an array. + **Hints:** + - Use the `subtypes` function in combination with recursion. + - Use the `repeat` function and string with white space `" "` to create a proper indentation. -```jldoctest methods; output = false -function subtypes_tree(T::Type, level::Int = 0) - println(repeat(" ", level), T) - subtypes_tree.(subtypes(T), level + 1) - return -end +!!! details "Solution:" + The `subtypes_tree` function is similar to `supertypes_tree`. The only differences are that we do not need to check for the top level of `Any`, and that we need to call the vectorized version `subtypes_tree.` because `subtypes(T)` returns an array. -# output -subtypes_tree (generic function with 2 methods) -``` + ```jldoctest methods; output = false + function subtypes_tree(T::Type, level::Int = 0) + println(repeat(" ", level), T) + subtypes_tree.(subtypes(T), level + 1) + return + end -```@raw html -
-``` + # output + subtypes_tree (generic function with 2 methods) + ``` Now we can use the `subtypes_tree` function to get the whole subtypes hierarchy for the `Number` type. @@ -283,6 +255,7 @@ julia> product("a", "b") julia> product(:a, :b) ERROR: ArgumentError: product is defined for numbers and strings only. +[...] ``` Sometimes, it may be complicated to guess which method is used for concrete inputs. In such a case, there is a useful macro `@which` that returns the method that is called for given arguments. @@ -326,145 +299,120 @@ julia> g(:a) ERROR: MethodError: no method matching g(::Symbol) Closest candidates are: - g(!Matched::Real) - @ Main none:1 g(!Matched::String) @ Main none:1 + g(!Matched::Real) + @ Main none:1 [...] ``` +!!! info "Do not overuse type annotation:" + The `product` function should be defined without the type annotation. It is a good practice not to restrict input argument types unless necessary. The reason is that, in this case, there is no benefit of using the type annotation. It is better to define the function `product_new` by: + ```jldoctest methods; output = false + product_new(x, y) = x * y -```@raw html -
-
Do not overuse type annotation:
-
-``` - -The `product` function should be defined without the type annotation. It is a good practice not to restrict input argument types unless necessary. The reason is that, in this case, there is no benefit of using the type annotation. It is better to define the function `product_new` by: - -```jldoctest methods; output = false -product_new(x, y) = x * y - -# output -product_new (generic function with 1 method) -``` + # output + product_new (generic function with 1 method) + ``` -Then we can apply this function to the same inputs as the original `product` function, and we will get the same results + Then we can apply this function to the same inputs as the original `product` function, and we will get the same results -```jldoctest methods -julia> product(1, 4.5) -4.5 + ```jldoctest methods + julia> product(1, 4.5) + 4.5 -julia> product_new(1, 4.5) -4.5 + julia> product_new(1, 4.5) + 4.5 -julia> product("a", "b") -"ab" + julia> product("a", "b") + "ab" -julia> product_new("a", "b") -"ab" -``` + julia> product_new("a", "b") + "ab" + ``` -with only one exception - -```jldoctest methods -julia> product("a", :a) -ERROR: ArgumentError: product is defined for numbers and strings only. + with only one exception -julia> product_new("a", :a) -ERROR: MethodError: no method matching *(::String, ::Symbol) -[...] -``` + ```jldoctest methods + julia> product("a", :a) + ERROR: ArgumentError: product is defined for numbers and strings only. + [...] -Here we get a different error. However, the error returned by the `product_new` function is more useful because it tells us what the real problem is. We can see that it is impossible to use the `*` operator to multiply a `String` and a `Symbol`. We can decide if this is the desired behaviour, and if not, we can define a method for the `*` operator that will fix it. + julia> product_new("a", :a) + ERROR: MethodError: no method matching *(::String, ::Symbol) + [...] + ``` -```@raw html -
-``` + Here we get a different error. However, the error returned by the `product_new` function is more useful because it tells us what the real problem is. We can see that it is impossible to use the `*` operator to multiply a `String` and a `Symbol`. We can decide if this is the desired behaviour, and if not, we can define a method for the `*` operator that will fix it. We show a simple example when the multiple dispatch is useful. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + We define the abstract type `Student` and specific types `Master` and `Doctoral`. The latter two are defined as structures containing one and three fields, respectively. -We define the abstract type `Student` and specific types `Master` and `Doctoral`. The latter two are defined as structures containing one and three fields, respectively. + ```@example methods + abstract type Student end -```@example methods -abstract type Student end + struct Master <: Student + salary + end -struct Master <: Student - salary -end + struct Doctoral <: Student + salary + exam_mid::Bool + exam_english::Bool + end -struct Doctoral <: Student - salary - exam_mid::Bool - exam_english::Bool -end + nothing # hide + ``` -nothing # hide -``` + We can check that the `subtypes_tree` works correctly on any type, including the type `Student` which we defined. -We can check that the `subtypes_tree` works correctly on any type, including the type `Student` which we defined. + ```julia + julia> subtypes_tree(Student) + Student + Doctoral + Master + ``` -```julia -julia> subtypes_tree(Student) -Student - Doctoral - Master -``` + We create instances of two students by providing values for the struct fields. -We create instances of two students by providing values for the struct fields. + ```@example methods + s1 = Master(5000) + s2 = Doctoral(30000, 1, 0) -```@example methods -s1 = Master(5000) -s2 = Doctoral(30000, 1, 0) + nothing # hide + ``` -nothing # hide -``` + Write the `salary_yearly` function which computes the yearly salary for both student types. The monthly salary is computed from the base salary (which can be accessed via `s1.salary`). Monthly bonus for doctoral students is 2000 for the mid exam and 1000 for the English exam. -Write the `salary_yearly` function which computes the yearly salary for both student types. The monthly salary is computed from the base salary (which can be accessed via `s1.salary`). 
Monthly bonus for doctoral students is 2000 for the mid exam and 1000 for the English exam. +!!! details "Solution:" + Julia prefers to write many simple functions. We write `salary_yearly` based on the not-yet-defined `salary_monthly` function. -```@raw html -
-
-Solution: -
-``` - -Julia prefers to write many simple functions. We write `salary_yearly` based on the not-yet-defined `salary_monthly` function. + ```@example methods + salary_yearly(s::Student) = 12*salary_monthly(s) -```@example methods -salary_yearly(s::Student) = 12*salary_monthly(s) + nothing # hide + ``` -nothing # hide -``` + We specified that the input to `salary_yearly` is any `Student`. Since `Student` is an abstract type, we can call `salary_yearly` with both `Master` and `Doctoral` student. Now we need to define the `salary_monthly` function. Since the salary is computed in different ways for both students, we write two methods. -We specified that the input to `salary_yearly` is any `Student`. Since `Student` is an abstract type, we can call `salary_yearly` with both `Master` and `Doctoral` student. Now we need to define the `salary_monthly` function. Since the salary is computed in different ways for both students, we write two methods. + ```@example methods + salary_monthly(s::Master) = s.salary + salary_monthly(s::Doctoral) = s.salary + s.exam_mid*2000 + s.exam_english*1000 -```@example methods -salary_monthly(s::Master) = s.salary -salary_monthly(s::Doctoral) = s.salary + s.exam_mid*2000 + s.exam_english*1000 + nothing # hide + ``` -nothing # hide -``` + Both methods have the same name (they are the same function) but have different inputs. While the first one is used for `Master` students, the second one for `Doctoral` students. Now we print the salary. -Both methods have the same name (they are the same function) but have different inputs. While the first one is used for `Master` students, the second one for `Doctoral` students. Now we print the salary. + ```@example methods + println("The yearly salary is $(salary_yearly(s1)).") + println("The yearly salary is $(salary_yearly(s2)).") -```@example methods -println("The yearly salary is $(salary_yearly(s1)).") -println("The yearly salary is $(salary_yearly(s2)).") - -nothing # hide -``` - -```@raw html -
-``` + nothing # hide + ``` ## Method ambiguities @@ -495,10 +443,10 @@ julia> f(2.0, 3.0) ERROR: MethodError: f(::Float64, ::Float64) is ambiguous. Candidates: - f(x::Float64, y) - @ Main none:1 f(x, y::Float64) @ Main none:1 + f(x::Float64, y) + @ Main none:1 Possible fix, define f(::Float64, ::Float64) diff --git a/docs/src/lecture_04/scope.md b/docs/src/lecture_04/scope.md index 5993f1e18..efbd12a1e 100644 --- a/docs/src/lecture_04/scope.md +++ b/docs/src/lecture_04/scope.md @@ -95,7 +95,7 @@ While variables can be read externally, they can only be changed within the modu ```jldoctest global julia> b = 4 -ERROR: cannot assign a value to imported variable A.b from module Main +ERROR: cannot assign a value to imported variable b [...] ``` diff --git a/docs/src/lecture_05/Plots.md b/docs/src/lecture_05/Plots.md index 5c86f2909..7090d07d0 100644 --- a/docs/src/lecture_05/Plots.md +++ b/docs/src/lecture_05/Plots.md @@ -115,104 +115,89 @@ Descriptions for these attributes can be found using the attribute name without plotattr("guide") ``` -```@raw html -
-
Exercise:
-
-``` - -Consider the following set of equations - -```math -\begin{aligned} -x(t) & = \cos(3t), \\ -y(t) & = \sin(2t),\\ -\end{aligned} -``` - -where ``t \in [0, 2\pi]``. Create a plot of the curve described by the equations above. Use plot attributes to set the following properties -1. The line width should start at `1`, increase to `50` and then decrease back to `1`. -2. The line color should change with the changing line width. -Use `:viridis` color scheme or any other [color scheme](http://docs.juliaplots.org/latest/generated/colorschemes/) supported by the Plots package. Use additional plot attributes to get a nice looking graph. - -**Hints:** -- use the `palette` function combined with the `collect` function to generate a vector of colors from the `:viridis` color scheme. -- remove all decorators by using: `axis = nothing`, `border = :none`. - -```@raw html -
-
-Solution: -
-``` - -We first define vector `t` by the `range` function with a predefined length. - -```@example plots -n = 1000 -t = range(0, 2π; length = n) -nothing # hide -``` - -Then we define functions described by the set of equations in the exercise description. - -```@example plots -fx(t) = cos(3t) -fy(t) = sin(2t) -nothing # hide -``` - -Since we want to use different plot attributes for each point, the attributes will have length `n`. Since the linewidth should first increase and then decrease, we use twice `range` and then `vcat` them into one column vector. - -```@example plots -linewidth = vcat( - range(1, 50; length = n ÷ 2), - range(50, 1; length = n - n ÷ 2) -) -nothing # hide -``` - -We used integer division to set the length in the `range` function. In the same way, we can create a vector of colors. The Plots package provides the `palette` function that allows generating equidistantly spaced colors from a color scheme. - -```@repl plots -c = palette(:viridis, 2); -typeof(c) -``` - -The `palette` function returns the `ColorPalette` type. Since we want to concatenate two vectors of colors together, we have to use the `collect` function to extract the vector of colors from the `ColorPalette` type. - -```@repl plots -c = collect(palette(:viridis, 2)) -``` - -Now we can use a similar code as before in combination with the `rev` keyword to change the order. - -```@example plots -color = vcat( - collect(palette(:viridis, n ÷ 2)), - collect(palette(:viridis, n - n ÷ 2; rev = true)) -) -nothing # hide -``` - -Finally, we can call the `plot` function with input arguments and attributes defined above. We use `axis = nothing` and `border = :none` to remove all decorators such as ticks or axis frame. - -```@example plots -plot(fx.(t), fy.(t); - linewidth, - color, - lims = (-1.2, 1.2), - legend = false, - axis = nothing, - border = :none, -) - -savefig("plot_exercise1.svg") # hide -``` - -```@raw html -
-``` +!!! warning "Exercise:" + Consider the following set of equations + + ```math + \begin{aligned} + x(t) & = \cos(3t), \\ + y(t) & = \sin(2t),\\ + \end{aligned} + ``` + + where ``t \in [0, 2\pi]``. Create a plot of the curve described by the equations above. Use plot attributes to set the following properties + 1. The line width should start at `1`, increase to `50` and then decrease back to `1`. + 2. The line color should change with the changing line width. + Use `:viridis` color scheme or any other [color scheme](http://docs.juliaplots.org/latest/generated/colorschemes/) supported by the Plots package. Use additional plot attributes to get a nice looking graph. + + **Hints:** + - use the `palette` function combined with the `collect` function to generate a vector of colors from the `:viridis` color scheme. + - remove all decorators by using: `axis = nothing`, `border = :none`. + +!!! details "Solution:" + We first define vector `t` by the `range` function with a predefined length. + + ```@example plots + n = 1000 + t = range(0, 2π; length = n) + nothing # hide + ``` + + Then we define functions described by the set of equations in the exercise description. + + ```@example plots + fx(t) = cos(3t) + fy(t) = sin(2t) + nothing # hide + ``` + + Since we want to use different plot attributes for each point, the attributes will have length `n`. Since the linewidth should first increase and then decrease, we use twice `range` and then `vcat` them into one column vector. + + ```@example plots + linewidth = vcat( + range(1, 50; length = n ÷ 2), + range(50, 1; length = n - n ÷ 2) + ) + nothing # hide + ``` + + We used integer division to set the length in the `range` function. In the same way, we can create a vector of colors. The Plots package provides the `palette` function that allows generating equidistantly spaced colors from a color scheme. + + ```@repl plots + c = palette(:viridis, 2); + typeof(c) + ``` + + The `palette` function returns the `ColorPalette` type. 
Since we want to concatenate two vectors of colors together, we have to use the `collect` function to extract the vector of colors from the `ColorPalette` type. + + ```@repl plots + c = collect(palette(:viridis, 2)) + ``` + + Now we can use a similar code as before in combination with the `rev` keyword to change the order. + + ```@example plots + color = vcat( + collect(palette(:viridis, n ÷ 2)), + collect(palette(:viridis, n - n ÷ 2; rev = true)) + ) + nothing # hide + ``` + + Finally, we can call the `plot` function with input arguments and attributes defined above. We use `axis = nothing` and `border = :none` to remove all decorators such as ticks or axis frame. + + ```@example plots + plot(fx.(t), fy.(t); + linewidth, + color, + lims = (-1.2, 1.2), + legend = false, + axis = nothing, + border = :none, + ) + + savefig("plot_exercise1.svg") # hide + ``` ![](plot_exercise1.svg) @@ -242,55 +227,40 @@ Instead of a vector of values, we can also use a similar syntax as for ranges wi plot(sin, x -> sin(2x), 0, 2π, 100; linewidth = 2, label = "") ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Create a plot given by the following set of equations: -Create a plot given by the following set of equations: + ```math + \begin{aligned} + x(t) & = (a + b)\cos(t) - b \cdot \cos \left( \left(\frac{a}{b} + 1 \right)t \right), \\ + y(t) & = (a + b)\sin(t) - b \cdot \sin \left( \left(\frac{a}{b} + 1 \right)t \right), \\ + \end{aligned} + ``` -```math -\begin{aligned} -x(t) & = (a + b)\cos(t) - b \cdot \cos \left( \left(\frac{a}{b} + 1 \right)t \right), \\ -y(t) & = (a + b)\sin(t) - b \cdot \sin \left( \left(\frac{a}{b} + 1 \right)t \right), \\ -\end{aligned} -``` + where ``a = 4.23``, ``b = 2.35`` and ``t \in [-15, 20]``. Use additional plot attributes to get a nicely looking graph. -where ``a = 4.23``, ``b = 2.35`` and ``t \in [-15, 20]``. Use additional plot attributes to get a nicely looking graph. +!!! details "Solution:" + This exercise is straightforward. We first define the functions described by the set of equations. -```@raw html -
-
-Solution: -
-``` + ```@example plots_fce + fx(t; a = 4.23, b = 2.35) = (a + b)*cos(t) - b*cos((a/b + 1)*t) + fy(t; a = 4.23, b = 2.35) = (a + b)*sin(t) - b*sin((a/b + 1)*t) -This exercise is straightforward. We first define the functions described by the set of equations. + nothing # hide + ``` -```@example plots_fce -fx(t; a = 4.23, b = 2.35) = (a + b)*cos(t) - b*cos((a/b + 1)*t) -fy(t; a = 4.23, b = 2.35) = (a + b)*sin(t) - b*sin((a/b + 1)*t) + Now we plot these functions. -nothing # hide -``` + ```@example plots_fce + plot(fx, fy, -15, 20, 500; + linewidth = 2, + legend = false, + axis = nothing, + border = :none, + ) -Now we plot these functions. - -```@example plots_fce -plot(fx, fy, -15, 20, 500; - linewidth = 2, - legend = false, - axis = nothing, - border = :none, -) - -savefig("plot_exercise2.svg") # hide -``` - -```@raw html -
-``` + savefig("plot_exercise2.svg") # hide + ``` ![](plot_exercise2.svg) @@ -315,78 +285,63 @@ The second way is to use a specialized function provided for each series type. T scatter(x, y) ``` -```@raw html -
-
Exercise:
-
-``` - -Consider the following function: - -```math -f(x, y) = \frac{x^2 \cdot y^2}{x^4 + y^4}. -``` - -Draw this function for ``x, y \in [-5, 5]``. Use the following three plot series `contourf`, `heatmap`, and `surface` with the following settings: -- `:viridis` color scheme, -- camera angle `(25, 65)`, -- no legend, color bar, or decorators (`axis`, `frame` and `ticks`). +!!! warning "Exercise:" + Consider the following function: -```@raw html -
-
-Solution: -
-``` + ```math + f(x, y) = \frac{x^2 \cdot y^2}{x^4 + y^4}. + ``` -As usual, we first define the function and the values, where it will be evaluated. + Draw this function for ``x, y \in [-5, 5]``. Use the following three plot series `contourf`, `heatmap`, and `surface` with the following settings: + - `:viridis` color scheme, + - camera angle `(25, 65)`, + - no legend, color bar, or decorators (`axis`, `frame` and `ticks`). -```@example plots_srs -x = range(-5, 5; length = 200) -fz(x, y) = x^2*y^2/(x^4 + y^4) -nothing # hide -``` +!!! details "Solution:" + As usual, we first define the function and the values, where it will be evaluated. -Since we want to create three different plots with the same attributes, we create a named tuple to store the attribute values. This allows us to reuse them. + ```@example plots_srs + x = range(-5, 5; length = 200) + fz(x, y) = x^2*y^2/(x^4 + y^4) + nothing # hide + ``` -```@example plots_srs -kwargs = ( - color = :viridis, - legend = false, - cbar = false, - axis = nothing, - border = :none, -) -nothing # hide -``` + Since we want to create three different plots with the same attributes, we create a named tuple to store the attribute values. This allows us to reuse them. -We can use the `plot` function with the `seriestype = :contourf` keyword to draw a filled contour plot. The simpler option is to use the `contourf` function. + ```@example plots_srs + kwargs = ( + color = :viridis, + legend = false, + cbar = false, + axis = nothing, + border = :none, + ) + nothing # hide + ``` -```julia -contourf(x, x, fz; kwargs...) # or plot(x, x, fz; seriestype = :contourf, kwargs...) -``` + We can use the `plot` function with the `seriestype = :contourf` keyword to draw a filled contour plot. The simpler option is to use the `contourf` function. -![](plots_srs_ex1.svg) + ```julia + contourf(x, x, fz; kwargs...) # or plot(x, x, fz; seriestype = :contourf, kwargs...) + ``` -We used the triple-dot syntax to unpack keyword arguments. 
Recall that in this case, the semi-colon is mandatory. Similarly, we can draw the `heatmap` plot. + ![](plots_srs_ex1.svg) -```julia -heatmap(x, x, fz; kwargs...) -``` + We used the triple-dot syntax to unpack keyword arguments. Recall that in this case, the semi-colon is mandatory. Similarly, we can draw the `heatmap` plot. -![](plots_srs_ex2.svg) + ```julia + heatmap(x, x, fz; kwargs...) + ``` -For the `surface` plot, we can change the camera angle by setting the `camera` attribute. + ![](plots_srs_ex2.svg) -```julia -surface(x, x, fz; camera = (25, 65), kwargs...) -``` + For the `surface` plot, we can change the camera angle by setting the `camera` attribute. -![](plots_srs_ex3.svg) + ```julia + surface(x, x, fz; camera = (25, 65), kwargs...) + ``` -```@raw html -
-``` + ![](plots_srs_ex3.svg) ## Subplots diff --git a/docs/src/lecture_05/otherpackages.md b/docs/src/lecture_05/otherpackages.md index d9eb0e2bb..60fa99907 100644 --- a/docs/src/lecture_05/otherpackages.md +++ b/docs/src/lecture_05/otherpackages.md @@ -58,88 +58,73 @@ histogram(x; normalize = :pdf, legend = false, opacity = 0.5) plot!(D; linewidth = 2, xlabel = "x", ylabel = "pdf(x)") ``` -```@raw html -
-
Exercise:
-
-``` - -Create a figure that shows the gamma distributions with the following parameters: `(2, 2)`, `(9, 0.5)`, `(7.5, 1)` and `(0.5, 1)`. - -**Hint:** to plot cumulative probability functions, use the Plots ability to plot functions. - -```@raw html -
-
-Solution: -
-``` - -The easiest way to create multiple distributions is to use the broadcasting system. - -```@example distr -Ds = Gamma.([2, 9, 7.5, 0.5], [2, 0.5, 1, 1]) -nothing #hide -``` - -Similarly, we use broadcasting to create a vector of labels. - -```@example distr -labels = reshape(string.("Gamma", params.(Ds)), 1, :) -nothing #hide -``` - -We need to reshape the labels to become a row vector. The reason is that we want to plot multiple distributions, and the Plot package expects that labels will be a row vector. Now, we call the `plot` function to plot all distributions. - -```@example distr -plot(Ds; - xaxis = ("x", (0, 20)), - yaxis = ("pdf(x)", (0, 0.5)), - labels = labels, - linewidth = 2, - legend = :topright, -) -``` - -A plot of the cumulative probability functions cannot be done in the same way. However, StatsPlots provides the `func` keyword argument that allows specifying which function should be plotted. - -```@example distr -plot(Ds; - func = cdf, - xaxis = ("x", (0, 20)), - yaxis = ("cdf(x)", (0, 1.05)), - labels = labels, - linewidth = 2, - legend = :bottomright, -) -``` - -Another possibility is to use the Plots package directly. To do so, we need to define a function with one argument, which at a given point returns the value of the cumulative probability function. Such functions for all our distributions can be easily defined as anonymous functions. - -```@example distr -cdfs = [x -> cdf(D, x) for D in Ds] -nothing # hide -``` - -The previous expression returns a vector of functions. Now we can use the `plot` function to create a curve for each element of the vector of cumulative probability functions. The example below creates these curves for ``x`` from ``0`` to ``20``. - -```@example distr -plot(cdfs, 0, 20; - xaxis = ("x", (0, 20)), - yaxis = ("cdf(x)", (0, 1.05)), - labels = labels, - linewidth = 2, - legend = :bottomright, -) - -savefig("Gamma_cdf.svg") # hide -``` - -![](Gamma_cdf.svg) - -```@raw html -
-``` +!!! warning "Exercise:" + Create a figure that shows the gamma distributions with the following parameters: `(2, 2)`, `(9, 0.5)`, `(7.5, 1)` and `(0.5, 1)`. + + **Hint:** to plot cumulative probability functions, use the Plots ability to plot functions. + +!!! details "Solution:" + The easiest way to create multiple distributions is to use the broadcasting system. + + ```@example distr + Ds = Gamma.([2, 9, 7.5, 0.5], [2, 0.5, 1, 1]) + nothing #hide + ``` + + Similarly, we use broadcasting to create a vector of labels. + + ```@example distr + labels = reshape(string.("Gamma", params.(Ds)), 1, :) + nothing #hide + ``` + + We need to reshape the labels to become a row vector. The reason is that we want to plot multiple distributions, and the Plot package expects that labels will be a row vector. Now, we call the `plot` function to plot all distributions. + + ```@example distr + plot(Ds; + xaxis = ("x", (0, 20)), + yaxis = ("pdf(x)", (0, 0.5)), + labels = labels, + linewidth = 2, + legend = :topright, + ) + ``` + + A plot of the cumulative probability functions cannot be done in the same way. However, StatsPlots provides the `func` keyword argument that allows specifying which function should be plotted. + + ```@example distr + plot(Ds; + func = cdf, + xaxis = ("x", (0, 20)), + yaxis = ("cdf(x)", (0, 1.05)), + labels = labels, + linewidth = 2, + legend = :bottomright, + ) + ``` + + Another possibility is to use the Plots package directly. To do so, we need to define a function with one argument, which at a given point returns the value of the cumulative probability function. Such functions for all our distributions can be easily defined as anonymous functions. + + ```@example distr + cdfs = [x -> cdf(D, x) for D in Ds] + nothing # hide + ``` + + The previous expression returns a vector of functions. Now we can use the `plot` function to create a curve for each element of the vector of cumulative probability functions. 
The example below creates these curves for ``x`` from ``0`` to ``20``. + + ```@example distr + plot(cdfs, 0, 20; + xaxis = ("x", (0, 20)), + yaxis = ("cdf(x)", (0, 1.05)), + labels = labels, + linewidth = 2, + legend = :bottomright, + ) + + savefig("Gamma_cdf.svg") # hide + ``` + + ![](Gamma_cdf.svg) ## BSON.jl diff --git a/docs/src/lecture_06/compositetypes.md b/docs/src/lecture_06/compositetypes.md index 1786adfd4..1976ce9bf 100644 --- a/docs/src/lecture_06/compositetypes.md +++ b/docs/src/lecture_06/compositetypes.md @@ -186,6 +186,7 @@ Composite types declared with `struct` keyword are immutable and cannot be modif ```jldoctest structs julia> r.bottomleft = [2;2] ERROR: setfield!: immutable struct of type Rectangle cannot be changed +[...] ``` However, immutability is not recursive. If an immutable object contains a mutable object, such as an array, elements of this mutable object can be modified. Even though `Rectangle` is an immutable type, its `bottomleft` field is a mutable array and can be changed. @@ -246,41 +247,32 @@ julia> mr MutableRectangle([1.0, 2.0], 1.5, 2.5) ``` -```@raw html -
-
Type unions:
-
-``` +!!! info "Type unions:" + The `area` function defined earlier will only work for `Rectangle` but not for `MutableRectangle` types. To define it for both types, we need type unions. The `Union` keyword creates a supertype of its inputs. -The `area` function defined earlier will only work for `Rectangle` but not for `MutableRectangle` types. To define it for both types, we need type unions. The `Union` keyword creates a supertype of its inputs. + ```jldoctest structs + julia> const AbstractRectangle = Union{Rectangle, MutableRectangle} + Union{MutableRectangle, Rectangle} -```jldoctest structs -julia> const AbstractRectangle = Union{Rectangle, MutableRectangle} -Union{MutableRectangle, Rectangle} + julia> Rectangle <: AbstractRectangle + true -julia> Rectangle <: AbstractRectangle -true - -julia> MutableRectangle <: AbstractRectangle -true -``` - -We now create the `perimeter(r::AbstractRectangle)` function. Since we specify that its input is an `AbstractRectangle`, it will work for both mutable `MutableRectangle` and immutable `Rectangle` types. + julia> MutableRectangle <: AbstractRectangle + true + ``` -```jldoctest structs -julia> perimeter(r::AbstractRectangle) = 2*(r.width + r.height) -perimeter (generic function with 1 method) + We now create the `perimeter(r::AbstractRectangle)` function. Since we specify that its input is an `AbstractRectangle`, it will work for both mutable `MutableRectangle` and immutable `Rectangle` types. -julia> perimeter(r) -14 + ```jldoctest structs + julia> perimeter(r::AbstractRectangle) = 2*(r.width + r.height) + perimeter (generic function with 1 method) -julia> perimeter(mr) -8.0 -``` + julia> perimeter(r) + 14 -```@raw html -
-``` + julia> perimeter(mr) + 8.0 + ``` ## Parametric types @@ -392,56 +384,40 @@ ERROR: MethodError: no method matching Point(::Int64, ::Float64) Closest candidates are: Point(::T, !Matched::T) where T<:Real - @ Main none:3 [...] ``` This situation can be handled by defining custom constructors, as we will discuss in the next section. -```@raw html -
-
Exercise:
-
-``` - -Define a structure that represents 3D-points. Do not forget to define it as a subtype of the AbstractPoint type. Then add a new method to the `coordinates` function. - -```@raw html -
-
-Solution: -
-``` - -There are several possibilities for defining the structure. We define it as a structure with three fields. Another option is to use a tuple to store the point coordinates. +!!! warning "Exercise:" + Define a structure that represents 3D-points. Do not forget to define it as a subtype of the AbstractPoint type. Then add a new method to the `coordinates` function. -```jldoctest structs; output = false -struct Point3D{T <: Real} <: AbstractPoint{T} - x::T - y::T - z::T -end +!!! details "Solution:" + There are several possibilities for defining the structure. We define it as a structure with three fields. Another option is to use a tuple to store the point coordinates. -coordinates(p::Point3D) = (p.x, p.y, p.z) + ```jldoctest structs; output = false + struct Point3D{T <: Real} <: AbstractPoint{T} + x::T + y::T + z::T + end -# output + coordinates(p::Point3D) = (p.x, p.y, p.z) -coordinates (generic function with 2 methods) -``` + # output -Since the `show` function was defined for the abstract type `AbstractPoint` and uses the `coordinates` function, the custom print is applied to `Point3D` without the need for further changes. + coordinates (generic function with 2 methods) + ``` -```jldoctest structs -julia> Point3D(1, 2, 3) -(1, 2, 3) + Since the `show` function was defined for the abstract type `AbstractPoint` and uses the `coordinates` function, the custom print is applied to `Point3D` without the need for further changes. -julia> Point3D{Float32}(1, 2, 3) -(1.0f0, 2.0f0, 3.0f0) -``` + ```jldoctest structs + julia> Point3D(1, 2, 3) + (1, 2, 3) -```@raw html -
-``` + julia> Point3D{Float32}(1, 2, 3) + (1.0f0, 2.0f0, 3.0f0) + ``` ## Constructors @@ -501,71 +477,57 @@ ERROR: the first argument must be less than or equal to the second one Inner constructors have an additional advantage. Since outer constructors create the object by calling an appropriate inner constructor, even if we define any number of outer constructors, the resulting instances of the `OrderedPair` type will always satisfy `x <= y`. -```@raw html -
-
Exercise:
-
-``` - -Define a structure that represents ND-points and stores their coordinates as `Tuple`. Do not forget to define it as a subtype of the `AbstractPoint` type. Redefine the default inner constructor to create an instance of `PointND` from different types. Then add a new method to the `coordinates` function, and define function `dim` that returns the dimension of the point. +!!! warning "Exercise:" + Define a structure that represents ND-points and stores their coordinates as `Tuple`. Do not forget to define it as a subtype of the `AbstractPoint` type. Redefine the default inner constructor to create an instance of `PointND` from different types. Then add a new method to the `coordinates` function, and define function `dim` that returns the dimension of the point. -**Hints:** use the `new` function in the definition of the new inner constructor. + **Hints:** use the `new` function in the definition of the new inner constructor. -**Bonus:** Tuples with elements of the same type can be described by the special type `NTuple{N, T}`, where `N` is the number of elements and `T` their type. + **Bonus:** Tuples with elements of the same type can be described by the special type `NTuple{N, T}`, where `N` is the number of elements and `T` their type. -```jldoctest -julia> NTuple{2, Int64} <: Tuple{Int64, Int64} -true -``` + ```jldoctest + julia> NTuple{2, Int64} <: Tuple{Int64, Int64} + true + ``` -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + In this case, we can use an inner constructor with the optional number of input arguments. In the definition below, we use type annotation to set these arguments to be real numbers. Since we use the `new` function and our type is parametric, we have to specify `N` and type `T`. -In this case, we can use an inner constructor with the optional number of input arguments. In the definition below, we use type annotation to set these arguments to be real numbers. Since we use the `new` function and our type is parametric, we have to specify `N` and type `T`. + ```jldoctest structs; output = false + struct PointND{N, T <: Real} <: AbstractPoint{T} + x::NTuple{N, T} -```jldoctest structs; output = false -struct PointND{N, T <: Real} <: AbstractPoint{T} - x::NTuple{N, T} - - function PointND(args::Real...) - vals = promote(args...) - return new{length(args), eltype(vals)}(vals) + function PointND(args::Real...) + vals = promote(args...) + return new{length(args), eltype(vals)}(vals) + end end -end -coordinates(p::PointND) = p.x -dim(p::PointND{N}) where N = N + coordinates(p::PointND) = p.x + dim(p::PointND{N}) where N = N -# output + # output -dim (generic function with 1 method) -``` + dim (generic function with 1 method) + ``` -Note that we use the parameter `N` in the definition of the `dim` function. + Note that we use the parameter `N` in the definition of the `dim` function. -Since the `show` function was defined for the abstract type `AbstractPoint` and uses the `coordinates` function, the custom printing function is immediately applied to the new type. Since we redefined the default constructors, we can create an instance of the `PointND` type from inputs of mixed types. + Since the `show` function was defined for the abstract type `AbstractPoint` and uses the `coordinates` function, the custom printing function is immediately applied to the new type. 
Since we redefined the default constructors, we can create an instance of the `PointND` type from inputs of mixed types. -```jldoctest structs -julia> p = PointND(1, 2) -(1, 2) + ```jldoctest structs + julia> p = PointND(1, 2) + (1, 2) -julia> dim(p) -2 + julia> dim(p) + 2 -julia> p = PointND(1, 2.2, 3, 4.5) -(1.0, 2.2, 3.0, 4.5) + julia> p = PointND(1, 2.2, 3, 4.5) + (1.0, 2.2, 3.0, 4.5) -julia> dim(p) -4 -``` + julia> dim(p) + 4 + ``` -```@raw html -
-``` ## Default field values @@ -609,184 +571,151 @@ MyType(3, 2.3, "hello") julia> MyType(; a = 5, b = 4.5) MyType(5, 4.5, "hello") ``` +!!! info "Function-like objects (functors):" + Methods are associated with types; therefore, it is possible to make an arbitrary Julia object "callable" by adding methods to its type. Such "callable" objects are sometimes called **functors**. Using this technique to the `MyType` defined above, we can define a method that returns values of all its fields. -```@raw html -
-
Function-like objects (functors):
-
-``` + ```jldoctest structs + julia> (m::MyType)() = (m.a, m.b, m.c) -Methods are associated with types; therefore, it is possible to make an arbitrary Julia object "callable" by adding methods to its type. Such "callable" objects are sometimes called **functors**. Using this technique to the `MyType` defined above, we can define a method that returns values of all its fields. + julia> m = MyType(; a = 5, b = 4.5) + MyType(5, 4.5, "hello") -```jldoctest structs -julia> (m::MyType)() = (m.a, m.b, m.c) - -julia> m = MyType(; a = 5, b = 4.5) -MyType(5, 4.5, "hello") - -julia> m() -(5, 4.5, "hello") -``` - -Moreover, we can use multiple-dispatch for functors. We show an example, where the functor has a different behaviour when it is called with a number and a string. - -```jldoctest structs; output = false -(m::MyType)(x::Real) = m.a*x + m.b -(m::MyType)(x::String) = "$(m.c), $(x)" - -# output + julia> m() + (5, 4.5, "hello") + ``` -``` + Moreover, we can use multiple-dispatch for functors. We show an example, where the functor has a different behaviour when it is called with a number and a string. -These two methods give different results. + ```jldoctest structs; output = false + (m::MyType)(x::Real) = m.a*x + m.b + (m::MyType)(x::String) = "$(m.c), $(x)" -```jldoctest structs -julia> m(1) -9.5 + # output -julia> m("world") -"hello, world" -``` + ``` -```@raw html -
-``` + These two methods give different results. -```@raw html -
-
Exercise:
-
-``` + ```jldoctest structs + julia> m(1) + 9.5 -[Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution) is uniquely represented by its mean ``\mu`` and variance ``\sigma^2>0``. Write a structure `Gauss` with the proper fields and an inner constructor that checks if the input parameters are correct. Initialization without arguments `Gauss()` should return the standardized normal distribution (`` \mu = 0`` and `` \sigma = 1``). Define a functor that computes the probability density function at a given point defined by + julia> m("world") + "hello, world" + ``` -```math -f_{\mu, \sigma}(x) = \frac{1}{\sigma \sqrt{ 2\pi }} \exp\left\{ -\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right) ^2 \right\}, -``` +!!! warning "Exercise:" + [Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution) is uniquely represented by its mean ``\mu`` and variance ``\sigma^2>0``. Write a structure `Gauss` with the proper fields and an inner constructor that checks if the input parameters are correct. Initialization without arguments `Gauss()` should return the standardized normal distribution (`` \mu = 0`` and `` \sigma = 1``). Define a functor that computes the probability density function at a given point defined by -Verify that the probability density function is defined correctly, i.e., its integral equals 1. + ```math + f_{\mu, \sigma}(x) = \frac{1}{\sigma \sqrt{ 2\pi }} \exp\left\{ -\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right) ^2 \right\}, + ``` -```@raw html -
-
-Solution: -
-``` + Verify that the probability density function is defined correctly, i.e., its integral equals 1. -One possible way to define this structure is the `@kwdef` macro, where we specify the default parameters. We also define an inner constructor that promotes the inputs to a same type, and checks if the variance is positive. +!!! details "Solution:" + One possible way to define this structure is the `@kwdef` macro, where we specify the default parameters. We also define an inner constructor that promotes the inputs to a same type, and checks if the variance is positive. -```jldoctest structs_gauss; output = false -Base.@kwdef struct Gauss{T<:Real} - μ::T = 0 - σ::T = 1 + ```jldoctest structs_gauss; output = false + Base.@kwdef struct Gauss{T<:Real} + μ::T = 0 + σ::T = 1 - function Gauss(μ::Real, σ::Real) - σ^2 > 0 || error("the variance `σ^2` must be positive") - pars = promote(μ, σ) - return new{eltype(pars)}(pars...) + function Gauss(μ::Real, σ::Real) + σ^2 > 0 || error("the variance `σ^2` must be positive") + pars = promote(μ, σ) + return new{eltype(pars)}(pars...) + end end -end -# output + # output -``` + ``` -We specified the parameter `T` by `eltype(pars)` in the call of the `new` function. The probability density function can be defined as a functor in the following way: + We specified the parameter `T` by `eltype(pars)` in the call of the `new` function. The probability density function can be defined as a functor in the following way: -```jldoctest structs_gauss; output = false -(d::Gauss)(x::Real) = exp(-1/2 * ((x - d.μ)/d.σ)^2)/(d.σ * sqrt(2*π)) + ```jldoctest structs_gauss; output = false + (d::Gauss)(x::Real) = exp(-1/2 * ((x - d.μ)/d.σ)^2)/(d.σ * sqrt(2*π)) -# output + # output -``` - -We use type annotation to ensure that all input arguments are real numbers. + ``` -```jldoctest structs_gauss -julia> gauss = Gauss() -Gauss{Int64}(0, 1) + We use type annotation to ensure that all input arguments are real numbers. 
-julia> gauss(0) -0.3989422804014327 -``` + ```jldoctest structs_gauss + julia> gauss = Gauss() + Gauss{Int64}(0, 1) -The integral of the probability density function over the real line should equal one. We check it numerically by discretizing the integral into a finite sum. + julia> gauss(0) + 0.3989422804014327 + ``` -```jldoctest structs_gauss -julia> step = 0.01 -0.01 + The integral of the probability density function over the real line should equal one. We check it numerically by discretizing the integral into a finite sum. -julia> x = -100:step:100; + ```jldoctest structs_gauss + julia> step = 0.01 + 0.01 -julia> sum(Gauss(), x) * step -1.0000000000000002 + julia> x = -100:step:100; -julia> sum(Gauss(0.1, 2.3), x) * step -1.0 -``` + julia> sum(Gauss(), x) * step + 1.0000000000000002 -We use `sum` with a function as the first input argument and apply it to each value of the second argument. This is possible because we defined a functor for `Gauss`. The result is the same as `sum(Gauss().(x))`. The difference is that the former, similarly to generators, does not allocate an array. + julia> sum(Gauss(0.1, 2.3), x) * step + 1.0 + ``` -```@raw html -
-``` + We use `sum` with a function as the first input argument and apply it to each value of the second argument. This is possible because we defined a functor for `Gauss`. The result is the same as `sum(Gauss().(x))`. The difference is that the former, similarly to generators, does not allocate an array. -```@raw html -
-
Plot recipes:
-
-``` +!!! compat "Plot recipes:" + The previous exercise defined a new type representing the Gaussian distribution. We also defined a functor that computes the probability density function of this distribution. It makes sense to visualize the probability density function using the [Plots](@ref Plots.jl) package. Unfortunately, it is not possible to use [Function plotting](@ref Function-plotting), i.e., the following will not work even though the `Gauss` type is callable. -The previous exercise defined a new type representing the Gaussian distribution. We also defined a functor that computes the probability density function of this distribution. It makes sense to visualize the probability density function using the [Plots](@ref Plots.jl) package. Unfortunately, it is not possible to use [Function plotting](@ref Function-plotting), i.e., the following will not work even though the `Gauss` type is callable. + ```julia + plot(x, Gauss()) + ``` -```julia -plot(x, Gauss()) -``` + Using the system of Julia types, it is possible to obtain special behaviour for a certain type only by defining a new method for this type. For example, if we use the `plot` function, all input data and plot attributes are preprocessed to some standard format and then the final graph is created. Due to the Julia type system, we can easily change how this preprocessing happens and define special behaviour for custom types. -Using the system of Julia types, it is possible to obtain special behaviour for a certain type only by defining a new method for this type. For example, if we use the `plot` function, all input data and plot attributes are preprocessed to some standard format and then the final graph is created. Due to the Julia type system, we can easily change how this preprocessing happens and define special behaviour for custom types. + For plotting, this is done by the `@recipe` macro from the [RecipesBase](https://github.com/JuliaPlots/RecipesBase.jl) package. 
The RecipesBase package provides the functionality related to creating custom plots and the Plots package uses this functionality. Moreover, since the RecipesBase package is much smaller, its first run is faster. The syntax is straightforward. In the function head, we define two inputs: our type and input `x`. In the function body, we define plot attributes in the same way as if we pass them into the `plot` function. Finally, we define the output of the function. -For plotting, this is done by the `@recipe` macro from the [RecipesBase](https://github.com/JuliaPlots/RecipesBase.jl) package. The RecipesBase package provides the functionality related to creating custom plots and the Plots package uses this functionality. Moreover, since the RecipesBase package is much smaller, its first run is faster. The syntax is straightforward. In the function head, we define two inputs: our type and input `x`. In the function body, we define plot attributes in the same way as if we pass them into the `plot` function. Finally, we define the output of the function. + ```julia + using RecipesBase -```julia -using RecipesBase - -@recipe function f(d::Gauss, x = (d.μ - 4d.σ):0.1:(d.μ + 4d.σ)) - seriestype := :path - label --> "Gauss(μ = $(d.μ), σ = $(d.σ))" - xguide --> "x" - yguide --> "f(x)" - linewidth --> 2 - return x, d.(x) -end -``` + @recipe function f(d::Gauss, x = (d.μ - 4d.σ):0.1:(d.μ + 4d.σ)) + seriestype := :path + label --> "Gauss(μ = $(d.μ), σ = $(d.σ))" + xguide --> "x" + yguide --> "f(x)" + linewidth --> 2 + return x, d.(x) + end + ``` -The operators `:=` and `-->` are specific for this package. Both set default values for plotting attributes. The difference is that the default values can be changed for `-->` but cannot be changed for `:=`. + The operators `:=` and `-->` are specific for this package. Both set default values for plotting attributes. The difference is that the default values can be changed for `-->` but cannot be changed for `:=`. 
-The recipe above is equivalent to calling the `plot` function. + The recipe above is equivalent to calling the `plot` function. -```julia -d = Gauss() -plot(x, d.(x); - seriestype := :path, - label = "Gauss(μ = $(d.μ), σ = $(d.σ))", - xguide = "x", - yguide = "f(x)", - linewidth = 2 -) -``` + ```julia + d = Gauss() + plot(x, d.(x); + seriestype := :path, + label = "Gauss(μ = $(d.μ), σ = $(d.σ))", + xguide = "x", + yguide = "f(x)", + linewidth = 2 + ) + ``` -With the new plot recipe, we can plot the probability density function of the Gaussian distribution with different parameters. + With the new plot recipe, we can plot the probability density function of the Gaussian distribution with different parameters. -```julia -using Plots + ```julia + using Plots -plot(Gauss()) -plot!(Gauss(4, 2); linewidth = 4, color = :red) -plot!(Gauss(-3, 2); label = "new label", linestyle = :dash) -``` + plot(Gauss()) + plot!(Gauss(4, 2); linewidth = 4, color = :red) + plot!(Gauss(-3, 2); label = "new label", linestyle = :dash) + ``` -![](gauss.svg) -```@raw html -
-``` + ![](gauss.svg) \ No newline at end of file diff --git a/docs/src/lecture_06/currencies.md b/docs/src/lecture_06/currencies.md index 9c570b863..3828c7f2c 100644 --- a/docs/src/lecture_06/currencies.md +++ b/docs/src/lecture_06/currencies.md @@ -86,40 +86,30 @@ ERROR: MethodError: Cannot `convert` an object of type Dollar to an object of ty We used only the abstract type `Currency` to define the `BankAccount` type. This allows us to write a generic code that not constrained to one concrete type. We created an instance of `BankAccount` and added a new transaction. However, we cannot calculate an account balance (the sum of all transactions), and we cannot convert money from one currency to another. In the rest of the lecture, we will fix this, and we will also define basic arithmetic operations such as `+` or `-`. - -```@raw html -
-
Avoid containers with abstract type parameters:
-
-``` - -It is generally not good to use [containers with abstract element type](https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-abstract-container) as we did for storing transactions. We used it in the example above because we do not want to convert all transactions to a common currency. When we create an array from different types, the promotion system converts these types to their smallest supertype for efficient memory storage. - -```jldoctest -julia> [Int32(123), 1, 1.5, 1.234f0] -4-element Vector{Float64}: - 123.0 - 1.0 - 1.5 - 1.2339999675750732 -``` - -The smallest supertype is `Float64`, and the result is `Array{Float64, 1}`. When we do not want to convert the variables, we must manually specify the resulting array supertype. - -```jldoctest -julia> Real[Int32(123), 1, 1.5, 1.234f0] -4-element Vector{Real}: - 123 - 1 - 1.5 - 1.234f0 -``` - -In this case, the types of all elements are preserved. - -```@raw html -
-``` +!!! info "Avoid containers with abstract type parameters:" + It is generally not good to use [containers with abstract element type](https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-abstract-container) as we did for storing transactions. We used it in the example above because we do not want to convert all transactions to a common currency. When we create an array from different types, the promotion system converts these types to their smallest supertype for efficient memory storage. + + ```jldoctest + julia> [Int32(123), 1, 1.5, 1.234f0] + 4-element Vector{Float64}: + 123.0 + 1.0 + 1.5 + 1.2339999675750732 + ``` + + The smallest supertype is `Float64`, and the result is `Array{Float64, 1}`. When we do not want to convert the variables, we must manually specify the resulting array supertype. + + ```jldoctest + julia> Real[Int32(123), 1, 1.5, 1.234f0] + 4-element Vector{Real}: + 123 + 1 + 1.5 + 1.234f0 + ``` + + In this case, the types of all elements are preserved. ## Custom print @@ -161,46 +151,31 @@ julia> Euro(1.5) There is one big difference with Python, where we can create a class and define methods inside the class. If we wanted to add a new method, we have to would modify the class. In Julia, we can add or alter methods any time without the necessity to change the class. -```@raw html -
-
Exercise:
-
-``` - -Define a new method for the `symbol` function for `Dollar`. +!!! warning "Exercise:" + Define a new method for the `symbol` function for `Dollar`. -**Hint:** the dollar symbol `$` has a special meaning in Julia. Do not forget to use the `\` symbol when using the dollar symbol in a string. + **Hint:** the dollar symbol `$` has a special meaning in Julia. Do not forget to use the `\` symbol when using the dollar symbol in a string. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + When adding a new method to the `symbol` function, we have to remember that we used the currency type for dispatch, i.e., we have to use `::Type{Dollar}` instead of `::Dollar` in the type annotation. -When adding a new method to the `symbol` function, we have to remember that we used the currency type for dispatch, i.e., we have to use `::Type{Dollar}` instead of `::Dollar` in the type annotation. + ```jldoctest currency; output=false + symbol(::Type{Dollar}) = "\$" -```jldoctest currency; output=false -symbol(::Type{Dollar}) = "\$" + # output -# output + symbol (generic function with 3 methods) + ``` -symbol (generic function with 3 methods) -``` + Now we can check that everything works well. -Now we can check that everything works well. + ```jldoctest currency + julia> Dollar(1) + 1.0 $ -```jldoctest currency -julia> Dollar(1) -1.0 $ - -julia> Dollar(1.5) -1.5 $ -``` - -```@raw html -
-``` + julia> Dollar(1.5) + 1.5 $ + ``` ## Conversion @@ -388,51 +363,36 @@ julia> dlr = convert(Dollar, pnd) 1.3 $ ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + The printing style is not ideal because we are usually not interested in more than the first two digits after the decimal point. Redefine the method in the `show` function to print currencies so that the result is rounded to 2 digits after the decimal point. -The printing style is not ideal because we are usually not interested in more than the first two digits after the decimal point. Redefine the method in the `show` function to print currencies so that the result is rounded to 2 digits after the decimal point. +!!! details "Solution:" + Any real number can be rounded to 2 digits after the decimal point by the `round` function with the keyword argument `digits = 2`. Then we can use an almost identical definition of the method as before. -```@raw html -
-
-Solution: -
-``` + ```jldoctest currency; output=false + function Base.show(io::IO, c::T) where {T <: Currency} + val = round(c.value; digits = 2) + return print(io, val, " ", symbol(T)) + end -Any real number can be rounded to 2 digits after the decimal point by the `round` function with the keyword argument `digits = 2`. Then we can use an almost identical definition of the method as before. + # output -```jldoctest currency; output=false -function Base.show(io::IO, c::T) where {T <: Currency} - val = round(c.value; digits = 2) - return print(io, val, " ", symbol(T)) -end + ``` -# output + The same code as before this example gives the following results. -``` + ```jldoctest currency + julia> eur = convert(Euro, Dollar(1.3)) + 1.08 € -The same code as before this example gives the following results. + julia> pnd = convert(Pound, eur) + 0.95 £ -```jldoctest currency -julia> eur = convert(Euro, Dollar(1.3)) -1.08 € + julia> dlr = convert(Dollar, pnd) + 1.3 $ + ``` -julia> pnd = convert(Pound, eur) -0.95 £ - -julia> dlr = convert(Dollar, pnd) -1.3 $ -``` - -We realize that the rounding is done only for printing, while the original value remains unchanged. - -```@raw html -
-``` + We realize that the rounding is done only for printing, while the original value remains unchanged. ## Promotion @@ -480,72 +440,57 @@ julia> promote(Pound(1.3), Dollar(2.4), Euro(2)) (1.47 €, 1.99 €, 2.0 €) ``` -```@raw html -
-
Exercise:
-
-``` - -Define a new currency `CzechCrown` representing Czech crowns. The exchange rate to euro is `0.038`, and all other currencies should take precedence over the Czech crown. - -```@raw html -
-
-Solution: -
-``` - -We define first the new type `CzechCrown`. +!!! warning "Exercise:" + Define a new currency `CzechCrown` representing Czech crowns. The exchange rate to euro is `0.038`, and all other currencies should take precedence over the Czech crown. -```jldoctest currency; output=false -struct CzechCrown <: Currency - value::Float64 -end +!!! details "Solution:" + We define first the new type `CzechCrown`. -# output + ```jldoctest currency; output=false + struct CzechCrown <: Currency + value::Float64 + end -``` + # output -We must add new methods for the `symbol` and `rate` functions. + ``` -```jldoctest currency; output=false -symbol(::Type{CzechCrown}) = "Kč" -rate(::Type{Euro}, ::Type{CzechCrown}) = 0.038 + We must add new methods for the `symbol` and `rate` functions. -# output + ```jldoctest currency; output=false + symbol(::Type{CzechCrown}) = "Kč" + rate(::Type{Euro}, ::Type{CzechCrown}) = 0.038 -rate (generic function with 7 methods) -``` + # output -We also must add promotion rules for the dollar and pound. + rate (generic function with 7 methods) + ``` -```jldoctest currency; output=false -Base.promote_rule(::Type{CzechCrown}, ::Type{Dollar}) = Dollar -Base.promote_rule(::Type{CzechCrown}, ::Type{Pound}) = Pound + We also must add promotion rules for the dollar and pound. -# output + ```jldoctest currency; output=false + Base.promote_rule(::Type{CzechCrown}, ::Type{Dollar}) = Dollar + Base.promote_rule(::Type{CzechCrown}, ::Type{Pound}) = Pound -``` + # output -Finally, we can test the functionality. + ``` -```jldoctest currency -julia> CzechCrown(2.8) -2.8 Kč + Finally, we can test the functionality. 
-julia> dl = convert(Dollar, CzechCrown(64)) -2.93 $ + ```jldoctest currency + julia> CzechCrown(2.8) + 2.8 Kč -julia> convert(CzechCrown, dl) -64.0 Kč + julia> dl = convert(Dollar, CzechCrown(64)) + 2.93 $ -julia> promote(Pound(1.3), Dollar(2.4), Euro(2), CzechCrown(2.8)) -(1.47 €, 1.99 €, 2.0 €, 0.11 €) -``` + julia> convert(CzechCrown, dl) + 64.0 Kč -```@raw html -
-``` + julia> promote(Pound(1.3), Dollar(2.4), Euro(2), CzechCrown(2.8)) + (1.47 €, 1.99 €, 2.0 €, 0.11 €) + ``` ## Basic arithmetic operations @@ -639,108 +584,93 @@ julia> CzechCrown.([4.5, 2.4, 16.7, 18.3]) .+ Dollar(12) 12.84 $ ``` -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + In the section above, we defined the addition for all subtypes of `Currency`. We also told the broadcasting system in Julia to treat all subtypes of the `Currency` as scalars. Follow the same pattern and define the following operations: `-`, `*`, `/`. -In the section above, we defined the addition for all subtypes of `Currency`. We also told the broadcasting system in Julia to treat all subtypes of the `Currency` as scalars. Follow the same pattern and define the following operations: `-`, `*`, `/`. + **Hint:** Define only operations that make sense. For example, it makes sense to multiply `1 €` by 2 to get `2 €`. But it does not make sense to multiply `1 €` by `2 €`. -**Hint:** Define only operations that make sense. For example, it makes sense to multiply `1 €` by 2 to get `2 €`. But it does not make sense to multiply `1 €` by `2 €`. +!!! details "Solution:" + The `-` operation can be defined exactly as the addition. -```@raw html -
-
-Solution: -
-``` + ```jldoctest currency; output=false + Base.:-(x::Currency, y::Currency) = -(promote(x, y)...) + Base.:-(x::T, y::T) where {T <: Currency} = T(x.value - y.value) -The `-` operation can be defined exactly as the addition. + # output -```jldoctest currency; output=false -Base.:-(x::Currency, y::Currency) = -(promote(x, y)...) -Base.:-(x::T, y::T) where {T <: Currency} = T(x.value - y.value) + ``` -# output + In the example below, we can see that everything works as intended. -``` + ```jldoctest currency + julia> Dollar(1.3) - CzechCrown(4.5) + 1.09 $ -In the example below, we can see that everything works as intended. + julia> CzechCrown.([4.5, 2.4, 16.7, 18.3]) .- Dollar(12) + 4-element Vector{Dollar}: + -11.79 $ + -11.89 $ + -11.24 $ + -11.16 $ + ``` -```jldoctest currency -julia> Dollar(1.3) - CzechCrown(4.5) -1.09 $ + The situation with the multiplication is different as it makes sense to multiply `1 €` by 2 but not by `2 €`. We have to define a method for multiplying any `Currency` subtype by a real number. We have to define the multiplication both from the right and the left. -julia> CzechCrown.([4.5, 2.4, 16.7, 18.3]) .- Dollar(12) -4-element Vector{Dollar}: - -11.79 $ - -11.89 $ - -11.24 $ - -11.16 $ -``` + ```jldoctest currency; output=false + Base.:*(a::Real, x::T) where {T <: Currency} = T(a * x.value) + Base.:*(x::T, a::Real) where {T <: Currency} = T(a * x.value) -The situation with the multiplication is different as it makes sense to multiply `1 €` by 2 but not by `2 €`. We have to define a method for multiplying any `Currency` subtype by a real number. We have to define the multiplication both from the right and the left. + # output -```jldoctest currency; output=false -Base.:*(a::Real, x::T) where {T <: Currency} = T(a * x.value) -Base.:*(x::T, a::Real) where {T <: Currency} = T(a * x.value) + ``` -# output + As in the previous cases, everything works as expected, and broadcasting is supported without any additional steps. 
-``` + ```jldoctest currency + julia> 2 * Dollar(1.3) * 0.5 + 1.3 $ -As in the previous cases, everything works as expected, and broadcasting is supported without any additional steps. + julia> 2 .* CzechCrown.([4.5, 2.4, 16.7, 18.3]) .* 0.5 + 4-element Vector{CzechCrown}: + 4.5 Kč + 2.4 Kč + 16.7 Kč + 18.3 Kč + ``` -```jldoctest currency -julia> 2 * Dollar(1.3) * 0.5 -1.3 $ + Finally, we can define division. In this case, it makes sense to divide a currency by a real number. -julia> 2 .* CzechCrown.([4.5, 2.4, 16.7, 18.3]) .* 0.5 -4-element Vector{CzechCrown}: - 4.5 Kč - 2.4 Kč - 16.7 Kč - 18.3 Kč -``` + ```jldoctest currency; output=false + Base.:/(x::T, a::Real) where {T <: Currency} = T(x.value / a) -Finally, we can define division. In this case, it makes sense to divide a currency by a real number. + # output -```jldoctest currency; output=false -Base.:/(x::T, a::Real) where {T <: Currency} = T(x.value / a) + ``` -# output + But it also makes sense to define the division of one amount of money by another amount of money in different currencies. In this case, a result is a real number representing their ratio. -``` + ```jldoctest currency; output=false + Base.:/(x::Currency, y::Currency) = /(promote(x, y)...) + Base.:/(x::T, y::T) where {T <: Currency} = x.value / y.value -But it also makes sense to define the division of one amount of money by another amount of money in different currencies. In this case, a result is a real number representing their ratio. + # output -```jldoctest currency; output=false -Base.:/(x::Currency, y::Currency) = /(promote(x, y)...) -Base.:/(x::T, y::T) where {T <: Currency} = x.value / y.value - -# output - -``` + ``` -The result is as follows. + The result is as follows. -```jldoctest currency -julia> Dollar(1.3) / 2 -0.65 $ + ```jldoctest currency + julia> Dollar(1.3) / 2 + 0.65 $ -julia> 2 .* CzechCrown.([1, 2, 3, 4]) ./ CzechCrown(1) -4-element Vector{Float64}: - 2.0 - 4.0 - 6.0 - 8.0 -``` - -```@raw html -
-``` + julia> 2 .* CzechCrown.([1, 2, 3, 4]) ./ CzechCrown(1) + 4-element Vector{Float64}: + 2.0 + 4.0 + 6.0 + 8.0 + ``` ## Currency comparison @@ -892,6 +822,7 @@ julia> b(Dollar(10)) julia> b(-2*balance(b)) ERROR: ArgumentError: insufficient bank account balance. +[...] julia> b(Pound(10)) diff --git a/docs/src/lecture_07/develop.md b/docs/src/lecture_07/develop.md index a3324cf69..f6fbfc56d 100644 --- a/docs/src/lecture_07/develop.md +++ b/docs/src/lecture_07/develop.md @@ -45,85 +45,70 @@ Since the `Project.toml` file `src/*.jl` files are sufficient for determining a The built-in `generate` function provides only basic functionality for generating packages. Even though it is sufficient in many cases, the [PkgTemplates](https://github.com/invenia/PkgTemplates.jl) package offers a straightforward and customizable way for creating packages. -```@raw html -
-
Exercise:
-
-``` - -The goal of this exercise is to create a new package by the PkgTemplates package. Install PkgTemplates and then use the following code to generate a new package template. - -```julia -using PkgTemplates - -template = Template(; - user = "GithubUserName", # github user name - authors = ["Author1", "Author2"], # list of authors - dir = "/Path/To/Dir/", # dir in which the package will be created - julia = v"1.7", # compat version of Julia - plugins = [ - !CompatHelper, # disable CompatHelper - !TagBot, # disable TagBot - Readme(; inline_badges = true), # added readme file with badges - Tests(; project = true), # added Project.toml file for unit tests - Git(; manifest = false), # add manifest.toml to .gitignore - License(; name = "MIT") # addedMIT licence - ], -) -``` +!!! warning "Exercise:" + The goal of this exercise is to create a new package by the PkgTemplates package. Install PkgTemplates and then use the following code to generate a new package template. -Do not forget to change `user`, `authors` and `dir`. + ```julia + using PkgTemplates + + template = Template(; + user = "GithubUserName", # github user name + authors = ["Author1", "Author2"], # list of authors + dir = "/Path/To/Dir/", # dir in which the package will be created + julia = v"1.7", # compat version of Julia + plugins = [ + !CompatHelper, # disable CompatHelper + !TagBot, # disable TagBot + Readme(; inline_badges = true), # added readme file with badges + Tests(; project = true), # added Project.toml file for unit tests + Git(; manifest = false), # add manifest.toml to .gitignore + License(; name = "MIT") # addedMIT licence + ], + ) + ``` -In the rest of the lecture, we will write code to visualize grayscale and colour images. Come up with a proper package name and use the following code to generate a new package. + Do not forget to change `user`, `authors` and `dir`. 
-```julia -template("PackageName") -``` + In the rest of the lecture, we will write code to visualize grayscale and colour images. Come up with a proper package name and use the following code to generate a new package. -For naming conventions, see the official [package naming guidelines](https://julialang.github.io/Pkg.jl/v1/creating-packages/#Package-naming-guidelines). Finally, create the folder `examples` in the main package folder. + ```julia + template("PackageName") + ``` -```@raw html -
-
-Solution: -
-``` + For naming conventions, see the official [package naming guidelines](https://julialang.github.io/Pkg.jl/v1/creating-packages/#Package-naming-guidelines). Finally, create the folder `examples` in the main package folder. -There is no best way to choose the correct package name. We decided to use `ImageInspector` and create the package by the following code: +!!! details "Solution:" + There is no best way to choose the correct package name. We decided to use `ImageInspector` and create the package by the following code: -```julia -template("ImageInspector") -``` + ```julia + template("ImageInspector") + ``` -After creating the `ImageInspector` package, we can add the `examples` folder manually or use the `mkdir` function to create it. For the latter, we use the `joinpath` function to specify the correct path. + After creating the `ImageInspector` package, we can add the `examples` folder manually or use the `mkdir` function to create it. For the latter, we use the `joinpath` function to specify the correct path. -```julia -mkdir(joinpath("/Path/To/Dir/", "ImageInspector", "examples")) -``` + ```julia + mkdir(joinpath("/Path/To/Dir/", "ImageInspector", "examples")) + ``` -The generated folder contains more files than the folder generated by the built-in `generate` function. + The generated folder contains more files than the folder generated by the built-in `generate` function. -```julia -├── .git -├── .gitignore -├── LICENSE -├── Manifest.toml -├── Project.toml -├── README.md -├── examples -├── src -│ └── ImageInspector.jl -└── test + ```julia + ├── .git + ├── .gitignore + ├── LICENSE ├── Manifest.toml ├── Project.toml - └── runtests.jl -``` - -```@raw html -
-``` + ├── README.md + ├── examples + ├── src + │ └── ImageInspector.jl + └── test + ├── Manifest.toml + ├── Project.toml + └── runtests.jl + ``` -!!! bonus "Interactive package generation:" +!!! compat "Interactive package generation:" PkgTemplate provides an interactive way to generate a new package. The template can be created interactively by the following command: ```julia @@ -241,82 +226,67 @@ Hello World!!!! This section adds content to the package. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + This exercise defines the `image` function that converts a matrix of real numbers to a matrix of Gray points. Real numbers can be converted to Gray points by the `Gray` constructor from the Colors package. Use the following code to test the function. -This exercise defines the `image` function that converts a matrix of real numbers to a matrix of Gray points. Real numbers can be converted to Gray points by the `Gray` constructor from the Colors package. Use the following code to test the function. - -```julia -# /examples/example.jl -using Revise # this must come before `using ImageInspector` -using ImageInspector, MLDatasets, Plots - -X = MLDatasets.FashionMNIST(Float64, :train)[:][1]; -x = selectdim(X, ndims(X), 1) + ```julia + # /examples/example.jl + using Revise # this must come before `using ImageInspector` + using ImageInspector, MLDatasets, Plots -plot(image(x); axis = nothing, border = :none) -``` + X = MLDatasets.FashionMNIST(Float64, :train)[:][1]; + x = selectdim(X, ndims(X), 1) -**Hint:** Each Julia package contains its environment for tracking package dependencies. Use proper commands in the Pkg REPL to add `Colors` as a dependency of the ImageInspector package. Do not forget to add `MLDatasets` and `Plots` to the `examples` environment. + plot(image(x); axis = nothing, border = :none) + ``` -```@raw html -
-
-Solution: -
-``` + **Hint:** Each Julia package contains its environment for tracking package dependencies. Use proper commands in the Pkg REPL to add `Colors` as a dependency of the ImageInspector package. Do not forget to add `MLDatasets` and `Plots` to the `examples` environment. -First, we need to install all necessary packages. Since we set the `examples` environment as the default one for this project, we first install `MLDatasets` and `Plots`. +!!! details "Solution:" + First, we need to install all necessary packages. Since we set the `examples` environment as the default one for this project, we first install `MLDatasets` and `Plots`. -```julia -(examples) pkg> add MLDatasets Plots -``` + ```julia + (examples) pkg> add MLDatasets Plots + ``` -Since we want to add the `image` function to the ImageInspector package, we have to install the Colors package. However, we do not want to add it to `examples` but to `ImageInspector`. Printing the working directory by `pwd()`, we realize that we are in the correct folder and activate the working environment by `activate .` The dot represents the current working directory. + Since we want to add the `image` function to the ImageInspector package, we have to install the Colors package. However, we do not want to add it to `examples` but to `ImageInspector`. Printing the working directory by `pwd()`, we realize that we are in the correct folder and activate the working environment by `activate .` The dot represents the current working directory. -```julia -julia> pwd() -".../ImageInspector" - -(examples) pkg> activate . - Activating environment at `/path/ImageInspector/Project.toml` -``` + ```julia + julia> pwd() + ".../ImageInspector" -Now we use `add Colors` to install the Colors package. + (examples) pkg> activate . + Activating environment at `/path/ImageInspector/Project.toml` + ``` -```julia -(ImageInspector) pkg> add Colors -``` + Now we use `add Colors` to install the Colors package. 
-Since we want to work in `examples`, we change the environment back. + ```julia + (ImageInspector) pkg> add Colors + ``` -```julia -(ImageInspector) pkg> activate ./examples + Since we want to work in `examples`, we change the environment back. -(examples) -``` + ```julia + (ImageInspector) pkg> activate ./examples -With the Colors package installed, we have to add `using Colors` into the ImageInspector module. Then we can define the `image` function and `export` it. + (examples) + ``` -```julia -# /src/ImageInspector.jl -module ImageInspector + With the Colors package installed, we have to add `using Colors` into the ImageInspector module. Then we can define the `image` function and `export` it. -using Colors + ```julia + # /src/ImageInspector.jl + module ImageInspector -export image + using Colors -image(x::AbstractMatrix{<:Real}) = Gray.(x) + export image -end -``` + image(x::AbstractMatrix{<:Real}) = Gray.(x) -```@raw html -
-``` + end + ``` ![](image_1.svg) @@ -351,127 +321,97 @@ The `image` function also used `AbstractMatrix` to specify that the input must b - `AbstractVector` or `AbstractVector{T}` is equivalent to `AbstractArray` with `N=1`. We will now extend the `image` function to three-dimensional inputs. The third dimension represents the colour channels. -```@raw html -
-
Exercise:
-
-``` - -Write a method for the `image` function that converts a 3D array of real numbers to its image representation. Assume that the third dimension represents the colour channels. Three channels should be converted to an RGB point extracting the RGB channels and `RGB.(r, g, b)`. If the size of the third dimension is: +!!! warning "Exercise:" + Write a method for the `image` function that converts a 3D array of real numbers to its image representation. Assume that the third dimension represents the colour channels. Three channels should be converted to an RGB point extracting the RGB channels and `RGB.(r, g, b)`. If the size of the third dimension is: -- `1` the function should return a grayscale image, -- `3` the function should return a colour image, -- otherwise, the function should throw an error. + - `1` the function should return a grayscale image, + - `3` the function should return a colour image, + - otherwise, the function should throw an error. -Use the following code to test the `image` function. + Use the following code to test the `image` function. -```julia -# /examples/example.jl -X1 = MLDatasets.FashionMNIST(Float64, :train)[:][1]; -X2 = MLDatasets.CIFAR10(Float64, :train)[:][1]; + ```julia + # /examples/example.jl + X1 = MLDatasets.FashionMNIST(Float64, :train)[:][1]; + X2 = MLDatasets.CIFAR10(Float64, :train)[:][1]; -x1 = selectdim(X1, ndims(X1), 1) -x2 = selectdim(X2, ndims(X2), 1) + x1 = selectdim(X1, ndims(X1), 1) + x2 = selectdim(X2, ndims(X2), 1) -plot( - plot(image(x1)), - plot(image(x2)); - axis = nothing, - border = :none -) -``` - -**Hint:** use the `eachslice` function to split the array along the third dimension and the `dropdims` function to drop a dimension slice. + plot( + plot(image(x1)), + plot(image(x2)); + axis = nothing, + border = :none + ) + ``` -```@raw html -
-
-Solution: -
-``` + **Hint:** use the `eachslice` function to split the array along the third dimension and the `dropdims` function to drop a dimension slice. -The functionality depends on the size of the third dimension. -- If the size of the third dimension is 1, we use the `dropdims` to remove the third dimension. Then we call the `image` method from the previous exercise. -- If the dimension size is 3, we use `PermutedDimsArray` if `flip` is true. We can extract the three channels manually, or we can use the `eachslice` function. -- Otherwise, we throw an `ArgumentError`. -Altogether, the new method can be defined as follows. +!!! details "Solution:" + The functionality depends on the size of the third dimension. + - If the size of the third dimension is 1, we use the `dropdims` to remove the third dimension. Then we call the `image` method from the previous exercise. + - If the dimension size is 3, we use `PermutedDimsArray` if `flip` is true. We can extract the three channels manually, or we can use the `eachslice` function. + - Otherwise, we throw an `ArgumentError`. + Altogether, the new method can be defined as follows. -```julia -# /src/ImageInspector.jl -function image(x::AbstractArray{T,3}; flip = true) where {T <: Real} - s = size(x, 3) - if s == 1 - return image(dropdims(x; dims = 3); flip) - elseif s == 3 - xx = flip ? PermutedDimsArray(x, (2, 1, 3)) : x - r, g, b = eachslice(xx; dims=3) - return RGB.(r, g, b) - else - throw(ArgumentError("unsupported size of the third dimension $(s) ∉ [1,3].")) + ```julia + # /src/ImageInspector.jl + function image(x::AbstractArray{T,3}; flip = true) where {T <: Real} + s = size(x, 3) + if s == 1 + return image(dropdims(x; dims = 3); flip) + elseif s == 3 + xx = flip ? PermutedDimsArray(x, (2, 1, 3)) : x + r, g, b = eachslice(xx; dims=3) + return RGB.(r, g, b) + else + throw(ArgumentError("unsupported size of the third dimension $(s) ∉ [1,3].")) + end end -end -``` - -```@raw html -
-``` + ``` ![](image_3.svg) Multiple images are usually stored in multi-dimensional arrays. For example, grayscale images are stored as 3D or 4D arrays, where the last dimension represents individual images. Similarly, colour images are stored as a 4D array. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Add new methods for the `image` function with the following properties: -Add new methods for the `image` function with the following properties: + - New methods should accept two arguments: + - `x`: 3D or 4D array of real numbers that represents images, + - `inds`: one or more image indices to extract and convert to Gray/RGB representation. + - If only one index is provided, the method should return a single image in its representation. + - If more indices are provided, the method should return an array of images. -- New methods should accept two arguments: - - `x`: 3D or 4D array of real numbers that represents images, - - `inds`: one or more image indices to extract and convert to Gray/RGB representation. -- If only one index is provided, the method should return a single image in its representation. -- If more indices are provided, the method should return an array of images. + Use the following code to test the `image` function. -Use the following code to test the `image` function. - -```julia -# /examples/example.jl -X = MLDatasets.FashionMNIST(Float64, :train)[:][1]; - -plot(plot.(image(X, [1,2]))...; axis = nothing, border = :none) -``` - -```@raw html -
-
-Solution: -
-``` + ```julia + # /examples/example.jl + X = MLDatasets.FashionMNIST(Float64, :train)[:][1]; -We have four possible combinations of the input arguments: + plot(plot.(image(X, [1,2]))...; axis = nothing, border = :none) + ``` -1. 3D array and one index, -2. 3D array and multiple indices, -3. 4D array and one index, -4. 4D array and multiple indices. +!!! details "Solution:" + We have four possible combinations of the input arguments: -We should, therefore, define a method for each combination of input arguments. We can do this in the following way: + 1. 3D array and one index, + 2. 3D array and multiple indices, + 3. 4D array and one index, + 4. 4D array and multiple indices. -```julia -# /src/ImageInspector.jl -image(x::AbstractArray{T,3}, inds; flip = true) where {T <: Real} = [image(x[:,:,i]; flip) for i in inds] -image(x::AbstractArray{T,4}, inds; flip = true) where {T <: Real} = [image(x[:,:,:,i]; flip) for i in inds] -image(x::AbstractArray{T,3}, ind::Int; flip = true) where {T <: Real} = image(x, [ind]; flip)[1] -image(x::AbstractArray{T,4}, ind::Int; flip = true) where {T <: Real} = image(x, [ind]; flip)[1] -``` + We should, therefore, define a method for each combination of input arguments. We can do this in the following way: -Since `x[:,:,i]` creates a new copy, it can be replaced by `selectdim(x, 3, i)`, which creates a view. + ```julia + # /src/ImageInspector.jl + image(x::AbstractArray{T,3}, inds; flip = true) where {T <: Real} = [image(x[:,:,i]; flip) for i in inds] + image(x::AbstractArray{T,4}, inds; flip = true) where {T <: Real} = [image(x[:,:,:,i]; flip) for i in inds] + image(x::AbstractArray{T,3}, ind::Int; flip = true) where {T <: Real} = image(x, [ind]; flip)[1] + image(x::AbstractArray{T,4}, ind::Int; flip = true) where {T <: Real} = image(x, [ind]; flip)[1] + ``` -```@raw html -
-``` + Since `x[:,:,i]` creates a new copy, it can be replaced by `selectdim(x, 3, i)`, which creates a view. ![](image_4.svg) @@ -802,7 +742,7 @@ plot(imagegrid(X, 1:10; nrows = 2, sep = 2); axis = nothing, border = :none) ![](image_5.svg) -!!! bonus "Optional dependencies:" +!!! compat "Optional dependencies:" We used the same settings for the `plot` function in all previous examples. Therefore, it makes sense to write an auxiliary function setting attributes for the `plot` function. However, this function will depend on the `Plots` package, and if we add `Plots` to `ImageInspector`, it will significantly slow the loading time. The `Requires` package prevents explicit dependencies (and long load times) by allowing conditional code loading. In our case, we first add `Requires` to the `ImageInspector`. ```julia diff --git a/docs/src/lecture_07/modules.md b/docs/src/lecture_07/modules.md index 95e8dab0f..57b79f1f1 100644 --- a/docs/src/lecture_07/modules.md +++ b/docs/src/lecture_07/modules.md @@ -109,7 +109,7 @@ distance(p, q) Besides the `using` keyword, Julia also provides the `import` keyword to import modules and packages. Its behaviour is slightly different; for more information, see the [official documentation](https://docs.julialang.org/en/v1/manual/modules/#Summary-of-module-usage). -!!! bonus "Relative and absolute module paths:" +!!! compat "Relative and absolute module paths:" In the previous section, we added a dot before the module name in the `using` keyword. The reason is that if we import a module, the system consults an internal table of top-level modules to find the given module name. If the module does not exist, the system attempts to `require(:ModuleName)`, which typically results in loading code from an installed package. However, if we evaluate code in the REPL, the code is evaluated in the `Main` module. Then `Points` are not in a top-level module but in a submodule of `Main`. 
@@ -130,7 +130,7 @@ Besides the `using` keyword, Julia also provides the `import` keyword to import Adding one more leading dot moves the path one additional level up in the module hierarchy. For example, `using ..Points` would look for `Points` in the enclosing module for `Main` rather than `Main` itself. -!!! bonus "Modules and files:" +!!! compat "Modules and files:" Since modules are associated only with module expressions, files are largely unrelated to modules. One can have multiple files in a module. ```julia diff --git a/docs/src/lecture_07/pkg.md b/docs/src/lecture_07/pkg.md index 3cf2c4c32..b71cf2a79 100644 --- a/docs/src/lecture_07/pkg.md +++ b/docs/src/lecture_07/pkg.md @@ -121,7 +121,7 @@ Like the help for functions, we can use `?` in the Pkg REPL to list all its avai [...] ``` -!!! bonus "Non-interactive package manager:" +!!! compat "Non-interactive package manager:" We can also use the package manager in a non-interactive way from the Julia REPL by the `Pkg` package. ```julia @@ -130,7 +130,7 @@ Like the help for functions, we can use `?` in the Pkg REPL to list all its avai Pkg.add(url = "https://github.com/JuliaLang/Example.jl") ``` -!!! bonus "JuliaHub:" +!!! compat "JuliaHub:" [JuliaHub](https://juliahub.com) is a web service provided by [Julia Computing](https://juliacomputing.com/) that allows to explore the Julia ecosystem, build packages, and run code in the cloud. It allows for exploring packages, documentation, repositories and code written by other users. ## Environments diff --git a/docs/src/lecture_08/constrained.md b/docs/src/lecture_08/constrained.md index 8732ced5a..4a4a1fc20 100644 --- a/docs/src/lecture_08/constrained.md +++ b/docs/src/lecture_08/constrained.md @@ -105,7 +105,7 @@ Even though the primal and dual formulations are not generally equivalent, they For the unconstrained optimization, we showed that each local minimum satisfies the optimality condition ``\nabla f(x)=0``. 
This condition does not have to hold for constrained optimization, where the optimality conditions are of a more complex form. -!!! theorem "Theorem: Karush-Kuhn-Tucker conditions" +!!! todo "Theorem: Karush-Kuhn-Tucker conditions" Let ``f``, ``g_i`` and ``h_j`` be differentiable function and let a constraint qualification hold. If ``x`` is a local minimum of the primal problem (P), then there are $\lambda\ge 0$ and $\mu$ such that ```math diff --git a/docs/src/lecture_08/exercises.md b/docs/src/lecture_08/exercises.md index d6dd4e33c..ec9d311d6 100644 --- a/docs/src/lecture_08/exercises.md +++ b/docs/src/lecture_08/exercises.md @@ -1,120 +1,91 @@ # [Exercises](@id l7-exercises) -```@raw html -
-
Exercise 1: Solving a system of linear equations
-
-``` - -The update of Newton's method computes ``A^{-1}b``. The most intuitive way of writing this is to use `inv(A) * b`, which first computes the inverse of `A` and then multiplies it with a vector. However, this approach has several disadvantages: -- Specialized algorithms for solving the linear system ``Ax=b`` cannot be used. -- When `A` is sparse, this inverse is dense and additional memory is needed to store the dense matrix. -For these reasons, the linear system of equations is solved by `A \ b`, which calls specialized algorithms. - -Use the package `BenchmarkTools` to benchmark both possibilities. - -```@raw html -
-
-Solution: -
-``` -We first create a random matrix `A` and a random vector `b`. - -```julia -using BenchmarkTools - -n = 1000 -A = randn(n,n) -b = randn(n) -``` - -We first verify that both possibilities result in the same number. - -```julia -julia> using LinearAlgebra - -julia> norm(inv(A)*b - A \ b) -9.321906736594836e-12 -``` - -We benchmark the first possibility. - -```julia -julia> @btime inv($A)*($b) - 71.855 ms (6 allocations: 8.13 MiB) -``` - -We benchmark the second possibility. - -```julia -julia> @btime ($A) \ ($b) - 31.126 ms (4 allocations: 7.64 MiB) -``` - -The second possibility is faster and has lower memory requirements. - -```@raw html -
-``` - -```@raw html -
-
Exercise 2: Bisection method
-
-``` - -Similarly to Newton's method, the bisection method is primarily designed to solve equations by finding their zero points. It is only able to solve equations ``f(x)=0`` where ``f:\mathbb{R}\to\mathbb{R}``. It starts with an interval ``[a,b]`` where ``f`` has opposite values ``f(a)f(b)<0``. Then it selects the middle point on ``[a,b]`` and halves the interval so that the new interval again satisfies the constraint on opposite signs ``f(a)f(b)<0``. This is repeated until the function value is small or until the interval has a small length. - -Implement the bisection method and use it to minimize ``f(x) = x^2 - x`` on ``[-1,1]``. During the implementation, do not evaluate ``f`` unless necessary. - -```@raw html -
-
-Solution: -
-``` - -First, we write the bisection method. We initialize it with arguments ``f`` and the initial interval ``[a,b]``. We also specify the optional tolerance. First, we save the function value ```fa = f(a)``` to not need to recompute it every time. The syntax ```fa == 0 && return a``` is a bit complex. Since ```&&``` is the "and" operator, this first checks whether ```fa == 0``` is satisfied, and if so, it evaluates the second part. However, the second part exits the function and returns ```a```. Since we need to have ``f(a)f(b)<0``, we check this condition, and if it is not satisfied, we return an error message. Finally, we run the while loop, where every iteration halves the interval. The condition on opposite signs is enforced in the if condition inside the loop. - -```@example bisec -function bisection(f, a, b; tol=1e-6) - fa = f(a) - fb = f(b) - fa == 0 && return a - fb == 0 && return b - fa*fb > 0 && error("Wrong initial values for bisection") - while b-a > tol - c = (a+b)/2 - fc = f(c) - fc == 0 && return c - if fa*fc > 0 - a = c - fa = fc - else - b = c - fb = fc +!!! warning "Exercise 1: Solving a system of linear equations" + The update of Newton's method computes ``A^{-1}b``. The most intuitive way of writing this is to use `inv(A) * b`, which first computes the inverse of `A` and then multiplies it with a vector. However, this approach has several disadvantages: + - Specialized algorithms for solving the linear system ``Ax=b`` cannot be used. + - When `A` is sparse, this inverse is dense and additional memory is needed to store the dense matrix. + For these reasons, the linear system of equations is solved by `A \ b`, which calls specialized algorithms. + + Use the package `BenchmarkTools` to benchmark both possibilities. + +!!! details "Solution:" + We first create a random matrix `A` and a random vector `b`. 
+ + ```julia + using BenchmarkTools + + n = 1000 + A = randn(n,n) + b = randn(n) + ``` + + We first verify that both possibilities result in the same number. + + ```julia + julia> using LinearAlgebra + + julia> norm(inv(A)*b - A \ b) + 9.321906736594836e-12 + ``` + + We benchmark the first possibility. + + ```julia + julia> @btime inv($A)*($b) + 71.855 ms (6 allocations: 8.13 MiB) + ``` + + We benchmark the second possibility. + + ```julia + julia> @btime ($A) \ ($b) + 31.126 ms (4 allocations: 7.64 MiB) + ``` + + The second possibility is faster and has lower memory requirements. + +!!! warning "Exercise 2: Bisection method" + Similarly to Newton's method, the bisection method is primarily designed to solve equations by finding their zero points. It is only able to solve equations ``f(x)=0`` where ``f:\mathbb{R}\to\mathbb{R}``. It starts with an interval ``[a,b]`` where ``f`` has opposite values ``f(a)f(b)<0``. Then it selects the middle point on ``[a,b]`` and halves the interval so that the new interval again satisfies the constraint on opposite signs ``f(a)f(b)<0``. This is repeated until the function value is small or until the interval has a small length. + + Implement the bisection method and use it to minimize ``f(x) = x^2 - x`` on ``[-1,1]``. During the implementation, do not evaluate ``f`` unless necessary. + +!!! details "Solution:" + First, we write the bisection method. We initialize it with arguments ``f`` and the initial interval ``[a,b]``. We also specify the optional tolerance. First, we save the function value ```fa = f(a)``` to not need to recompute it every time. The syntax ```fa == 0 && return a``` is a bit complex. Since ```&&``` is the "and" operator, this first checks whether ```fa == 0``` is satisfied, and if so, it evaluates the second part. However, the second part exits the function and returns ```a```. Since we need to have ``f(a)f(b)<0``, we check this condition, and if it is not satisfied, we return an error message. 
Finally, we run the while loop, where every iteration halves the interval. The condition on opposite signs is enforced in the if condition inside the loop. + + ```@example bisec + function bisection(f, a, b; tol=1e-6) + fa = f(a) + fb = f(b) + fa == 0 && return a + fb == 0 && return b + fa*fb > 0 && error("Wrong initial values for bisection") + while b-a > tol + c = (a+b)/2 + fc = f(c) + fc == 0 && return c + if fa*fc > 0 + a = c + fa = fc + else + b = c + fb = fc + end end + return (a+b)/2 end - return (a+b)/2 -end -nothing # hide -``` + nothing # hide + ``` -This implementation is efficient in the way that only one function evaluation is needed per iteration. The price to pay are additional variables ```fa```, ```fb``` and ```fc```. + This implementation is efficient in the way that only one function evaluation is needed per iteration. The price to pay are additional variables ```fa```, ```fb``` and ```fc```. -To use the bisection method to minimize a function ``f(x)``, we use it find the solution of the optimality condition ``f'(x)=0``. + To use the bisection method to minimize a function ``f(x)``, we use it find the solution of the optimality condition ``f'(x)=0``. -```@example bisec -f(x) = x^2 - x -g(x) = 2*x - 1 -x_opt = bisection(g, -1, 1) -nothing # hide -``` - -```@raw html -
-``` + ```@example bisec + f(x) = x^2 - x + g(x) = 2*x - 1 + x_opt = bisection(g, -1, 1) + nothing # hide + ``` The correct solution is @@ -122,56 +93,41 @@ The correct solution is println(round(x_opt, digits=4)) # hide ``` -```@raw html -
-
Exercise 3: JuMP
-
-``` +!!! warning "Exercise 3: JuMP" + The library to perform optimization is called ```JuMP```. Install it, go briefly through its documentation, and use it to solve the linear optimization problem -The library to perform optimization is called ```JuMP```. Install it, go briefly through its documentation, and use it to solve the linear optimization problem + ```math + \begin{aligned} + \text{minimize}\qquad &x_1 + x_2 + x_5 \\ + \text{subject to}\qquad &x_1+2x_2+3x_3+4x_4+5x_5 = 8, \\ + &x_3+x_4+x_5 = 2, \\ + &x_1+x_2 = 2. + \end{aligned} + ``` -```math -\begin{aligned} -\text{minimize}\qquad &x_1 + x_2 + x_5 \\ -\text{subject to}\qquad &x_1+2x_2+3x_3+4x_4+5x_5 = 8, \\ -&x_3+x_4+x_5 = 2, \\ -&x_1+x_2 = 2. -\end{aligned} -``` +!!! details "Solution:" + The best start is the official documentation of the [JuMP package](https://jump.dev/JuMP.jl/stable/quickstart/). Since ```JuMP``` is only an interface for solvers, we need to include an actual solver as well. For linear programs, we can use ```using GLPK```, for non-linear ones, we would need to use ```using Ipopt```. We specify the constraints in a matrix form. It is possible to write them directly via ```@constraint(model, x[1] + x[2] == 2)```. This second way is more pleasant for complex constraints. Since ```x``` is a vector, we need to use ```value.(x)``` instead of the wrong ```value(x)```. -```@raw html -
-
-Solution: -
-``` + ```@example optim + using JuMP + using GLPK -The best start is the official documentation of the [JuMP package](https://jump.dev/JuMP.jl/stable/quickstart/). Since ```JuMP``` is only an interface for solvers, we need to include an actual solver as well. For linear programs, we can use ```using GLPK```, for non-linear ones, we would need to use ```using Ipopt```. We specify the constraints in a matrix form. It is possible to write them directly via ```@constraint(model, x[1] + x[2] == 2)```. This second way is more pleasant for complex constraints. Since ```x``` is a vector, we need to use ```value.(x)``` instead of the wrong ```value(x)```. + A = [1 2 3 4 5; 0 0 1 1 1; 1 1 0 0 0] + b = [8; 2; 2] + c = [1; 1; 0; 0; 1] + n = size(A, 2) -```@example optim -using JuMP -using GLPK + model = Model(GLPK.Optimizer) -A = [1 2 3 4 5; 0 0 1 1 1; 1 1 0 0 0] -b = [8; 2; 2] -c = [1; 1; 0; 0; 1] -n = size(A, 2) + @variable(model, x[1:n] >= 0) -model = Model(GLPK.Optimizer) + @objective(model, Min, c'*x) + @constraint(model, A*x .== b) + optimize!(model) -@variable(model, x[1:n] >= 0) - -@objective(model, Min, c'*x) -@constraint(model, A*x .== b) -optimize!(model) - -x_val = value.(x) -nothing # hide -``` - -```@raw html -
-``` + x_val = value.(x) + nothing # hide + ``` The correct solution is @@ -179,104 +135,89 @@ The correct solution is println(round.(x_val, digits=4)) # hide ``` -```@raw html -
-
Exercise 4: SQP method
-
-``` +!!! warning "Exercise 4: SQP" + Derive the SQP method for optimization problem with only equality constraints -Derive the SQP method for optimization problem with only equality constraints + ```math + \begin{aligned} + \text{minimize}\qquad &f(x) \\ + \text{subject to}\qquad &h_j(x) = 0, j=1,\dots,J. + \end{aligned} + ``` -```math -\begin{aligned} -\text{minimize}\qquad &f(x) \\ -\text{subject to}\qquad &h_j(x) = 0, j=1,\dots,J. -\end{aligned} -``` + SQP writes the [Karush-Kuhn-Tucker](@ref lagrangian) optimality conditions and then applies Newton's method to solve the resulting system of equations. -SQP writes the [Karush-Kuhn-Tucker](@ref lagrangian) optimality conditions and then applies Newton's method to solve the resulting system of equations. + Apply the obtained algorithm to -Apply the obtained algorithm to + ```math + \begin{aligned} + \text{minimize}\qquad &\sum_{i=1}^{10} ix_i^4 \\ + \text{subject to}\qquad &\sum_{i=1}^{10} x_i = 1. + \end{aligned} + ``` -```math -\begin{aligned} -\text{minimize}\qquad &\sum_{i=1}^{10} ix_i^4 \\ -\text{subject to}\qquad &\sum_{i=1}^{10} x_i = 1. -\end{aligned} -``` + Verify that the numerically obtained solution is correct. -Verify that the numerically obtained solution is correct. +!!! details "Solution:" + The Lagrangian reads -```@raw html -
-
-Solution: -
-``` + ```math + L(x,\mu) = f(x) + \sum_{j=1}^J\mu_j h_j(x). + ``` -The Lagrangian reads + Since there are no inequality constraints, the optimality conditions contain no complementarity and read -```math -L(x,\mu) = f(x) + \sum_{j=1}^J\mu_j h_j(x). -``` + ```math + \begin{aligned} + \nabla f(x) + \sum_{j=1}^J\mu_j \nabla h_j(x) &= 0,\\ + h_j(x) &= 0, + \end{aligned} + ``` -Since there are no inequality constraints, the optimality conditions contain no complementarity and read + The Newton method's at iteration ``k`` has some pair ``(x^k,\mu^k)`` and performs the update -```math -\begin{aligned} -\nabla f(x) + \sum_{j=1}^J\mu_j \nabla h_j(x) &= 0,\\ -h_j(x) &= 0, -\end{aligned} -``` + ```math + \begin{pmatrix} x^{k+1} \\ \mu^{k+1} \end{pmatrix} = \begin{pmatrix} x^{k} \\ \mu^{k} \end{pmatrix} - \begin{pmatrix} \nabla^2 f(x^k) + \sum_{j=1}^J \mu_j^k \nabla^2 h_j(x^k) & \nabla h(x^k) \\ \nabla h(x^k)^\top & 0 \end{pmatrix}^{-1} \begin{pmatrix} \nabla f(x^k) + \sum_{j=1}^J\mu_j^k \nabla h_j(x^k) \\ h(x^k) \end{pmatrix}. + ``` -The Newton method's at iteration ``k`` has some pair ``(x^k,\mu^k)`` and performs the update + We define functions ``f`` and ``h`` and their derivates and Hessians for the numerical implementation. The simplest way to create a diagonal matrix is `Diagonal` from the `LinearAlgebra` package. It can be, of course, done manually as well. -```math -\begin{pmatrix} x^{k+1} \\ \mu^{k+1} \end{pmatrix} = \begin{pmatrix} x^{k} \\ \mu^{k} \end{pmatrix} - \begin{pmatrix} \nabla^2 f(x^k) + \sum_{j=1}^J \mu_j^k \nabla^2 h_j(x^k) & \nabla h(x^k) \\ \nabla h(x^k)^\top & 0 \end{pmatrix}^{-1} \begin{pmatrix} \nabla f(x^k) + \sum_{j=1}^J\mu_j^k \nabla h_j(x^k) \\ h(x^k) \end{pmatrix}. -``` - -We define functions ``f`` and ``h`` and their derivates and Hessians for the numerical implementation. The simplest way to create a diagonal matrix is `Diagonal` from the `LinearAlgebra` package. It can be, of course, done manually as well. 
+ ```@example sqp + using LinearAlgebra -```@example sqp -using LinearAlgebra + n = 10 + f(x) = sum((1:n) .* x.^4) + f_grad(x) = 4*(1:n).*x.^3 + f_hess(x) = 12*Diagonal((1:n).*x.^2) + h(x) = sum(x) - 1 + h_grad(x) = ones(n) + h_hess(x) = zeros(n,n) + nothing # hide + ``` -n = 10 -f(x) = sum((1:n) .* x.^4) -f_grad(x) = 4*(1:n).*x.^3 -f_hess(x) = 12*Diagonal((1:n).*x.^2) -h(x) = sum(x) - 1 -h_grad(x) = ones(n) -h_hess(x) = zeros(n,n) -nothing # hide -``` + To implement SQP, we first randomly generate initial ``x`` and ``\mu`` and then write the procedure derived above. Since we update ``x`` in a for loop, we need to define it as a ```global``` variables; otherwise, it will be a local variable, and the global (outside of the loop) will not update. We can write ```inv(A)*b``` or the more efficient ```A\b```. To subtract from ``x``, we use the shortened notation ```x -= ?```, which is the same as ```x = x - ?```. -To implement SQP, we first randomly generate initial ``x`` and ``\mu`` and then write the procedure derived above. Since we update ``x`` in a for loop, we need to define it as a ```global``` variables; otherwise, it will be a local variable, and the global (outside of the loop) will not update. We can write ```inv(A)*b``` or the more efficient ```A\b```. To subtract from ``x``, we use the shortened notation ```x -= ?```, which is the same as ```x = x - ?```. - -```@example sqp -x = randn(n) -μ = randn() -for i in 1:100 - global x, μ - A = [f_hess(x) + μ*h_hess(x) h_grad(x); h_grad(x)' 0] - b = [f_grad(x) + μ*h_grad(x); h(x)] - step = A \ b - x -= step[1:n] - μ -= step[n+1] -end -``` - -The need to differentiate global and local variables in scripts is one reason why functions should be used as much as possible. 
+ ```@example sqp + x = randn(n) + μ = randn() + for i in 1:100 + global x, μ + A = [f_hess(x) + μ*h_hess(x) h_grad(x); h_grad(x)' 0] + b = [f_grad(x) + μ*h_grad(x); h(x)] + step = A \ b + x -= step[1:n] + μ -= step[n+1] + end + ``` -To validate, we need to verify the optimality and the feasibility; both need to equal zero. These are the same as the ```b``` variable. However, we cannot call ```b``` directly, as it is inside the for loop and therefore local only. + The need to differentiate global and local variables in scripts is one reason why functions should be used as much as possible. -```@repl sqp -f_grad(x) + μ*h_grad(x) -h(x) -``` + To validate, we need to verify the optimality and the feasibility; both need to equal zero. These are the same as the ```b``` variable. However, we cannot call ```b``` directly, as it is inside the for loop and therefore local only. -```@raw html -
-``` + ```@repl sqp + f_grad(x) + μ*h_grad(x) + h(x) + ``` The correct solution is @@ -284,97 +225,67 @@ The correct solution is println(round.(x, digits=4)) # hide ``` -```@raw html -
-
Exercise 5 (theory):
-
-``` +!!! warning "Exercise 5 (theory)" + Show that the primal formulation for a problem with no inequalities is equivalent to the min-max formulation. -Show that the primal formulation for a problem with no inequalities is equivalent to the min-max formulation. +!!! details "Solution:" + The primal problem with no inequalities reads -```@raw html -
-
-Solution: -
-``` + ```math + \begin{aligned} + \text{minimize}\qquad &f(x) \\ + \text{subject to}\qquad &h_j(x) = 0,\ j=1,\dots,J. + \end{aligned} + ``` -The primal problem with no inequalities reads + The Lagrangian has form -```math -\begin{aligned} -\text{minimize}\qquad &f(x) \\ -\text{subject to}\qquad &h_j(x) = 0,\ j=1,\dots,J. -\end{aligned} -``` + ```math + L(x;\lambda,\mu) = f(x) + \sum_{j=1}^J \mu_j h_j(x). + ``` -The Lagrangian has form + Now consider the min-max formulation -```math -L(x;\lambda,\mu) = f(x) + \sum_{j=1}^J \mu_j h_j(x). -``` + ```math + \operatorname*{minimize}_x\quad \operatorname*{maximize}_{\mu}\quad f(x) + \sum_{j=1}^J \mu_j h_j(x). + ``` -Now consider the min-max formulation + If ``h_j(x)\neq 0``, then it is simple to choose ``\mu_j``so that the inner maximization problem has the optimal value ``+\infty``. However, since the outer problem minimizes the objective, the value of ``+\infty`` is irrelevant. Therefore, we can ignore all points with ``h_j(x)\neq 0`` and prescribe ``h_j(x)=0`` as a hard constraint. That is precisely the primal formulation. -```math -\operatorname*{minimize}_x\quad \operatorname*{maximize}_{\mu}\quad f(x) + \sum_{j=1}^J \mu_j h_j(x). -``` +!!! warning "Exercise 6 (theory)" + Derive the dual formulation for the linear programming. -If ``h_j(x)\neq 0``, then it is simple to choose ``\mu_j``so that the inner maximization problem has the optimal value ``+\infty``. However, since the outer problem minimizes the objective, the value of ``+\infty`` is irrelevant. Therefore, we can ignore all points with ``h_j(x)\neq 0`` and prescribe ``h_j(x)=0`` as a hard constraint. That is precisely the primal formulation. +!!! details "Solution:" + The linear program -```@raw html -
-``` + ```math + \begin{aligned} + \text{minimize}\qquad &c^\top x \\ + \text{subject to}\qquad &Ax=b, \\ + &x\ge 0 + \end{aligned} + ``` -```@raw html -
-
Exercise 6 (theory):
-
-``` + has the Lagrangian -Derive the dual formulation for the linear programming. + ```math + L(x;\lambda,\mu) = c^\top x - \lambda^\top x + \mu^\top (b-Ax) = (c - \lambda - A^\top\mu)^\top x + b^\top \mu. + ``` -```@raw html -
-
-Solution: -
-``` + We need to have ``- \lambda^\top x`` because we require constraints ``g(x)\le 0`` or in other words ``-x\le 0``. The dual problem from its definition reads -The linear program + ```math + \operatorname*{maximize}_{\lambda\ge0, \mu} \quad \operatorname*{minimize}_x \quad (c - \lambda - A^\top\mu)^\top x + b^\top \mu. + ``` -```math -\begin{aligned} -\text{minimize}\qquad &c^\top x \\ -\text{subject to}\qquad &Ax=b, \\ -&x\ge 0 -\end{aligned} -``` - -has the Lagrangian - -```math -L(x;\lambda,\mu) = c^\top x - \lambda^\top x + \mu^\top (b-Ax) = (c - \lambda - A^\top\mu)^\top x + b^\top \mu. -``` - -We need to have ``- \lambda^\top x`` because we require constraints ``g(x)\le 0`` or in other words ``-x\le 0``. The dual problem from its definition reads - -```math -\operatorname*{maximize}_{\lambda\ge0, \mu} \quad \operatorname*{minimize}_x \quad (c - \lambda - A^\top\mu)^\top x + b^\top \mu. -``` - -Since the minimization with respect to ``x`` is unconstrained, the same arguments as the previous exercise imply the hard constraint ``c - \lambda - A^\top\mu=0``. Then we may simplify the dual problem into - -```math -\begin{aligned} -\text{maximize}\qquad &b^\top \mu \\ -\text{subject to}\qquad &c - \lambda - A^\top\mu = 0, \\ -&\lambda\ge 0. -\end{aligned} -``` + Since the minimization with respect to ``x`` is unconstrained, the same arguments as the previous exercise imply the hard constraint ``c - \lambda - A^\top\mu=0``. Then we may simplify the dual problem into -From this formulation, we may remove ``\lambda`` and obtain ``A^\top \mu\le c``. This is the desired dual formulation. + ```math + \begin{aligned} + \text{maximize}\qquad &b^\top \mu \\ + \text{subject to}\qquad &c - \lambda - A^\top\mu = 0, \\ + &\lambda\ge 0. + \end{aligned} + ``` -```@raw html -
-``` \ No newline at end of file + From this formulation, we may remove ``\lambda`` and obtain ``A^\top \mu\le c``. This is the desired dual formulation. \ No newline at end of file diff --git a/docs/src/lecture_08/gradients.md b/docs/src/lecture_08/gradients.md index 484127207..68fceedd6 100644 --- a/docs/src/lecture_08/gradients.md +++ b/docs/src/lecture_08/gradients.md @@ -19,7 +19,7 @@ The formal definition is more complicated, but this one is better for visualizat Functions are usually complicated, and this definition cannot be used to compute the gradient. Instead, the objective function ``f`` is rewritten as a composition of simple functions, these simple functions are differentiated, and the chain rule is applied to get ``\nabla f``. -!!! theorem "Theorem: Chain" +!!! todo "Theorem: Chain" Consider two differentiable functions ``f:\mathbb{R}^m\to\mathbb{R}^s`` and ``g:\mathbb{R}^n\to\mathbb{R}^m``. Then its composition ``h(x) := f(g(x))`` is differentiable with Jacobian ```math \nabla h(x) = \nabla f(g(x))\nabla g(x). @@ -44,64 +44,49 @@ f(x) = \sin(x_1 + x_2) + \cos(x_1)^2 on domain ``[-3,1]\times [-2,1]``. -```@raw html -
-
Exercise: Contour plot
-
-``` - -Write a function ```g(x)``` which computes the derivative of ``f`` at a point ``x``. Plot the contours of ``f`` on the domain. - -**Hint**: Use the keyword argument ```color = :jet``` for better visualization. +!!! warning "Exercise: Contour plot" + Write a function ```g(x)``` which computes the derivative of ``f`` at a point ``x``. Plot the contours of ``f`` on the domain. -```@raw html -
-
-Solution: -
-``` - -Function ```f(x)``` takes as an input a vector of two dimensions and returns a scalar. Therefore, the gradient is a two-dimensional vector, which we create by ```[?; ?]```. Its components are computed from the chain rule. + **Hint**: Use the keyword argument ```color = :jet``` for better visualization. -```@example optim -f(x) = sin(x[1] + x[2]) + cos(x[1])^2 -g(x) = [cos(x[1] + x[2]) - 2*cos(x[1])*sin(x[1]); cos(x[1] + x[2])] +!!! details "Solution:" + Function ```f(x)``` takes as an input a vector of two dimensions and returns a scalar. Therefore, the gradient is a two-dimensional vector, which we create by ```[?; ?]```. Its components are computed from the chain rule. -nothing # hide -``` + ```@example optim + f(x) = sin(x[1] + x[2]) + cos(x[1])^2 + g(x) = [cos(x[1] + x[2]) - 2*cos(x[1])*sin(x[1]); cos(x[1] + x[2])] -Since sometimes it is better to use notation ``f(x)`` and sometimes ``f(x_1,x_2)``, we overload the function ```f```. + nothing # hide + ``` -```@example optim -f(x1,x2) = f([x1;x2]) + Since sometimes it is better to use notation ``f(x)`` and sometimes ``f(x_1,x_2)``, we overload the function ```f```. -f([0; 0]) -f(0, 0) + ```@example optim + f(x1,x2) = f([x1;x2]) -nothing # hide -``` + f([0; 0]) + f(0, 0) -```@example optim -println(f([0; 0])) # hide -println(f(0, 0)) # hide -``` + nothing # hide + ``` -We use the ```Plots``` package for plotting. We create the discretization ```xs``` and ```ys``` of both axis and then call the ```contourf``` function. + ```@example optim + println(f([0; 0])) # hide + println(f(0, 0)) # hide + ``` -```@example optim -using Plots + We use the ```Plots``` package for plotting. We create the discretization ```xs``` and ```ys``` of both axis and then call the ```contourf``` function. 
-xs = range(-3, 1, length = 40) -ys = range(-2, 1, length = 40) + ```@example optim + using Plots -contourf(xs, ys, f, color = :jet) + xs = range(-3, 1, length = 40) + ys = range(-2, 1, length = 40) -savefig("grad1.svg") # hide -``` + contourf(xs, ys, f, color = :jet) -```@raw html -
-``` + savefig("grad1.svg") # hide + ``` ![](grad1.svg) @@ -119,96 +104,66 @@ by fixing some ``h`` and approximates the gradient by f'(x) \approx \frac{f(x+h)-f(x)}{h}. ``` -```@raw html -
-
Exercise: Finite difference approximation
-
-``` - -Write a function ```finite_difference``` which computes the approximation of ``f'(x)`` by finite differences. The inputs are a function ``f:\mathbb R\to\mathbb R`` and a point ``x\in\mathbb{R}``. It should have an optional input ``h\in\mathbb{R}``, for which you need to choose a reasonable value. - -```@raw html -
-
-Solution: -
-``` - -It is sufficient to rewrite the formula above. Since the argument ```h``` is optional, it should be after ```;```. Its good default value is anything between ``10^{-10}`` and ``10^{-5}``. We specify ```x::Real``` as a sanity check for the case when a function of more variables is passed as input. +!!! warning "Exercise: Finite difference approximation" + Write a function ```finite_difference``` which computes the approximation of ``f'(x)`` by finite differences. The inputs are a function ``f:\mathbb R\to\mathbb R`` and a point ``x\in\mathbb{R}``. It should have an optional input ``h\in\mathbb{R}``, for which you need to choose a reasonable value. -```@example optim -finite_difference(f, x::Real; h=1e-8) = (f(x+h) - f(x)) / h -nothing # hide -``` +!!! details "Solution:" + It is sufficient to rewrite the formula above. Since the argument ```h``` is optional, it should be after ```;```. Its good default value is anything between ``10^{-10}`` and ``10^{-5}``. We specify ```x::Real``` as a sanity check for the case when a function of more variables is passed as input. -```@raw html -
-``` + ```@example optim + finite_difference(f, x::Real; h=1e-8) = (f(x+h) - f(x)) / h + nothing # hide + ``` This way of computing the gradient has two disadvantages: 1. It is slow. For a function of ``n`` variables, we need to evaluate the function at least ``n+1`` times to get the whole gradient. 2. It is not precise, as the following example shows. -```@raw html -
-
Exercise: Finite difference approximation
-
-``` - -Fix a point ``x=(-2,-1)``. For a proper discretization of ``h\in [10^{-15}, 10^{-1}]`` compute the finite difference approximation of the partial derivative of ``f`` with respect to the second variable. +!!! warning "Exercise: Finite difference approximation" + Fix a point ``x=(-2,-1)``. For a proper discretization of ``h\in [10^{-15}, 10^{-1}]`` compute the finite difference approximation of the partial derivative of ``f`` with respect to the second variable. -Plot the dependence of this approximation on ``h``. Add the true derivative computed from ```g```. + Plot the dependence of this approximation on ``h``. Add the true derivative computed from ```g```. -```@raw html -
-
-Solution: -
-``` - -To compute the partial derivative with respect to the second argument, we need to fix the first argument and vary only the second one. We create an anonymous function ```y -> f(-2, y)``` and another function ```fin_diff``` which for an input ```h``` computes the finite difference. - -```@example optim -x = [-2; -1] -fin_diff(h) = finite_difference(y -> f(x[1], y), x[2]; h=h) +!!! details "Solution:" + To compute the partial derivative with respect to the second argument, we need to fix the first argument and vary only the second one. We create an anonymous function ```y -> f(-2, y)``` and another function ```fin_diff``` which for an input ```h``` computes the finite difference. -nothing # hide -``` + ```@example optim + x = [-2; -1] + fin_diff(h) = finite_difference(y -> f(x[1], y), x[2]; h=h) -The true gradient is computed by ```g(x)```. It returns a vector of length two. Since we need only the partial derivative with respect to the second component, we select it by adding ```[2]```. + nothing # hide + ``` -```@example optim -true_grad = g(x)[2] + The true gradient is computed by ```g(x)```. It returns a vector of length two. Since we need only the partial derivative with respect to the second component, we select it by adding ```[2]```. -nothing # hide -``` + ```@example optim + true_grad = g(x)[2] -Now we create the discretization of ``h`` in ```hs```. When the orders of magnitude are so different, the logarithmic scale should be used. For this reason, we create a uniform discretization of the interval ``[-15,-1]`` and then use it as an exponent. + nothing # hide + ``` -```@example optim -hs = 10. .^ (-15:0.01:-1) + Now we create the discretization of ``h`` in ```hs```. When the orders of magnitude are so different, the logarithmic scale should be used. For this reason, we create a uniform discretization of the interval ``[-15,-1]`` and then use it as an exponent. -nothing # hide -``` + ```@example optim + hs = 10. 
.^ (-15:0.01:-1) -There are many possibilities of how to create the plot. Probably the simplest one is to plot the function ```fin_diff``` and then add the true gradient (which does not depend on ``h`` and is, therefore, a horizontal line) via ```hline!```. + nothing # hide + ``` -```@example optim -plot(hs, fin_diff, - xlabel = "h", - ylabel = "Partial gradient wrt y", - label = ["Approximation" "True gradient"], - xscale = :log10, -) + There are many possibilities of how to create the plot. Probably the simplest one is to plot the function ```fin_diff``` and then add the true gradient (which does not depend on ``h`` and is, therefore, a horizontal line) via ```hline!```. -hline!([true_grad]; label = "True gradient") + ```@example optim + plot(hs, fin_diff, + xlabel = "h", + ylabel = "Partial gradient wrt y", + label = ["Approximation" "True gradient"], + xscale = :log10, + ) -savefig("grad2.svg") # hide -``` + hline!([true_grad]; label = "True gradient") -```@raw html -
-``` + savefig("grad2.svg") # hide + ``` ![](grad2.svg) @@ -231,55 +186,41 @@ gives an error already at the fourth valid digit. It is important to realize how Finally, we show how the gradients look like. -```@raw html -
-
Exercise: Direction of gradients
-
-``` - -Reproduce the previous figure with the vector field of derivatives. Therefore, plot the contours of ``f`` and its gradients at a grid of its domain ``[-3,1]\times [-2,1]``. +!!! warning "Exercise: Direction of gradients" + Reproduce the previous figure with the vector field of derivatives. Therefore, plot the contours of ``f`` and its gradients at a grid of its domain ``[-3,1]\times [-2,1]``. -**Hint**: when a plot is updated in a loop, it needs to be saved to a variable ```plt``` and then displayed via ```display(plt)```. -```@raw html -
-
-Solution: -
-``` - -First we reduce the number of grid elements and plot the contour plot. -```@example optim -xs = range(-3, 1, length = 20) -ys = range(-2, 1, length = 20) + **Hint**: when a plot is updated in a loop, it needs to be saved to a variable ```plt``` and then displayed via ```display(plt)```. -plt = contourf(xs, ys, f; - xlims = (minimum(xs), maximum(xs)), - ylims = (minimum(ys), maximum(ys)), - color = :jet -) -``` +!!! details "Solution:" + First we reduce the number of grid elements and plot the contour plot. + ```@example optim + xs = range(-3, 1, length = 20) + ys = range(-2, 1, length = 20) -We use the same functions as before. Since we want to add a line, we use ```plot!``` instead of ```plot```. We specify its parameters in an optional argument ```line = (:arrow, 2, :black)```. These parameters add the pointed arrow, the thickness and the colour of the line. Since we do not want any legend, we use ```label = ""```. Finally, since we want to create a grid, we make a loop over ```xs``` and ```ys```. + plt = contourf(xs, ys, f; + xlims = (minimum(xs), maximum(xs)), + ylims = (minimum(ys), maximum(ys)), + color = :jet + ) + ``` -```@example optim -α = 0.25 -for x1 in xs, x2 in ys - x = [x1; x2] - x_grad = [x x.+α.*g(x)] + We use the same functions as before. Since we want to add a line, we use ```plot!``` instead of ```plot```. We specify its parameters in an optional argument ```line = (:arrow, 2, :black)```. These parameters add the pointed arrow, the thickness and the colour of the line. Since we do not want any legend, we use ```label = ""```. Finally, since we want to create a grid, we make a loop over ```xs``` and ```ys```. 
- plot!(x_grad[1, :], x_grad[2, :]; - line = (:arrow, 2, :black), - label = "", - ) -end -display(plt) + ```@example optim + α = 0.25 + for x1 in xs, x2 in ys + x = [x1; x2] + x_grad = [x x.+α.*g(x)] -savefig("grad3.svg") # hide -``` + plot!(x_grad[1, :], x_grad[2, :]; + line = (:arrow, 2, :black), + label = "", + ) + end + display(plt) -```@raw html -
-``` + savefig("grad3.svg") # hide + ``` ![](grad3.svg) diff --git a/docs/src/lecture_08/unconstrained.md b/docs/src/lecture_08/unconstrained.md index 291257f81..07a08578e 100644 --- a/docs/src/lecture_08/unconstrained.md +++ b/docs/src/lecture_08/unconstrained.md @@ -23,7 +23,7 @@ f(x) \le f(y) \text{ for all }y\in X. This point is often challenging to find. Sometimes we can find a local minimum, which is a global minimum on some small neighbourhood of ``x``. However, as the following theorem suggests, we often need to lower our requirements even lower. -!!! theorem "Theorem: Connection between optimization problems and gradients" +!!! todo "Theorem: Connection between optimization problems and gradients" Consider a differentiable function ``f`` over ``X=\mathbb{R}^n``. If ``x`` is its local minimum, then ``\nabla f(x)=0``. Conversely, if ``f`` is convex, then every point ``x`` with ``\nabla f(x)=0`` is a global minimum of ``f``. Points with ``\nabla f(x)=0`` are known as stationary points. Optimization algorithms often try to find local minima or stationary points, hoping to minimize the function ``f``. The reason is the following: To optimize ``f``, we can evaluate it only at a limited number of points. Since evaluating ``f`` at a point conveys only information about the function value at this point or its small neighbourhood, we collect only local information about ``f``. Therefore, unless ``f`` has a special structure, it is possible to obtain global results from only local evaluations. @@ -60,42 +60,26 @@ Here ``c\in(0,1)`` is a small constant, usually ``c=10^{-4}``. Since the left-h In this section, we will implement the gradient descent method. -```@raw html -
-
Exercise: Gradient descent
-
-``` - -Implement function `optim`, which takes as inputs function ``f``, its gradient, starting point ``x^0`` and fixed stepsize ``\alpha`` and runs the gradient descent. Its output should be the first 100 iterations. +!!! warning "Exercise: Gradient descent:" + Implement function `optim`, which takes as inputs function ``f``, its gradient, starting point ``x^0`` and fixed stepsize ``\alpha`` and runs the gradient descent. Its output should be the first 100 iterations. -This example is rather artificial because usually only the last iteration is returned and some stopping criterion is employed instead of the fixed number of iterations. We want to get all iterations to make visualizations. - -```@raw html -
-
-Solution: -
-``` + This example is rather artificial because usually only the last iteration is returned and some stopping criterion is employed instead of the fixed number of iterations. We want to get all iterations to make visualizations. -First we need to create an empty array into which we store the data. Then at every iteration we compute the gradient ```g(x)```, perform the update and save the new value of ``x``. +!!! details "Solution:" + First we need to create an empty array into which we store the data. Then at every iteration we compute the gradient ```g(x)```, perform the update and save the new value of ``x``. -```@example optim -function optim(f, g, x, α; max_iter=100) - xs = zeros(length(x), max_iter+1) - xs[:,1] = x - for i in 1:max_iter - x -= α*g(x) - xs[:,i+1] = x + ```@example optim + function optim(f, g, x, α; max_iter=100) + xs = zeros(length(x), max_iter+1) + xs[:,1] = x + for i in 1:max_iter + x -= α*g(x) + xs[:,i+1] = x + end + return xs end - return xs -end -nothing # hide -``` - -```@raw html -

-
-``` + nothing # hide + ``` The implementation does not use the values of ``f`` but only its gradient ``\nabla f``. If the algorithm converges ``x^k \to \bar x``, then passing to the limit in the gradient update results in ``\nabla f(\bar x)=0``. Therefore, as with most optimization methods, gradient descent looks for stationary points. @@ -147,57 +131,42 @@ nothing # hide We now plot how gradient descent behaves. -```@raw html -
-
Exercise: Gradient descent
-
-``` - -Use the implementation of the gradient descent to minimize the function +!!! warning "Exercise: Gradient descent" + Use the implementation of the gradient descent to minimize the function -```math -f(x) = \sin(x_1 + x_2) + \cos(x_1)^2 -``` -from the starting point ``x^0=(0,-1)``. Use the constant stepsize ``\alpha=0.1``. Store all iterations into matrix ```xs```. + ```math + f(x) = \sin(x_1 + x_2) + \cos(x_1)^2 + ``` + from the starting point ``x^0=(0,-1)``. Use the constant stepsize ``\alpha=0.1``. Store all iterations into matrix ```xs```. -Use the `create_anim` function to plot the iteration into a gif file. + Use the `create_anim` function to plot the iteration into a gif file. -Use one line of code to evaluate the function values for all iterations ```xs``` and plot these function values. + Use one line of code to evaluate the function values for all iterations ```xs``` and plot these function values. -**Hint**: to evaluate all ``xs`` in one line, use iterate either via ```eachcol(xs)``` or ```eachrow(xs)```. + **Hint**: to evaluate all ``xs`` in one line, use iterate either via ```eachcol(xs)``` or ```eachrow(xs)```. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + We call ```optim``` from the previous exercise and then create the animation. -We call ```optim``` from the previous exercise and then create the animation. + ```@example optim + x_gd = optim([], g, [0; -1], 0.1) -```@example optim -x_gd = optim([], g, [0; -1], 0.1) + xlims = (-3, 1) + ylims = (-2, 1) + create_anim(f, x_gd, xlims, ylims, "anim1.gif") -xlims = (-3, 1) -ylims = (-2, 1) -create_anim(f, x_gd, xlims, ylims, "anim1.gif") + nothing # hide + ``` -nothing # hide -``` + To plot the function values, we need to iterate over all columns. We use ```[? for x in eachcol(x_gd)]``` and apply ```f(x)``` instead of ```?```. Another (more complicated) way is to iterate over indices instead of vectors and write ```[f(x_gs[:,i]) for i in 1:size(x_gd,2)]```. -To plot the function values, we need to iterate over all columns. We use ```[? for x in eachcol(x_gd)]``` and apply ```f(x)``` instead of ```?```. Another (more complicated) way is to iterate over indices instead of vectors and write ```[f(x_gs[:,i]) for i in 1:size(x_gd,2)]```. + ```@example optim + f_gd = [f(x) for x in eachcol(x_gd)] -```@example optim -f_gd = [f(x) for x in eachcol(x_gd)] + plot(f_gd, label="", xlabel="Iteration", ylabel="Function value") -plot(f_gd, label="", xlabel="Iteration", ylabel="Function value") - -savefig("obj.svg") # hide -``` - -```@raw html -
-``` + savefig("obj.svg") # hide + ``` ![](anim1.gif) ![](obj.svg) @@ -283,65 +252,50 @@ nothing # hide The result is the same as in the previous case. This is not surprising as the code does the same things; it is only written differently. The following exercise shows the power of defining the ```Step``` type. -```@raw html -
-
Exercise: Armijo condition
-
-``` - -Implement the ```Armijo``` subtype of the ```Step``` type. It should have two parameters ```c``` from the definition and ```α_max``` which will be the initial value of ``\alpha``. The value ``\alpha`` should be divided by two until the Armijo condition is satisfied. +!!! warning "Exercise: Armijo condition" + Implement the ```Armijo``` subtype of the ```Step``` type. It should have two parameters ```c``` from the definition and ```α_max``` which will be the initial value of ``\alpha``. The value ``\alpha`` should be divided by two until the Armijo condition is satisfied. -Then run the optimization with the Armijo stepsize selection and plot the animation. - -```@raw html -
-
-Solution: -
-``` - -We define the type in the same way as for ```GD```: - -```@example optim -struct Armijo <: Step - c::Float64 - α_max::Float64 -end -``` + Then run the optimization with the Armijo stepsize selection and plot the animation. -For the search for the stepsize, we first save the values for the function value ``f(x)`` and the gradient ``\nabla f(x)``. If we do not do this, it will be recomputed at every step. Then we initialize the value of ``\alpha`` and run the while loop until the Armijo condition is satisfied. We add a termination condition ```α <= 1e-6``` to prevent the loop from continuing indefinitely. +!!! details "Solution:" + We define the type in the same way as for ```GD```: -```@example optim -function optim_step(s::Armijo, f, g, x) - fun = f(x) - grad = g(x) - α = s.α_max - while f(x .- α*grad) > fun - s.c*α*(grad'*grad) - α /= 2 - if α <= 1e-6 - warning("Armijo line search failed.") - break + ```@example optim + struct Armijo <: Step + c::Float64 + α_max::Float64 + end + ``` + + For the search for the stepsize, we first save the values for the function value ``f(x)`` and the gradient ``\nabla f(x)``. If we do not do this, it will be recomputed at every step. Then we initialize the value of ``\alpha`` and run the while loop until the Armijo condition is satisfied. We add a termination condition ```α <= 1e-6``` to prevent the loop from continuing indefinitely. + + ```@example optim + function optim_step(s::Armijo, f, g, x) + fun = f(x) + grad = g(x) + α = s.α_max + while f(x .- α*grad) > fun - s.c*α*(grad'*grad) + α /= 2 + if α <= 1e-6 + warning("Armijo line search failed.") + break + end end + return -α*grad end - return -α*grad -end -nothing # hide -``` + nothing # hide + ``` -Then we create the ```Armijo``` struct and run the optimization. + Then we create the ```Armijo``` struct and run the optimization. 
-```@example optim -gd = Armijo(1e-4, 1) -x_opt = optim(f, g, [0;-1], gd) - -create_anim(f, x_opt, xlims, ylims, "anim5.gif") + ```@example optim + gd = Armijo(1e-4, 1) + x_opt = optim(f, g, [0;-1], gd) -nothing # hide -``` + create_anim(f, x_opt, xlims, ylims, "anim5.gif") -```@raw html -
-``` + nothing # hide + ``` ![](anim5.gif) diff --git a/docs/src/lecture_09/exercises.md b/docs/src/lecture_09/exercises.md index 065392c2c..83d435dac 100644 --- a/docs/src/lecture_09/exercises.md +++ b/docs/src/lecture_09/exercises.md @@ -40,188 +40,128 @@ w = log_reg(X, y, zeros(size(X,2))) # [Exercises](@id l8-exercises) -```@raw html -
-
Exercise 1:
-
-``` - -The logistic regression on the iris dataset failed in 6 out of 100 samples. But the visualization shows the failure only in 5 cases. How is it possible? - -```@raw html -
-
-Solution: -
-``` - -We use the `iris_reduced` dataframe and add the column `prediction` to it. - -```@example ex_log -df = iris_reduced -df.prediction = σ.(X*w) .>= 0.5 - -nothing # hide -``` - -Now we show all misclassified samples. - -```@example ex_log -sort(df[df.label .!= df.prediction, :], [:PetalLength, :PetalWidth]) -``` - -A quick look at the image shows that the point ``(4.8,1.8)`` is misclassified, but the image shows it correctly. Let us show all such points. - -```@example ex_log -df[(df.PetalLength .== 4.8) .& (df.PetalWidth .== 1.8), :] -``` - -As we can see, there are three samples with the same data. Two of them have label 1 and one label 0. Since the incorrectly classified sample was redrawn, it was not possible to see it. - -```@raw html -
-``` - -```@raw html -
-
Exercise 2: Disadvantages of the sigmoid function
-
-``` +!!! warning "Exercise 1:" + The logistic regression on the iris dataset failed in 6 out of 100 samples. But the visualization shows the failure only in 5 cases. How is it possible? -Show that Newton's method fails when started from the vector ``(1,2,3)``. Can you guess why it happened? What are the consequences for optimization? Is gradient descent going to suffer from the same problems? +!!! details "Solution:" + We use the `iris_reduced` dataframe and add the column `prediction` to it. -```@raw html -
-
-Solution: -
-``` + ```@example ex_log + df = iris_reduced + df.prediction = σ.(X*w) .>= 0.5 -First, we run the logistic regression as before, only with a different starting point + nothing # hide + ``` -```julia -log_reg(X, y, [1;2;3]) -``` -```julia -ERROR: SingularException(1) -``` + Now we show all misclassified samples. -This resulted in an error (or possibly in NaNs for older versions of Julia). When something fails, it may be a good idea to run a step-by-step analysis. In this case, we will run the first iteration of Newton's method + ```@example ex_log + sort(df[df.label .!= df.prediction, :], [:PetalLength, :PetalWidth]) + ``` -```@repl ex_log -w = [1;2;3]; -X_mult = [row*row' for row in eachrow(X)]; -y_hat = 1 ./(1 .+exp.(-X*w)) -grad = X'*(y_hat.-y) / size(X,1) -hess = y_hat.*(1 .-y_hat).*X_mult |> mean -w -= hess \ grad -``` + A quick look at the image shows that the point ``(4.8,1.8)`` is misclassified, but the image shows it correctly. Let us show all such points. -Starting from the bottom, we can see that even though we started with relatively small ``w``, the next iteration is four degrees of magnitude larger. This happened because the Hessian ```hess``` is much smaller than the gradient ```grad```. This indicates that there is some kind of numerical instability. The prediction ```y_hat``` should lie in the interval ``[0,1]`` but it seems that it is almost always close to 1. Let us verify this by showing the extrema of ```y_hat``` + ```@example ex_log + df[(df.PetalLength .== 4.8) .& (df.PetalWidth .== 1.8), :] + ``` -```@example ex_log -extrema(y_hat) -``` + As we can see, there are three samples with the same data. Two of them have label 1 and one label 0. Since the incorrectly classified sample was redrawn, it was not possible to see it. -They are indeed too large. +!!! warning "Exercise 2: Disadvantages of the sigmoid function" + Show that Newton's method fails when started from the vector ``(1,2,3)``. Can you guess why it happened? 
What are the consequences for optimization? Is gradient descent going to suffer from the same problems? -Now we explain the reason. We know that the prediction equals to +!!! details "Solution:" + First, we run the logistic regression as before, only with a different starting point -```math -\hat y_i = \sigma(w^\top x_i), -``` + ```julia + log_reg(X, y, [1;2;3]) + ``` + ```julia + ERROR: SingularException(1) + ``` -where ``\sigma`` is the sigmoid function. Since the mimimum from ``w^\top x_i`` + This resulted in an error (or possibly in NaNs for older versions of Julia). When something fails, it may be a good idea to run a step-by-step analysis. In this case, we will run the first iteration of Newton's method -```@example ex_log -minimum(X*[1;2;3]) -``` + ```@repl ex_log + w = [1;2;3]; + X_mult = [row*row' for row in eachrow(X)]; + y_hat = 1 ./(1 .+exp.(-X*w)) + grad = X'*(y_hat.-y) / size(X,1) + hess = y_hat.*(1 .-y_hat).*X_mult |> mean + w -= hess \ grad + ``` -is large, all ``w^\top x_i`` are large. But plotting the sigmoid funtion + Starting from the bottom, we can see that even though we started with relatively small ``w``, the next iteration is four degrees of magnitude larger. This happened because the Hessian ```hess``` is much smaller than the gradient ```grad```. This indicates that there is some kind of numerical instability. The prediction ```y_hat``` should lie in the interval ``[0,1]`` but it seems that it is almost always close to 1. Let us verify this by showing the extrema of ```y_hat``` -```@example ex_log -xs = -10:0.01:10 -plot(xs, σ, label="", ylabel="Sigmoid function") + ```@example ex_log + extrema(y_hat) + ``` -savefig("sigmoid.svg") # hide -``` + They are indeed too large. -![](sigmoid.svg) + Now we explain the reason. We know that the prediction equals to -it is clear that all ``w^\top x_i`` hit the part of the sigmoid which is flat. This means that the derivative is almost zero, and the Hessian is "even smaller" zero. 
Then the ratio of the gradient and Hessian is huge. + ```math + \hat y_i = \sigma(w^\top x_i), + ``` -The gradient descent will probably run into the same difficulty. Since the gradient will be too small, it will take a huge number of iterations to escape the flat region of the sigmoid. This is a known problem of the sigmoid function. It is also the reason why it was replaced in neural networks by other activation functions. + where ``\sigma`` is the sigmoid function. Since the mimimum from ``w^\top x_i`` -```@raw html -
-``` + ```@example ex_log + minimum(X*[1;2;3]) + ``` -```@raw html -
-
Exercise 3 (theory):
-
-``` + is large, all ``w^\top x_i`` are large. But plotting the sigmoid funtion -Show the details for the derivation of the loss function of the logistic regression. + ```@example ex_log + xs = -10:0.01:10 + plot(xs, σ, label="", ylabel="Sigmoid function") -```@raw html -
-
-Solution: -
-``` + savefig("sigmoid.svg") # hide + ``` -Since ``\hat y`` equals the probability of predicting ``1``, we have + ![](sigmoid.svg) -```math -\hat y = \frac{1}{1+e^{-w^\top x}} -``` + it is clear that all ``w^\top x_i`` hit the part of the sigmoid which is flat. This means that the derivative is almost zero, and the Hessian is "even smaller" zero. Then the ratio of the gradient and Hessian is huge. -Then the cross-entropy loss reduces to + The gradient descent will probably run into the same difficulty. Since the gradient will be too small, it will take a huge number of iterations to escape the flat region of the sigmoid. This is a known problem of the sigmoid function. It is also the reason why it was replaced in neural networks by other activation functions. -```math -\begin{aligned} -\operatorname{loss}(y,\hat y) &= - y\log \hat y - (1-y)\log(1-\hat y) \\ -&= y\log(1+e^{-w^\top x}) - (1-y)\log(e^{-w^\top x}) + (1-y)\log(1+e^{-w^\top x}) \\ -&= \log(1+e^{-w^\top x}) + (1-y)w^\top x. -\end{aligned} -``` +!!! warning "Exercise 3 (theory)" + Show the details for the derivation of the loss function of the logistic regression. -Then it remains to sum this term over all samples. +!!! details "Solution:" + Since ``\hat y`` equals the probability of predicting ``1``, we have -```@raw html -
-``` + ```math + \hat y = \frac{1}{1+e^{-w^\top x}} + ``` -```@raw html -
-
Exercise 4 (theory):
-
-``` + Then the cross-entropy loss reduces to -Show that if the Newton's method converged for the logistic regression, then it found a point globally minimizing the logistic loss. + ```math + \begin{aligned} + \operatorname{loss}(y,\hat y) &= - y\log \hat y - (1-y)\log(1-\hat y) \\ + &= y\log(1+e^{-w^\top x}) - (1-y)\log(e^{-w^\top x}) + (1-y)\log(1+e^{-w^\top x}) \\ + &= \log(1+e^{-w^\top x}) + (1-y)w^\top x. + \end{aligned} + ``` -```@raw html -
-
-Solution: -
-``` + Then it remains to sum this term over all samples. -We derived that the Hessian of the objective function for logistic regression is +!!! warning "Exercise 4 (theory)" + Show that if the Newton's method converged for the logistic regression, then it found a point globally minimizing the logistic loss. -```math -\nabla^2 L(w) = \frac 1n \sum_{i=1}^n\hat y_i(1-\hat y_i)x_i x_i^\top. -``` +!!! details "Solution:" + We derived that the Hessian of the objective function for logistic regression is -For any vector ``a``, we have + ```math + \nabla^2 L(w) = \frac 1n \sum_{i=1}^n\hat y_i(1-\hat y_i)x_i x_i^\top. + ``` -```math -a^\top x_i x_i^\top a = (x_i^\top a)^\top (x_i^\top a) = \|x_i^\top a\|^2 \ge 0, -``` + For any vector ``a``, we have -which implies that ``x_i x_i^\top`` is a positive semidefinite matrix (it is known as rank-1 matrix as its rank is always 1 if ``x_i`` is a non-zero vector). Since ``y_i(1-\hat y_i)\ge 0``, it follows that ``\nabla^2 L(w)`` is a positive semidefinite matrix. If a Hessian of a function is positive semidefinite everywhere, the function is immediately convex. Since Newton's method found a stationary point, this points is a global minimum. + ```math + a^\top x_i x_i^\top a = (x_i^\top a)^\top (x_i^\top a) = \|x_i^\top a\|^2 \ge 0, + ``` -```@raw html -
-``` \ No newline at end of file + which implies that ``x_i x_i^\top`` is a positive semidefinite matrix (it is known as rank-1 matrix as its rank is always 1 if ``x_i`` is a non-zero vector). Since ``y_i(1-\hat y_i)\ge 0``, it follows that ``\nabla^2 L(w)`` is a positive semidefinite matrix. If a Hessian of a function is positive semidefinite everywhere, the function is immediately convex. Since Newton's method found a stationary point, this points is a global minimum. \ No newline at end of file diff --git a/docs/src/lecture_09/linear.md b/docs/src/lecture_09/linear.md index 37a6cde47..4a57148e8 100644 --- a/docs/src/lecture_09/linear.md +++ b/docs/src/lecture_09/linear.md @@ -76,51 +76,36 @@ Printing the first five entries of the data shows that they are saved in DataFra When designing a classification method, a good practice is to perform at least a basic analysis of the data. That may include checking for NaNs, infinite values, obvious errors, standard deviations of features or others. Here, we only plot the data. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + We will simplify the goal and estimate the dependence of petal width on petal length. Create the data ``X`` (do not forget to add the bias) and the labels ``y``. -We will simplify the goal and estimate the dependence of petal width on petal length. Create the data ``X`` (do not forget to add the bias) and the labels ``y``. + Make a graph of the dependence of petal width on petal length. -Make a graph of the dependence of petal width on petal length. +!!! details "Solution:" + Since the petal length and width are the third and fourth columns, we assign them to ```X``` and ```y```, respectively. We can use ```iris[:, 4]``` or ```iris[:, :PetalWidth]``` instead of ```iris.PetalWidth```, but the first possibility is vulnerable to errors. We need to concatenate ```X``` it with a vector of ones to add the bias. -```@raw html -
-
-Solution: -
-``` + ```@example linear + y = iris.PetalWidth + X = hcat(iris.PetalLength, ones(length(y))) -Since the petal length and width are the third and fourth columns, we assign them to ```X``` and ```y```, respectively. We can use ```iris[:, 4]``` or ```iris[:, :PetalWidth]``` instead of ```iris.PetalWidth```, but the first possibility is vulnerable to errors. We need to concatenate ```X``` it with a vector of ones to add the bias. + nothing # hide + ``` -```@example linear -y = iris.PetalWidth -X = hcat(iris.PetalLength, ones(length(y))) + The best visualization is by the scatter plot. We use the version from the `StatsPlots` package but the one from the `Plots` package would be naturally sufficient. -nothing # hide -``` + ```@example linear + @df iris scatter( + :PetalLength, + :PetalWidth; + label="", + xlabel = "Petal length", + ylabel = "Petal width" + ) -The best visualization is by the scatter plot. We use the version from the `StatsPlots` package but the one from the `Plots` package would be naturally sufficient. + savefig("iris_lin1.svg") # hide -```@example linear -@df iris scatter( - :PetalLength, - :PetalWidth; - label="", - xlabel = "Petal length", - ylabel = "Petal width" -) - -savefig("iris_lin1.svg") # hide - -nothing # hide -``` - -```@raw html -
-``` + nothing # hide + ``` ![](iris_lin1.svg) @@ -129,51 +114,36 @@ The figure shows a positive correlation between length and width. This is natura ## Training the classifier -```@raw html -
-
Exercise:
-
-``` - -Use the closed-form formula to get the coefficients ``w`` for the linear regression. Then use the ```optim``` method derived in the previous lecture to solve the optimization problem via gradient descent. The results should be identical. - -```@raw html -
-
-Solution: -
-``` - -The closed-form expression is ``(X^\top X)^{-1}X^\top y``. In the [exercises](@ref l7-exercises) to the previous lecture, we explained that writing ```(X'*X) \ (X'*y)``` is better than `inv(X'*X)*X'*y` because the former does not compute the matrix inverse. As a side-note, can you guess the difference between `inv(X'*X)*X'*y` and `inv(X'*X)*(X'*y)`? +!!! warning "Exercise:" + Use the closed-form formula to get the coefficients ``w`` for the linear regression. Then use the ```optim``` method derived in the previous lecture to solve the optimization problem via gradient descent. The results should be identical. -```@example linear -w = (X'*X) \ (X'*y) +!!! details "Solution:" + The closed-form expression is ``(X^\top X)^{-1}X^\top y``. In the [exercises](@ref l7-exercises) to the previous lecture, we explained that writing ```(X'*X) \ (X'*y)``` is better than `inv(X'*X)*X'*y` because the former does not compute the matrix inverse. As a side-note, can you guess the difference between `inv(X'*X)*X'*y` and `inv(X'*X)*(X'*y)`? -nothing # hide -``` + ```@example linear + w = (X'*X) \ (X'*y) -For the gradient descent, we first realize that the formula for the derivate is ``X^\top (Xw-y)``. Defining the derivative function in ```g```, we call the ```optim``` method in the same way as in the last lecture. Since we use the sum and not mean in the objective, we need to use a much smaller stepsize. + nothing # hide + ``` -```@example linear -g(w) = X'*(X*w-y) -w2 = optim([], g, zeros(size(X,2)), GD(1e-4); max_iter=10000) + For the gradient descent, we first realize that the formula for the derivate is ``X^\top (Xw-y)``. Defining the derivative function in ```g```, we call the ```optim``` method in the same way as in the last lecture. Since we use the sum and not mean in the objective, we need to use a much smaller stepsize. 
-nothing # hide -``` + ```@example linear + g(w) = X'*(X*w-y) + w2 = optim([], g, zeros(size(X,2)), GD(1e-4); max_iter=10000) -The difference between the solutions is + nothing # hide + ``` -```@example linear -using LinearAlgebra + The difference between the solutions is -norm(w-w2) -``` + ```@example linear + using LinearAlgebra -which is acceptable. + norm(w-w2) + ``` -```@raw html -
-``` + which is acceptable. The correct solution is @@ -183,56 +153,41 @@ w # hide Now we can estimate the petal width if only petal length is known. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Write the dependence on the petal width on the petal length. Plot it in the previous graph. -Write the dependence on the petal width on the petal length. Plot it in the previous graph. +!!! details "Solution:" + The desired dependence is -```@raw html -
-
-Solution: -
-``` + ```math + \text{width} \approx -0.36 + 0.42*\text{length}. + ``` -The desired dependence is + Before plotting the prediction, we save it into ```f_pred```. -```math -\text{width} \approx -0.36 + 0.42*\text{length}. -``` + ```@example linear + f_pred(x::Real, w) = w[1]*x + w[2] -Before plotting the prediction, we save it into ```f_pred```. + nothing # hide + ``` -```@example linear -f_pred(x::Real, w) = w[1]*x + w[2] - -nothing # hide -``` - -Then we create the limits ```x_lim``` and finally plot the prediction function. + Then we create the limits ```x_lim``` and finally plot the prediction function. -```@example linear -x_lims = extrema(iris.PetalLength) .+ [-0.1, 0.1] - -@df iris scatter( - :PetalLength, - :PetalWidth; - xlabel = "Petal length", - ylabel = "Petal width", - label = "", - legend = :topleft, -) + ```@example linear + x_lims = extrema(iris.PetalLength) .+ [-0.1, 0.1] -plot!(x_lims, x -> f_pred(x,w); label = "Prediction", line = (:black,3)) + @df iris scatter( + :PetalLength, + :PetalWidth; + xlabel = "Petal length", + ylabel = "Petal width", + label = "", + legend = :topleft, + ) -savefig("iris_lin2.svg") # hide -``` + plot!(x_lims, x -> f_pred(x,w); label = "Prediction", line = (:black,3)) -```@raw html -
-``` + savefig("iris_lin2.svg") # hide + ``` ![](iris_lin2.svg) diff --git a/docs/src/lecture_09/logistic.md b/docs/src/lecture_09/logistic.md index fa22146f8..d06ce3614 100644 --- a/docs/src/lecture_09/logistic.md +++ b/docs/src/lecture_09/logistic.md @@ -103,69 +103,54 @@ nothing # hide The data contain three classes. However, we considered only binary problems with two classes. We therefore cheat. -```@raw html -
-
Exercise:
-
-``` - -Create the `iris_reduced` dataframe in the following way: -- Label "setosa" will be deleted. -- Label "versicolor" will be the negative class. -- Label "virginica" will be the positive class. -- Add the `intercept` column with ones as entries. -For the features, consider only petal length and petal width. - -**Hint**: Use the `Query` package or do it manually via the `!insertcols` function. - -```@raw html -
-
-Solution: -
-``` - -The modification of the dataframe can be by the `Query` package. - -```@example logistic -using Query - -iris_reduced = @from i in iris begin - @where i.Species != "setosa" - @select { - i.PetalLength, - i.PetalWidth, - intercept = 1, - i.Species, - label = i.Species == "virginica", - } - @collect DataFrame - end - -nothing # hide -``` +!!! warning "Exercise:" + Create the `iris_reduced` dataframe in the following way: + - Label "setosa" will be deleted. + - Label "versicolor" will be the negative class. + - Label "virginica" will be the positive class. + - Add the `intercept` column with ones as entries. + For the features, consider only petal length and petal width. + + **Hint**: Use the `Query` package or do it manually via the `!insertcols` function. + +!!! details "Solution:" + The modification of the dataframe can be by the `Query` package. + + ```@example logistic + using Query + + iris_reduced = @from i in iris begin + @where i.Species != "setosa" + @select { + i.PetalLength, + i.PetalWidth, + intercept = 1, + i.Species, + label = i.Species == "virginica", + } + @collect DataFrame + end -We can also perform this procedure manually. + nothing # hide + ``` -```@example logistic -iris_reduced2 = iris[iris.Species .!= "setosa", :] -iris_reduced2 = iris_reduced2[:,[3;4;5]] + We can also perform this procedure manually. -insertcols!(iris_reduced2, 3, :intercept => 1) -insertcols!(iris_reduced2, 5, :label => iris_reduced2.Species .== "virginica") + ```@example logistic + iris_reduced2 = iris[iris.Species .!= "setosa", :] + iris_reduced2 = iris_reduced2[:,[3;4;5]] -nothing # hide -``` + insertcols!(iris_reduced2, 3, :intercept => 1) + insertcols!(iris_reduced2, 5, :label => iris_reduced2.Species .== "virginica") -We can check that both approaches give the same result. + nothing # hide + ``` -```@repl logistic -isequal(iris_reduced, iris_reduced2) -``` + We can check that both approaches give the same result. -```@raw html -
-``` + ```@repl logistic + isequal(iris_reduced, iris_reduced2) + ``` Now we extract the data ```X``` and labels ```y```. Since ```iris_reduced``` is a DataFrame, we need to convert it first into a ```Matrix```. The matrix `X` is formed by the petal length, width and the intercept. @@ -178,41 +163,26 @@ nothing # hide We again plot the data. Since we are interested in a different prediction than last time, we will plot them differently. -```@raw html -
-
Exercise:
-
-``` - -Since ```X``` has two features (columns), it is simple to visualize. Use scatter plot to show the data. Use different colours for different classes. Try to produce a nice graph by including names of classes and axis labels (petal length and petal width). +!!! warning "Exercise:" + Since ```X``` has two features (columns), it is simple to visualize. Use scatter plot to show the data. Use different colours for different classes. Try to produce a nice graph by including names of classes and axis labels (petal length and petal width). -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + We make use of the ```iris_reduced``` variable. To plot the points in different colours, we use the keyword ```group = :Species```. -We make use of the ```iris_reduced``` variable. To plot the points in different colours, we use the keyword ```group = :Species```. + ```@example logistic + using Plots -```@example logistic -using Plots + @df iris_reduced scatter( + :PetalLength, + :PetalWidth; + group = :Species, + xlabel = "Petal length", + ylabel = "Petal width", + legend = :topleft, + ) -@df iris_reduced scatter( - :PetalLength, - :PetalWidth; - group = :Species, - xlabel = "Petal length", - ylabel = "Petal width", - legend = :topleft, -) - -savefig("iris1.svg") # hide -``` - -```@raw html -
-``` + savefig("iris1.svg") # hide + ``` ![](iris1.svg) @@ -220,57 +190,42 @@ We see that the classes are almost perfectly separable. It would not be difficul ## Training the classifier -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Write a function ```log_reg``` which takes as an input the dataset, the labels and the initial point. It should use Newton's method to find the optimal weights ``w``. Print the results when started from zero. -Write a function ```log_reg``` which takes as an input the dataset, the labels and the initial point. It should use Newton's method to find the optimal weights ``w``. Print the results when started from zero. + It would be possible to use the code ```optim(f, g, x, s::Step)``` from the previous lecture and define only the step function ```s``` for the Newton's method. However, sometimes it may be better to write simple functions separately instead of using more complex machinery. -It would be possible to use the code ```optim(f, g, x, s::Step)``` from the previous lecture and define only the step function ```s``` for the Newton's method. However, sometimes it may be better to write simple functions separately instead of using more complex machinery. +!!! details "Solution:" + To write the desired function, we need to implement the gradient and Hessian from derived in the theoretical lecture. First, we define the sigmoid function in `σ`. Then we need to create ``\hat y``. We may use for loop notation ```[σ(w'*x) for x in eachrow(X)]```. However, in this case, it is simpler to use matrix operations ```σ.(X*w)``` to get the same result. The gradient can be written in the same way. Again, we use matrix notation. For the Hessian, we first create ```X_mult = [row*row' for row in eachrow(X)]``` which computes all products ``x_ix_i^\top``. This creates an array of length ``100``; each element of this array is a ``2\times 2`` matrix. Since it is an array, we may multiply it by ```y_hat.*(1 .-y_hat)```. As ```mean``` from the ```Statistics``` package operates on any array, we can call it (or similarly ```sum```). We may use ```mean(???)``` but we find the alternative ```??? |> mean``` more readable in this case. 
We use ```hess \ grad```, as explained in the previous lecture for Newton's method, to update the weights. -```@raw html -
-
-Solution: -
-``` + ```@example logistic + using Statistics -To write the desired function, we need to implement the gradient and Hessian from derived in the theoretical lecture. First, we define the sigmoid function in `σ`. Then we need to create ``\hat y``. We may use for loop notation ```[σ(w'*x) for x in eachrow(X)]```. However, in this case, it is simpler to use matrix operations ```σ.(X*w)``` to get the same result. The gradient can be written in the same way. Again, we use matrix notation. For the Hessian, we first create ```X_mult = [row*row' for row in eachrow(X)]``` which computes all products ``x_ix_i^\top``. This creates an array of length ``100``; each element of this array is a ``2\times 2`` matrix. Since it is an array, we may multiply it by ```y_hat.*(1 .-y_hat)```. As ```mean``` from the ```Statistics``` package operates on any array, we can call it (or similarly ```sum```). We may use ```mean(???)``` but we find the alternative ```??? |> mean``` more readable in this case. We use ```hess \ grad```, as explained in the previous lecture for Newton's method, to update the weights. + σ(z) = 1/(1+exp(-z)) -```@example logistic -using Statistics - -σ(z) = 1/(1+exp(-z)) - -function log_reg(X, y, w; max_iter=100, tol=1e-6) - X_mult = [row*row' for row in eachrow(X)] - for i in 1:max_iter - y_hat = σ.(X*w) - grad = X'*(y_hat.-y) / size(X,1) - hess = y_hat.*(1 .-y_hat).*X_mult |> mean - w -= hess \ grad + function log_reg(X, y, w; max_iter=100, tol=1e-6) + X_mult = [row*row' for row in eachrow(X)] + for i in 1:max_iter + y_hat = σ.(X*w) + grad = X'*(y_hat.-y) / size(X,1) + hess = y_hat.*(1 .-y_hat).*X_mult |> mean + w -= hess \ grad + end + return w end - return w -end -nothing # hide -``` - -The definition of ```X_mult``` should be outside the for loop, as it needs to be computed only once. + nothing # hide + ``` -After the tough work, it remains to call it. + The definition of ```X_mult``` should be outside the for loop, as it needs to be computed only once. 
-```@example logistic -w = log_reg(X, y, zeros(size(X,2))) + After the tough work, it remains to call it. -nothing # hide -``` + ```@example logistic + w = log_reg(X, y, zeros(size(X,2))) -```@raw html -
-``` + nothing # hide + ``` The correct solution is ```@example logistic @@ -333,36 +288,21 @@ equals to zero, we found a stationary point. It can be shown that logistic regre The picture shows that there are misclassified samples. The next exercise analyses them. -```@raw html -
-
Exercise:
-
-``` - -Compute how many samples were correctly and incorrectly classified. +!!! warning "Exercise:" + Compute how many samples were correctly and incorrectly classified. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + Since ``\hat y_i`` is the probability that a sample is of the positive class, we will predict that it is positive if this probability is greater than ``\frac 12``. Then it suffices to compare the predictions ```pred``` with the correct labels ```y```. -Since ``\hat y_i`` is the probability that a sample is of the positive class, we will predict that it is positive if this probability is greater than ``\frac 12``. Then it suffices to compare the predictions ```pred``` with the correct labels ```y```. + ```@example logistic + pred = y_hat .>= 0.5 + "Correct number of predictions: " * string(sum(pred .== y)) + "Wrong number of predictions: " * string(sum(pred .!= y)) -```@example logistic -pred = y_hat .>= 0.5 -"Correct number of predictions: " * string(sum(pred .== y)) -"Wrong number of predictions: " * string(sum(pred .!= y)) - -nothing # hide -``` - -There is an alternative (but equivalent way). Since the separating hyperplane has form ``w^\top x``, we predict that a sample is positive whenever ``w^\top x\ge 0``. Write arguments on why these two approaches are equivalent. + nothing # hide + ``` -```@raw html -
-``` + There is an alternative (but equivalent way). Since the separating hyperplane has form ``w^\top x``, we predict that a sample is positive whenever ``w^\top x\ge 0``. Write arguments on why these two approaches are equivalent. The correct answer is diff --git a/docs/src/lecture_09/theory.md b/docs/src/lecture_09/theory.md index 245a1b073..a7c6ed48f 100644 --- a/docs/src/lecture_09/theory.md +++ b/docs/src/lecture_09/theory.md @@ -69,7 +69,7 @@ while non-linear predictions are considered in the following lecture. That means that if we add ``1`` to each sample ``x_i``, it is sufficient to consider the classifier in the form ``w^\top x`` without the bias (shift, intercept) ``b``. This allows for simpler implementation. -!!! bonus "BONUS: Data transformation" +!!! compat "BONUS: Data transformation" Linear models have many advantages, such as simplicity or guaranteed convergence for optimization methods. Sometimes it is possible to transform non-linear dependences into linear ones. For example, the body-mass index ```math diff --git a/docs/src/lecture_10/exercises.md b/docs/src/lecture_10/exercises.md index 3160e4e98..43f2d6339 100644 --- a/docs/src/lecture_10/exercises.md +++ b/docs/src/lecture_10/exercises.md @@ -108,71 +108,56 @@ y = iris.Species # [Exercises](@id l9-exercises) -```@raw html -
-
Exercise 1: Keyword arguments
-
-``` +!!! warning "Exercise 1: Keyword arguments" + Keyword arguments (often denoted as `kwargs` but any name may be used) specify additional arguments which do not need to be used when the function is called. We recall the `prepare_data` function written earlier. -Keyword arguments (often denoted as `kwargs` but any name may be used) specify additional arguments which do not need to be used when the function is called. We recall the `prepare_data` function written earlier. + ```@example nn + function prepare_data(X, y; do_normal=true, do_onehot=true, kwargs...) + X_train, y_train, X_test, y_test = split(X, y; kwargs...) -```@example nn -function prepare_data(X, y; do_normal=true, do_onehot=true, kwargs...) - X_train, y_train, X_test, y_test = split(X, y; kwargs...) + if do_normal + X_train, X_test = normalize(X_train, X_test; kwargs...) + end - if do_normal - X_train, X_test = normalize(X_train, X_test; kwargs...) - end + classes = unique(y) - classes = unique(y) + if do_onehot + y_train = onehot(y_train, classes) + y_test = onehot(y_test, classes) + end - if do_onehot - y_train = onehot(y_train, classes) - y_test = onehot(y_test, classes) + return X_train, y_train, X_test, y_test, classes end + nothing # hide + ``` - return X_train, y_train, X_test, y_test, classes -end -nothing # hide -``` + All keyword arguments `kwargs` will be passed to the `split` and `normalize` functions. The benefit is that we do not need to specify the keyword arguments for `split` in `prepare_data`. -All keyword arguments `kwargs` will be passed to the `split` and `normalize` functions. The benefit is that we do not need to specify the keyword arguments for `split` in `prepare_data`. + Recall that `split` takes `ratio_split` as an optional argument. Write a one-line function ```ratio_train``` which gets the training and testing sets and computes the ratio of samples in the training set. 
Then call the `prepare_data` with: + - no normalization and the default split ratio; + - normalization and the split ratio of 50/50; -Recall that `split` takes `ratio_split` as an optional argument. Write a one-line function ```ratio_train``` which gets the training and testing sets and computes the ratio of samples in the training set. Then call the `prepare_data` with: -- no normalization and the default split ratio; -- normalization and the split ratio of 50/50; +!!! details "Solution:" + The ```ratio_train``` function reads: -```@raw html -
-
-Solution: -
-``` + ```@example nn + ratio_train(X_train, X_test) = size(X_train, 2) / (size(X_train,2) + size(X_test,2)) + nothing # hide + ``` -The ```ratio_train``` function reads: + The first case uses the default ratio; hence we do not pass `ratio_split`. Since we do not want to use normalization, we need to pass `do_normal=false`. -```@example nn -ratio_train(X_train, X_test) = size(X_train, 2) / (size(X_train,2) + size(X_test,2)) -nothing # hide -``` - -The first case uses the default ratio; hence we do not pass `ratio_split`. Since we do not want to use normalization, we need to pass `do_normal=false`. + ```@example nn + X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2, do_normal=false) + println("Ratio train/test = ", ratio_train(X_train, X_test)) + ``` -```@example nn -X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2, do_normal=false) -println("Ratio train/test = ", ratio_train(X_train, X_test)) -``` + The second case behaves the other way round. We use the default normalization; thus, we do not need to specify `do_normal=true` (even though it may be a good idea). We need to pass `ratio_train=0.5`. -The second case behaves the other way round. We use the default normalization; thus, we do not need to specify `do_normal=true` (even though it may be a good idea). We need to pass `ratio_train=0.5`. - -```@example nn -X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2, ratio_train=0.5) -println("Ratio train/test = ", ratio_train(X_train, X_test)) -``` - -```@raw html -
-``` + ```@example nn + X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2, ratio_train=0.5) + println("Ratio train/test = ", ratio_train(X_train, X_test)) + ``` The goal of the following exercise is to show the prediction function graphically. For this reason, we will consider only two features. All the following exercises use the data with the fixed seed for reproducibility. @@ -183,263 +168,203 @@ X_train, y_train, X_test, y_test, classes = prepare_data(X[:,3:4]', y; dims = 2) nothing # hide ``` -```@raw html -
-
Exercise 2: Showing the contours
-
-``` +!!! warning "Exercise 2: Showing the contours" + Use the same training procedure for 1000 iterations to train the classifier with the new data. Then plot a graph depicting which classes are predicted at subregions of ``[-2,2]\times [-2,2]``. Moreover, depict the testing data in this graph. -Use the same training procedure for 1000 iterations to train the classifier with the new data. Then plot a graph depicting which classes are predicted at subregions of ``[-2,2]\times [-2,2]``. Moreover, depict the testing data in this graph. + **Hint**: use the `heatmap` function. -**Hint**: use the `heatmap` function. +!!! details "Solution:" + The procedure for training the network is the same as during the lecture. -```@raw html -
-
-Solution: -
-``` + ```@example nn + m = SimpleNet(size(X_train,1), 5, size(y_train,1)) -The procedure for training the network is the same as during the lecture. + α = 1e-1 + max_iter = 1000 + for iter in 1:max_iter + grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] + grad_mean = mean_tuple(grad_all) -```@example nn -m = SimpleNet(size(X_train,1), 5, size(y_train,1)) - -α = 1e-1 -max_iter = 1000 -for iter in 1:max_iter - grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] - grad_mean = mean_tuple(grad_all) - - m.W1 .-= α*grad_mean[2] - m.b1 .-= α*grad_mean[3] - m.W2 .-= α*grad_mean[4] - m.b2 .-= α*grad_mean[5] -end + m.W1 .-= α*grad_mean[2] + m.b1 .-= α*grad_mean[3] + m.W2 .-= α*grad_mean[4] + m.b2 .-= α*grad_mean[5] + end -nothing # hide -``` + nothing # hide + ``` -The prediction function is `m([x;y])`. Since this creates a one-hot representation, we need to convert it into a one-cold representation. However, it is not possible to use `onecold(m([x; y]), classes)`, which would result in one of the three string labels. We need to use `onecold(m([x; y]), 1:3)` to convert it to a real number. Then we call the `heatmap` function. Since we will later use plotting in a loop, we assign the graph to `plt`. + The prediction function is `m([x;y])`. Since this creates a one-hot representation, we need to convert it into a one-cold representation. However, it is not possible to use `onecold(m([x; y]), classes)`, which would result in one of the three string labels. We need to use `onecold(m([x; y]), 1:3)` to convert it to a real number. Then we call the `heatmap` function. Since we will later use plotting in a loop, we assign the graph to `plt`. 
-```@example nn -colours = [:blue, :red, :green] - -xs = -2:0.01:2 -plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; - color = colours, - opacity = 0.2, - axis = false, - ticks = false, - cbar = false, - legend = :topleft, -) + ```@example nn + colours = [:blue, :red, :green] -nothing # hide -``` + xs = -2:0.01:2 + plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; + color = colours, + opacity = 0.2, + axis = false, + ticks = false, + cbar = false, + legend = :topleft, + ) -To add the predictions of the testing set, we find the indices `inds` of samples from each class. Then we add them via the `scatter!` plot. We keep `colours` from the previous part to have the same colours. Since we plotted in a loop, we need to `display` the plot. + nothing # hide + ``` -```@example nn -for (i, class) in enumerate(classes) - inds = findall(onecold(y_test, classes) .== class) - scatter!(plt, X_test[1, inds], X_test[2, inds]; - label = class, - marker=(8, 0.8, colours[i]), - ) -end -display(plt) + To add the predictions of the testing set, we find the indices `inds` of samples from each class. Then we add them via the `scatter!` plot. We keep `colours` from the previous part to have the same colours. Since we plotted in a loop, we need to `display` the plot. -savefig("Separation.svg") # hide -``` + ```@example nn + for (i, class) in enumerate(classes) + inds = findall(onecold(y_test, classes) .== class) + scatter!(plt, X_test[1, inds], X_test[2, inds]; + label = class, + marker=(8, 0.8, colours[i]), + ) + end + display(plt) -```@raw html -
-``` + savefig("Separation.svg") # hide + ``` ![](Separation.svg) -```@raw html -
-
Exercise 3: Overfitting
-
-``` +!!! warning "Exercise 3: Overfitting" + This exercise shows the well-known effect of overfitting. Since the model sees only the training set, it may fit it too perfectly (overfit it) and generalize poorly to the testing set of unseen examples. -This exercise shows the well-known effect of overfitting. Since the model sees only the training set, it may fit it too perfectly (overfit it) and generalize poorly to the testing set of unseen examples. + Consider the same data as in the previous exercise but train a network with 25 hidden neurons for 25000 iterations. Plot the loss function values on the training and testing sets. Then plot the same prediction visualization as in the previous exercise for both testing and training sets. Describe what went wrong. -Consider the same data as in the previous exercise but train a network with 25 hidden neurons for 25000 iterations. Plot the loss function values on the training and testing sets. Then plot the same prediction visualization as in the previous exercise for both testing and training sets. Describe what went wrong. +!!! details "Solution:" + We first specify the loss function. -```@raw html -
-
-Solution: -
-``` + ```@example nn + loss(X, y; ϵ = 1e-10) = mean(-sum(y .* log.(m(X) .+ ϵ); dims = 1)) + nothing # hide + ``` -We first specify the loss function. + Then we train the network as before. The only change is that we need to save the training and testing objective. -```@example nn -loss(X, y; ϵ = 1e-10) = mean(-sum(y .* log.(m(X) .+ ϵ); dims = 1)) -nothing # hide -``` + ```@example nn + m = SimpleNet(size(X_train,1), 25, size(y_train,1)) -Then we train the network as before. The only change is that we need to save the training and testing objective. + α = 1e-1 + max_iter = 25000 + L_train = zeros(max_iter) + L_test = zeros(max_iter) + for iter in 1:max_iter + grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] + grad_mean = mean_tuple(grad_all) + + m.W1 .-= α*grad_mean[2] + m.b1 .-= α*grad_mean[3] + m.W2 .-= α*grad_mean[4] + m.b2 .-= α*grad_mean[5] -```@example nn -m = SimpleNet(size(X_train,1), 25, size(y_train,1)) - -α = 1e-1 -max_iter = 25000 -L_train = zeros(max_iter) -L_test = zeros(max_iter) -for iter in 1:max_iter - grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] - grad_mean = mean_tuple(grad_all) - - m.W1 .-= α*grad_mean[2] - m.b1 .-= α*grad_mean[3] - m.W2 .-= α*grad_mean[4] - m.b2 .-= α*grad_mean[5] - - L_train[iter] = loss(X_train, y_train) - L_test[iter] = loss(X_test, y_test) -end -``` + L_train[iter] = loss(X_train, y_train) + L_test[iter] = loss(X_test, y_test) + end + ``` -Then we plot it. We ignore the first nine iterations, where the loss is large there. We see the classical procedure of overfitting. While the loss function on the training set decreases steadily, on the testing set, it decreases first, and after approximately 100 iterations, it starts increasing. This behaviour may be prevented by several techniques, which we discuss in the following lecture. + Then we plot it. We ignore the first nine iterations, where the loss is large there. We see the classical procedure of overfitting. 
While the loss function on the training set decreases steadily, on the testing set, it decreases first, and after approximately 100 iterations, it starts increasing. This behaviour may be prevented by several techniques, which we discuss in the following lecture. -```@example nn -plot(L_train[10:end], xlabel="Iteration", label="Training loss", legend=:topleft) -plot!(L_test[10:end], label="Testing loss") + ```@example nn + plot(L_train[10:end], xlabel="Iteration", label="Training loss", legend=:topleft) + plot!(L_test[10:end], label="Testing loss") -savefig("Train_test.svg") # hide -``` + savefig("Train_test.svg") # hide + ``` -![](Train_test.svg) + ![](Train_test.svg) -We create the contour plot in the same way as in the previous exercise. + We create the contour plot in the same way as in the previous exercise. -```@example nn -plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; - color = colours, - opacity = 0.2, - axis = false, - ticks = false, - cbar = false, - legend = :topleft, -) - -for (i, class) in enumerate(classes) - inds = findall(onecold(y_test, classes) .== class) - scatter!(plt, X_test[1, inds], X_test[2, inds]; - label = class, - marker=(8, 0.8, colours[i]), + ```@example nn + plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; + color = colours, + opacity = 0.2, + axis = false, + ticks = false, + cbar = false, + legend = :topleft, ) -end -display(plt) -savefig("Separation2.svg") # hide -``` + for (i, class) in enumerate(classes) + inds = findall(onecold(y_test, classes) .== class) + scatter!(plt, X_test[1, inds], X_test[2, inds]; + label = class, + marker=(8, 0.8, colours[i]), + ) + end + display(plt) -![](Separation2.svg) + savefig("Separation2.svg") # hide + ``` -```@example nn -plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; - color = colours, - opacity = 0.2, - axis = false, - ticks = false, - cbar = false, - legend = :topleft, -) - -for (i, class) in enumerate(classes) - inds = findall(onecold(y_train, classes) .== 
class) - scatter!(plt, X_train[1, inds], X_train[2, inds]; - label = class, - marker=(8, 0.8, colours[i]), + ![](Separation2.svg) + + ```@example nn + plt = heatmap(xs, xs, (x, y) -> onecold(m([x; y]), 1:3)[1]; + color = colours, + opacity = 0.2, + axis = false, + ticks = false, + cbar = false, + legend = :topleft, ) -end -display(plt) -savefig("Separation3.svg") # hide -``` + for (i, class) in enumerate(classes) + inds = findall(onecold(y_train, classes) .== class) + scatter!(plt, X_train[1, inds], X_train[2, inds]; + label = class, + marker=(8, 0.8, colours[i]), + ) + end + display(plt) -![](Separation3.svg) + savefig("Separation3.svg") # hide + ``` -The separation on the testing set is quite good, but it could be better for the two bottommost green circles (iris virginica). The model predicted (in the background) the red colour (iris versicolor) there. This is wrong. The reason is clear from the picture depicting the training set. The classifier tried to perfectly fit the boundary between the green and red points, making an outward-pointing tip. This is precisely overfitting and the reason for the misclassification on the testing set. + ![](Separation3.svg) -```@raw html -
-``` + The separation on the testing set is quite good, but it could be better for the two bottommost green circles (iris virginica). The model predicted (in the background) the red colour (iris versicolor) there. This is wrong. The reason is clear from the picture depicting the training set. The classifier tried to perfectly fit the boundary between the green and red points, making an outward-pointing tip. This is precisely overfitting and the reason for the misclassification on the testing set. ![](Separation2.svg) ![](Separation3.svg) -```@raw html -
-
Exercise 4: Generalization
-
-``` +!!! warning "Exercise 4: Generalization" + The contour plots from Exercises 2 and 3 are strikingly different, especially in the top-left and bottom-right corners. Why is that? -The contour plots from Exercises 2 and 3 are strikingly different, especially in the top-left and bottom-right corners. Why is that? +!!! details "Solution:" + Since the dataset does not contain any data in the top-left or bottom-right corners, it does not know what to predict. From its perspective, both separations are very good. -```@raw html -
-
-Solution: -
-``` + !!! info "Generalization:" + If a classifier does not have any data in some region, it may predict anything there, including predictions with no sense. -Since the dataset does not contain any data in the top-left or bottom-right corners, it does not know what to predict. From its perspective, both separations are very good. +!!! warning "Exercise 5: Universal approximation of neural networks" + Proof the theorem about universal approximation of neural networks. -!!! info "Generalization:" - If a classifier does not have any data in some region, it may predict anything there, including predictions with no sense. +!!! details "Solution:" + Since piecewise linear functions are dense in the set of continuous functions, there is a piecewise linear function ``h`` such that ``\|h-g\|_{\infty}\le \varepsilon``. Assume that ``h`` has kinks at ``x_1<\dots
-``` + ```math + d_i = \frac{y_{i+1}-y_i}{x_{i+1}-x_i}, + ``` -```@raw html -
-
Exercise 5: Universal approximation of neural networks (theory)
-
-``` + then ``h`` has the form -Proof the theorem about universal approximation of neural networks. + ```math + h(x) = y_i + d_i(x-x_i) \qquad\text{ for }x\in [x_i,x_{i+1}]. + ``` -```@raw html -
-
-Solution: -
-``` + It is not difficult to show that -Since piecewise linear functions are dense in the set of continuous functions, there is a piecewise linear function ``h`` such that ``\|h-g\|_{\infty}\le \varepsilon``. Assume that ``h`` has kinks at ``x_1<\dots
-``` + Then ``h`` can be represented as the following network with two layers: + - Dense layer with ``n`` hidden neurons and ReLU activation function. Neuron ``i`` has weight ``1`` and bias ``-x_i``. + - Dense layer with ``1`` output neurons and identity activation function. Connection ``i`` has weight ``d_i-d_{i-1}`` and the joint bias is ``y_1``. + This finishes the proof. \ No newline at end of file diff --git a/docs/src/lecture_10/nn.md b/docs/src/lecture_10/nn.md index 00d891cdc..08ecf2b46 100644 --- a/docs/src/lecture_10/nn.md +++ b/docs/src/lecture_10/nn.md @@ -22,158 +22,113 @@ nothing # hide The first exercise splits the dataset into the training and testing sets. Recall that the training set is used to train the classifier, while its performance is evaluated on the testing set. Since the classifier does not see the testing set samples during training, the same performance on the training and testing sets indicates no overfitting. -```@raw html -
-
Exercise:
-
-``` - -Write the `split` function, which randomly splits the dataset and the labels into training and testing sets. Its input should be the dataset `X` and the labels `y`. It should have four outputs. Include 80% of data in the training set and 20% of data in the testing set by default. - -**Hints:** -- Use the `randperm` function from the `Random` package. -- While `y` can be assumed to a vector, `X` is a matrix or a more-dimensional array. Then it is beneficial to use the `selectdim` function to select subindices along the correct dimension. +!!! warning "Exercise:" + Write the `split` function, which randomly splits the dataset and the labels into training and testing sets. Its input should be the dataset `X` and the labels `y`. It should have four outputs. Include 80% of data in the training set and 20% of data in the testing set by default. -```@raw html -
-
-Solution: -
-``` - -The function `split` has two required arguments and two optional arguments. The first optional argument is the dimension `dims` along which the split is done. The second optional argument is the fraction of the training set. We first check whether the inputs have the same sizes along the correct dimension. Then we determine the number of samples `n_train` in the training set, create a random permutation `i_rand` and select the correct number of indices. Finally, we return the data and labels in the training and testing sets. + **Hints:** + - Use the `randperm` function from the `Random` package. + - While `y` can be assumed to a vector, `X` is a matrix or a more-dimensional array. Then it is beneficial to use the `selectdim` function to select subindices along the correct dimension. -```@example nn -using Random +!!! details "Solution:" + The function `split` has two required arguments and two optional arguments. The first optional argument is the dimension `dims` along which the split is done. The second optional argument is the fraction of the training set. We first check whether the inputs have the same sizes along the correct dimension. Then we determine the number of samples `n_train` in the training set, create a random permutation `i_rand` and select the correct number of indices. Finally, we return the data and labels in the training and testing sets. -function split(X, y::AbstractVector; dims=1, ratio_train=0.8, kwargs...) - n = length(y) - size(X, dims) == n || throw(DimensionMismatch("...")) + ```@example nn + using Random - n_train = round(Int, ratio_train*n) - i_rand = randperm(n) - i_train = i_rand[1:n_train] - i_test = i_rand[n_train+1:end] + function split(X, y::AbstractVector; dims=1, ratio_train=0.8, kwargs...) 
+ n = length(y) + size(X, dims) == n || throw(DimensionMismatch("...")) - return selectdim(X, dims, i_train), y[i_train], selectdim(X, dims, i_test), y[i_test] -end + n_train = round(Int, ratio_train*n) + i_rand = randperm(n) + i_train = i_rand[1:n_train] + i_test = i_rand[n_train+1:end] -nothing # hide -``` + return selectdim(X, dims, i_train), y[i_train], selectdim(X, dims, i_test), y[i_test] + end -We can verify its functionality by calling this function. + nothing # hide + ``` -```@example nn -X_train, y_train, X_test, y_test = split(X, y) + We can verify its functionality by calling this function. -nothing # hide -``` + ```@example nn + X_train, y_train, X_test, y_test = split(X, y) -```@raw html -
-``` + nothing # hide + ``` The following exercise normalizes the data. In the previous lecture, we have already normalized the training set. We compute the normalizing constants (mean and standard deviation) for each feature and then apply them to the data. Since the normalization needs to be done before training, and since the testing set is not available during training, the normalizing constants can be computed only from the training set. This also means that the features on the training set have zero mean and unit variance, but features on the testing set may have different mean and variance. -```@raw html -
-
Exercise:
-
-``` - -Write the `normalize` functions as described above. It should have two inputs and two outputs. The keyword argument `dims` should also be included. - -**Hint**: check the help for the `mean` function. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Write the `normalize` functions as described above. It should have two inputs and two outputs. The keyword argument `dims` should also be included. -To compute the mean of `X` along dimension `dims`, we can check the help for the `mean` function to realize that the correct command is `mean(X; dims)`. This is equivalent to `mean(X; dims=dims)`. We do the same for the standard deviation. To normalize, we need to subtract the mean and divide by the standard deviation. Since `col_means` has the same number of dimensions as `X_train`, we can use `X_train .- col_mean` to broadcast `col_mean` along the dimension mean was computed. We need to use the same normalizing constant for the training and testing sets due to the reasons mentioned above. + **Hint**: check the help for the `mean` function. -```@example nn -using Statistics +!!! details "Solution:" + To compute the mean of `X` along dimension `dims`, we can check the help for the `mean` function to realize that the correct command is `mean(X; dims)`. This is equivalent to `mean(X; dims=dims)`. We do the same for the standard deviation. To normalize, we need to subtract the mean and divide by the standard deviation. Since `col_means` has the same number of dimensions as `X_train`, we can use `X_train .- col_mean` to broadcast `col_mean` along the dimension mean was computed. We need to use the same normalizing constant for the training and testing sets due to the reasons mentioned above. -function normalize(X_train, X_test; dims=1, kwargs...) - col_mean = mean(X_train; dims) - col_std = std(X_train; dims) + ```@example nn + using Statistics - return (X_train .- col_mean) ./ col_std, (X_test .- col_mean) ./ col_std -end + function normalize(X_train, X_test; dims=1, kwargs...) 
+ col_mean = mean(X_train; dims) + col_std = std(X_train; dims) -nothing # hide -``` + return (X_train .- col_mean) ./ col_std, (X_test .- col_mean) ./ col_std + end -To obtain the normalized datasets, we run the `normalize` function. + nothing # hide + ``` -```@example nn -X_train, X_test = normalize(X_train, X_test) + To obtain the normalized datasets, we run the `normalize` function. -nothing # hide -``` + ```@example nn + X_train, X_test = normalize(X_train, X_test) -```@raw html -
-``` + nothing # hide + ``` The following exercise modifies the labels into a standard form for machine learning. -```@raw html -
-
Exercise:
-
-``` - -Write the `onehot` function that converts the labels `y` into their one-hot representation. The samples should be along the second dimension. Write the `onecold` function that converts the one-hot representation into the one-cold (original) representation. Both these functions need to have two arguments; the second one is `classes`, which equals `unique(y)`. +!!! warning "Exercise:" + Write the `onehot` function that converts the labels `y` into their one-hot representation. The samples should be along the second dimension. Write the `onecold` function that converts the one-hot representation into the one-cold (original) representation. Both these functions need to have two arguments; the second one is `classes`, which equals `unique(y)`. -Write a check that both functions work correctly. + Write a check that both functions work correctly. -**Hints:** -- The one-hot representation for a label has the size equalling to the number of classes. All entries besides one are zeros. -- Since the one-hot representation represents probabilities, the prediction is the class with the highest probability. + **Hints:** + - The one-hot representation for a label has the size equalling to the number of classes. All entries besides one are zeros. + - Since the one-hot representation represents probabilities, the prediction is the class with the highest probability. -```@raw html -
-
-Solution: -
-``` - -The `onehot` function first creates an array `y_onehot`, where the first dimension is the number of classes, and the second dimension the number of samples. Since all but one entries of each column will be zeros, we initialize it by zeros. Then we run a for loop to fill one into each column. We perform the for loop over all classes, but it is also possible to perform it over all columns. +!!! details "Solution:" + The `onehot` function first creates an array `y_onehot`, where the first dimension is the number of classes, and the second dimension the number of samples. Since all but one entries of each column will be zeros, we initialize it by zeros. Then we run a for loop to fill one into each column. We perform the for loop over all classes, but it is also possible to perform it over all columns. -```@example nn -function onehot(y, classes) - y_onehot = falses(length(classes), length(y)) - for (i, class) in enumerate(classes) - y_onehot[i, y .== class] .= 1 + ```@example nn + function onehot(y, classes) + y_onehot = falses(length(classes), length(y)) + for (i, class) in enumerate(classes) + y_onehot[i, y .== class] .= 1 + end + return y_onehot end - return y_onehot -end -nothing # hide -``` + nothing # hide + ``` -The `onecold` function finds the index of its maximum value. We repeat this for every column `y_col`. + The `onecold` function finds the index of its maximum value. We repeat this for every column `y_col`. -```@example nn -onecold(y, classes) = [classes[argmax(y_col)] for y_col in eachcol(y)] - -nothing # hide -``` + ```@example nn + onecold(y, classes) = [classes[argmax(y_col)] for y_col in eachcol(y)] -Functions `onehot` and `onecold` should be inverse to each other. That means that if we call them in succession, we obtain the original input. + nothing # hide + ``` -```@example nn -classes = unique(y) + Functions `onehot` and `onecold` should be inverse to each other. That means that if we call them in succession, we obtain the original input. 
-isequal(onecold(onehot(y, classes), classes), y) -``` + ```@example nn + classes = unique(y) -```@raw html -
-``` + isequal(onecold(onehot(y, classes), classes), y) + ``` Preparing the data is spread over many lines. It is better to combine them into the function `prepare_data`. @@ -245,34 +200,19 @@ end We will start with initializing the weights stored in the `SimpleNet` structure. -```@raw html -
-
Exercise:
-
-``` - -Write an outer constructor for `SimpleNet`. Its inputs should be three integers representing the input size of the three layers. All matrices should be initialized based on the normal distribution. - -**Hint**: think about the representation of the dense layer. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Write an outer constructor for `SimpleNet`. Its inputs should be three integers representing the input size of the three layers. All matrices should be initialized based on the normal distribution. -Since a dense layer computes ``Wx+b``, the size of ``W`` should be the layer output size times the layer input size. The bias ``b`` should be of the size of the layer output. + **Hint**: think about the representation of the dense layer. -```@example nn -SimpleNet(n1, n2, n3) = SimpleNet(randn(n2, n1), randn(n2), randn(n3, n2), randn(n3)) +!!! details "Solution:" + Since a dense layer computes ``Wx+b``, the size of ``W`` should be the layer output size times the layer input size. The bias ``b`` should be of the size of the layer output. -nothing # hide -``` + ```@example nn + SimpleNet(n1, n2, n3) = SimpleNet(randn(n2, n1), randn(n2), randn(n3, n2), randn(n3)) -```@raw html -
-``` + nothing # hide + ``` Out neural network will have five hidden neurons. Therefore, we need to initialize it with the following code. @@ -286,39 +226,24 @@ nothing # hide The following exercise computes the network prediction for samples. For a calling simplicity, we will write it as a functor. -```@raw html -
-
Exercise:
-
-``` - -Write a functor `function (m::SimpleNet)(x)` which computes the prediction (forward pass) of the neural network `SimpleNet`. - -**Bonus**: try to make the functor work for both vectors (one sample) and matrices (multiple samples) `x`. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Write a functor `function (m::SimpleNet)(x)` which computes the prediction (forward pass) of the neural network `SimpleNet`. -The dense layer is a linear function `z1 = W1*x .+ b1` followed by an activation function. If we assume that `x` is a vector, then `+` would work the same as `.+` because both `W1*x` and `b` are of the same dimension. However, if we want `x` to be a matrix (each column corresponds to one sample), we need to write `.+` because `W1*x` is a matrix and the vector `b` needs to be broadcasted to be of the same size. The activation function is the ReLU function which needs to be applied componentwise. The procedure for the second layer is the same, but we need to finish it with the softmax function. If `x` is a matrix, then `z2` is a matrix, and we specify that we want to normalize along the first dimension. If we assume only vector inputs, then specifying the dimension is not necessary. + **Bonus**: try to make the functor work for both vectors (one sample) and matrices (multiple samples) `x`. -```@example nn -function (m::SimpleNet)(x) - z1 = m.W1*x .+ m.b1 - a1 = max.(z1, 0) - z2 = m.W2*a1 .+ m.b2 - return exp.(z2) ./ sum(exp.(z2), dims=1) -end +!!! details "Solution:" + The dense layer is a linear function `z1 = W1*x .+ b1` followed by an activation function. If we assume that `x` is a vector, then `+` would work the same as `.+` because both `W1*x` and `b` are of the same dimension. However, if we want `x` to be a matrix (each column corresponds to one sample), we need to write `.+` because `W1*x` is a matrix and the vector `b` needs to be broadcasted to be of the same size. The activation function is the ReLU function which needs to be applied componentwise. The procedure for the second layer is the same, but we need to finish it with the softmax function. If `x` is a matrix, then `z2` is a matrix, and we specify that we want to normalize along the first dimension. 
If we assume only vector inputs, then specifying the dimension is not necessary. -nothing # hide -``` + ```@example nn + function (m::SimpleNet)(x) + z1 = m.W1*x .+ m.b1 + a1 = max.(z1, 0) + z2 = m.W2*a1 .+ m.b2 + return exp.(z2) ./ sum(exp.(z2), dims=1) + end -```@raw html -
-``` + nothing # hide + ``` It is simple now to evaluate the first two samples one the training set. @@ -366,83 +291,59 @@ The function returns the function value `l` and derivatives with respect to all !!! info "That's it? I thought neural networks are magic..." Well, for a network with two layers and a loss, we can compute the function value and its derivative in only 16 lines of code. -```@raw html -
-
Simple implementation:
-
-``` - -The previous function `grad` can compute the gradient for only one sample. Since the objective in training a neural network is a mean over all samples, this mean needs to be included externally. This is NOT the correct way of writing function. However, we decided to present it in the current way to keep the presentation (relatively) simple. When such a simplification is included in the code, we should include a check such as `x::AbstractVector` to prevent unexpected errors. +!!! info "Simple implementation:" + The previous function `grad` can compute the gradient for only one sample. Since the objective in training a neural network is a mean over all samples, this mean needs to be included externally. This is NOT the correct way of writing function. However, we decided to present it in the current way to keep the presentation (relatively) simple. When such a simplification is included in the code, we should include a check such as `x::AbstractVector` to prevent unexpected errors. -When we compute gradients of multiple samples, we obtain an array. Each element is a tuple with five elements from the `grad` function. + When we compute gradients of multiple samples, we obtain an array. Each element is a tuple with five elements from the `grad` function. -```@example nn -g_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] - -typeof(g_all) -``` + ```@example nn + g_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] -To compute the mean over all samples, we need to use the following obscure function. + typeof(g_all) + ``` -```@example nn -mean_tuple(d::AbstractArray{<:Tuple}) = Tuple([mean([d[k][i] for k in 1:length(d)]) for i in 1:length(d[1])]) + To compute the mean over all samples, we need to use the following obscure function. 
-nothing # hide -``` + ```@example nn + mean_tuple(d::AbstractArray{<:Tuple}) = Tuple([mean([d[k][i] for k in 1:length(d)]) for i in 1:length(d[1])]) -We see that it produces an averaged output of the `grad` function, where the average is taken with respect to all its inputs. + nothing # hide + ``` -```@example nn -g_mean = mean_tuple(g_all) + We see that it produces an averaged output of the `grad` function, where the average is taken with respect to all its inputs. -typeof(g_mean) -``` + ```@example nn + g_mean = mean_tuple(g_all) -```@raw html -
-``` + typeof(g_mean) + ``` Having the gradient at hand, we can finally train the network. -```@raw html -
-
Exercise:
-
-``` - -Train the network with a gradient descent with stepsize ``\alpha=0.1`` for ``200`` iterations. Save the objective value at each iteration and plot the results. +!!! warning "Exercise:" + Train the network with a gradient descent with stepsize ``\alpha=0.1`` for ``200`` iterations. Save the objective value at each iteration and plot the results. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + Now the process is simple. We compute the gradient `grad_all`, then its mean `grad_mean` via the already written function `mean_tuple`. The first value of the tuple `grad_mean` is the objective; the remaining are the gradients. Thus, we save the first value to an array and use the remaining one to update the weights. -Now the process is simple. We compute the gradient `grad_all`, then its mean `grad_mean` via the already written function `mean_tuple`. The first value of the tuple `grad_mean` is the objective; the remaining are the gradients. Thus, we save the first value to an array and use the remaining one to update the weights. + ```@example nn + α = 1e-1 + max_iter = 200 + L = zeros(max_iter) + for iter in 1:max_iter + grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] + grad_mean = mean_tuple(grad_all) -```@example nn -α = 1e-1 -max_iter = 200 -L = zeros(max_iter) -for iter in 1:max_iter - grad_all = [grad(m, X_train[:,k], y_train[:,k]) for k in 1:size(X_train,2)] - grad_mean = mean_tuple(grad_all) - - L[iter] = grad_mean[1] - - m.W1 .-= α*grad_mean[2] - m.b1 .-= α*grad_mean[3] - m.W2 .-= α*grad_mean[4] - m.b2 .-= α*grad_mean[5] -end + L[iter] = grad_mean[1] -nothing # hide -``` + m.W1 .-= α*grad_mean[2] + m.b1 .-= α*grad_mean[3] + m.W2 .-= α*grad_mean[4] + m.b2 .-= α*grad_mean[5] + end -```@raw html -
-``` + nothing # hide + ``` ```@example nn using Plots # hide @@ -457,36 +358,21 @@ nothing # hide We have trained our first network. We saw that the loss function keeps decreasing, which indicates a good training procedure. Now we will evaluate the performance. -```@raw html -
-
Exercise:
-
-``` - -Write a function which predict the labels for samples. Show the accuracy on both training and testing sets. - -```@raw html -
-
-Solution: -
-```
-
-The predicted probabilities are obtained by using the model `m`. The prediction (highest predicted probability) is obtained by converting the one-hot into the one-cold representation. Finally, the accuracy computes in how many cases the prediction equals to the label.
+!!! warning "Exercise:"
+    Write a function which predicts the labels for samples. Show the accuracy on both training and testing sets.

-```@example nn
-predict(X) = m(X)
-accuracy(X, y) = mean(onecold(predict(X), classes) .== onecold(y, classes))
+!!! details "Solution:"
+    The predicted probabilities are obtained by using the model `m`. The prediction (highest predicted probability) is obtained by converting the one-hot into the one-cold representation. Finally, the accuracy computes in how many cases the prediction equals the label.

-println("Train accuracy = ", accuracy(X_train, y_train))
-println("Test accuracy = ", accuracy(X_test, y_test))
+    ```@example nn
+    predict(X) = m(X)
+    accuracy(X, y) = mean(onecold(predict(X), classes) .== onecold(y, classes))

-nothing # hide
-```
+    println("Train accuracy = ", accuracy(X_train, y_train))
+    println("Test accuracy = ", accuracy(X_test, y_test))

-```@raw html
-
-``` + nothing # hide + ``` The correct answer is diff --git a/docs/src/lecture_10/theory.md b/docs/src/lecture_10/theory.md index 07e106604..f03961f3a 100644 --- a/docs/src/lecture_10/theory.md +++ b/docs/src/lecture_10/theory.md @@ -187,7 +187,7 @@ How should the classifier be evaluated? The figure above suggests that it is a b The following result shows that even shallow neural networks (not many layers) can approximate any continuous function well. As the proof suggests (Exercise 5), the price to pay is that the network needs to be extremely wide (lots of hidden neurons). -!!! bonus "BONUS: Universal approximation of neural networks" +!!! compat "BONUS: Universal approximation of neural networks" Let ``g:[a,b]\to \mathbb{R}`` be a continuous function defined on an interval. Then for every ``\varepsilon>0``, there is a neural network ``f`` such that ``\|f-g\|_{\infty}\le \varepsilon``. Moreover, this network can be chosen as a chain of the following two layers: - Dense layer with the ReLU activation function. - Dense layer with the identity activation function. @@ -196,7 +196,7 @@ A prerequisite for training neural networks is the efficient computation of deri This computation is highly efficient because the forward pass (computing function value) and the backward pass (computing derivatives) have the same complexity. This is in sharp contrast with the finite difference method, where the computation of derivatives is much more expensive. -!!! bonus "BONUS: Computation of gradients" +!!! compat "BONUS: Computation of gradients" For simplicity, we denote ``f = \operatorname{predict}`` and consider ```math L(w) := \sum_{i=1}^n \operatorname{loss}(y_i, f(w;x_i)). 
diff --git a/docs/src/lecture_11/exercises.md b/docs/src/lecture_11/exercises.md index 4f66c3ed3..a56ac7ec7 100644 --- a/docs/src/lecture_11/exercises.md +++ b/docs/src/lecture_11/exercises.md @@ -50,150 +50,120 @@ X_train, y_train, X_test, y_test = load_data(MLDatasets.MNIST; T=T, onehot=true) The first two exercises handle training neural networks on GPUs instead of CPUs. Even though this is extremely important for reducing the training time, we postponed it to the exercises because some course participants may not have a compatible GPU for training. If anyone is not able to do these two exercises, we apologize. -```@raw html -
-
Exercise 1: Operations on GPUs
-
-``` - -While most computer operations are performed on CPUs (central processing unit), neural networks are trained on other hardware such as GPUs (graphics processing unit) or specialized hardware such as TPUs. - -To use GPUs, include packages Flux and CUDA. Then generate a random matrix ``A\in \mathbb{R}^{100\times 100}`` and a random vector ``b\in \mathbb{R}^{100}``. They will be stored in the memory (RAM), and the computation will be performed on CPU. To move them to the GPU memory and allow computations on GPU, use ```gpu(A)``` or the more commonly used ```A |> gpu```. +!!! warning "Exercise 1: Operations on GPUs" + While most computer operations are performed on CPUs (central processing unit), neural networks are trained on other hardware such as GPUs (graphics processing unit) or specialized hardware such as TPUs. -Investigate how long it takes to perform multiplication ``Ab`` if both objects are on CPU, GPU or if they are saved differently. Check that both multiplications resulted in the same vector. + To use GPUs, include packages Flux and CUDA. Then generate a random matrix ``A\in \mathbb{R}^{100\times 100}`` and a random vector ``b\in \mathbb{R}^{100}``. They will be stored in the memory (RAM), and the computation will be performed on CPU. To move them to the GPU memory and allow computations on GPU, use ```gpu(A)``` or the more commonly used ```A |> gpu```. -```@raw html -
-
-Solution: -
-``` - -The beginning is simple + Investigate how long it takes to perform multiplication ``Ab`` if both objects are on CPU, GPU or if they are saved differently. Check that both multiplications resulted in the same vector. -```julia -using Flux -using CUDA +!!! details "Solution:" + The beginning is simple -A = randn(100,100) -b = randn(100) -A_g = A |> gpu -b_g = b |> gpu -``` + ```julia + using Flux + using CUDA -To test the time, we measure the time for multiplication + A = randn(100,100) + b = randn(100) + A_g = A |> gpu + b_g = b |> gpu + ``` -```julia -julia> @time A*b; -0.069785 seconds (294.76 k allocations: 15.585 MiB, 14.75% gc time) + To test the time, we measure the time for multiplication -julia> @time A_g*b_g; -0.806913 seconds (419.70 k allocations: 22.046 MiB) + ```julia + julia> @time A*b; + 0.069785 seconds (294.76 k allocations: 15.585 MiB, 14.75% gc time) -julia> @time A_g*b; -0.709140 seconds (720.01 k allocations: 34.860 MiB, 1.53% gc time) -``` + julia> @time A_g*b_g; + 0.806913 seconds (419.70 k allocations: 22.046 MiB) -We see that all three times are different. Can we infer anything from it? No! The problem is that during the first call to a function, some compilation usually takes place. We should always compare only the second time. + julia> @time A_g*b; + 0.709140 seconds (720.01 k allocations: 34.860 MiB, 1.53% gc time) + ``` -```julia -julia> @time A*b; -0.000083 seconds (1 allocation: 896 bytes) + We see that all three times are different. Can we infer anything from it? No! The problem is that during the first call to a function, some compilation usually takes place. We should always compare only the second time. 
-julia> @time A_g*b_g; -0.000154 seconds (11 allocations: 272 bytes) + ```julia + julia> @time A*b; + 0.000083 seconds (1 allocation: 896 bytes) -julia> @time A_g*b; -0.475280 seconds (10.20 k allocations: 957.125 KiB) -``` + julia> @time A_g*b_g; + 0.000154 seconds (11 allocations: 272 bytes) -We conclude that while the computation on CPU and GPU takes approximately the same time, it takes much longer when using the mixed types. + julia> @time A_g*b; + 0.475280 seconds (10.20 k allocations: 957.125 KiB) + ``` -To compare the results, the first idea would be to run + We conclude that while the computation on CPU and GPU takes approximately the same time, it takes much longer when using the mixed types. -```julia -norm(A*b - A_g*b_g) -``` + To compare the results, the first idea would be to run -which would result in an error. We cannot use any operations on arrays stored both on CPU and GPU. The correct way is to move the GPU array to CPU and only then to compute the norm + ```julia + norm(A*b - A_g*b_g) + ``` -```julia -julia> using LinearAlgebra + which would result in an error. We cannot use any operations on arrays stored both on CPU and GPU. The correct way is to move the GPU array to CPU and only then to compute the norm -julia> norm(A*b - cpu(A_g*b_g)) -1.2004562847861718e-5 -``` -The norm is surprisingly large. Checking the types + ```julia + julia> using LinearAlgebra -```julia -julia> (typeof(A), typeof(A_g)) -(Matrix{Float64}, CUDA.CuMatrix{Float32}) -``` + julia> norm(A*b - cpu(A_g*b_g)) + 1.2004562847861718e-5 + ``` + The norm is surprisingly large. Checking the types -we realize that one of the arrays is stored in ```Float64``` while the second one in ```Float32```. Due to the different number of saved digits, the multiplication results in this error. + ```julia + julia> (typeof(A), typeof(A_g)) + (Matrix{Float64}, CUDA.CuMatrix{Float32}) + ``` -```@raw html -
-``` + we realize that one of the arrays is stored in ```Float64``` while the second one in ```Float32```. Due to the different number of saved digits, the multiplication results in this error. The previous exercise did not show any differences when performing a matrix-vector multiplication. The probable reason was that the running times were too short. The following exercise shows the time difference when applied to a larger problem. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise 2:" + Load the MNIST dataset and the model saved in ```data/mnist.bson```. Compare the evaluation of all samples from the testing set when done on CPU and GPU. For the latter, you need to convert the model to GPU. -Load the MNIST dataset and the model saved in ```data/mnist.bson```. Compare the evaluation of all samples from the testing set when done on CPU and GPU. For the latter, you need to convert the model to GPU. +!!! details "Solution:" + We load the data, model and convert everything to GPU -```@raw html -
-
-Solution: -
-``` - -We load the data, model and convert everything to GPU - -```julia -using CUDA - -m = Chain( - Conv((2,2), 1=>16, relu), - MaxPool((2,2)), - Conv((2,2), 16=>8, relu), - MaxPool((2,2)), - flatten, - Dense(288, size(y_train,1)), - softmax, -) - -file_name = joinpath("data", "mnist.bson") -train_or_load!(file_name, m) + ```julia + using CUDA + + m = Chain( + Conv((2,2), 1=>16, relu), + MaxPool((2,2)), + Conv((2,2), 16=>8, relu), + MaxPool((2,2)), + flatten, + Dense(288, size(y_train,1)), + softmax, + ) -m_g = m |> gpu -X_test_g = X_test |> gpu -``` + file_name = joinpath("data", "mnist.bson") + train_or_load!(file_name, m) -Now we can measure the evaluation time. Remember that we need to compile all the functions by evaluating at least one sample before doing so. + m_g = m |> gpu + X_test_g = X_test |> gpu + ``` -```julia -m(X_test[:,:,:,1:1]) -m_g(X_test_g[:,:,:,1:1]) -``` + Now we can measure the evaluation time. Remember that we need to compile all the functions by evaluating at least one sample before doing so. -```julia -julia> @time m(X_test); -1.190033 seconds (40.24 k allocations: 1.069 GiB, 21.73% gc time) + ```julia + m(X_test[:,:,:,1:1]) + m_g(X_test_g[:,:,:,1:1]) + ``` -julia> @time m_g(X_test_g); -0.071805 seconds (789 allocations: 27.641 KiB) -``` -Using GPU speeded the computation by more than ten times. + ```julia + julia> @time m(X_test); + 1.190033 seconds (40.24 k allocations: 1.069 GiB, 21.73% gc time) -```@raw html -
-```
+    julia> @time m_g(X_test_g);
+    0.071805 seconds (789 allocations: 27.641 KiB)
+    ```
+    Using the GPU sped up the computation by more than ten times.

 !!! info "Computation on GPU:"
     Using GPUs speeds up the training of neural networks in orders of magnitude. However, one needs to be aware of some pitfalls.
@@ -210,68 +180,53 @@ Using GPU speeded the computation by more than ten times.

 Exercises which do not require GPUs start here.

-```@raw html
-
-
Exercise 3:
-
-``` - -Load the network from ```data/mnist.bson```. Then create a ``10\times 10`` table, where the ``(i+1,j+1)`` entry is the number of samples, where digit ``i`` was misclassified as digit ``j``. This matrix is called the [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). - -Convert the confusion matrix into a dataframe and add labels. +!!! warning "Exercise 3:" + Load the network from ```data/mnist.bson```. Then create a ``10\times 10`` table, where the ``(i+1,j+1)`` entry is the number of samples, where digit ``i`` was misclassified as digit ``j``. This matrix is called the [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). -```@raw html -
-
-Solution: -
-``` + Convert the confusion matrix into a dataframe and add labels. -First, we load the data as many times before +!!! details "Solution:" + First, we load the data as many times before -```@example gpuu -m = Chain( - Conv((2,2), 1=>16, relu), - MaxPool((2,2)), - Conv((2,2), 16=>8, relu), - MaxPool((2,2)), - flatten, - Dense(288, size(y_train,1)), - softmax, -) - -file_name = joinpath("data", "mnist.bson") -train_or_load!(file_name, m) -``` + ```@example gpuu + m = Chain( + Conv((2,2), 1=>16, relu), + MaxPool((2,2)), + Conv((2,2), 16=>8, relu), + MaxPool((2,2)), + flatten, + Dense(288, size(y_train,1)), + softmax, + ) -When creating a table, we specify that its entries are ```Int```. We save the predictions ```y_hat``` and labels ```y```. Since we do not use the second argument to ```onecold```, the entries of ```y_hat``` and ```y``` are between 1 and 10. Then we run a for loop over all misclassified samples and add to the error counts. + file_name = joinpath("data", "mnist.bson") + train_or_load!(file_name, m) + ``` -```@example gpuu -y_hat = onecold(m(X_test)) -y = onecold(y_test) + When creating a table, we specify that its entries are ```Int```. We save the predictions ```y_hat``` and labels ```y```. Since we do not use the second argument to ```onecold```, the entries of ```y_hat``` and ```y``` are between 1 and 10. Then we run a for loop over all misclassified samples and add to the error counts. -errors = zeros(Int, 10, 10) -for i in findall(y_hat .!= y) - errors[y[i], y_hat[i]] += 1 -end -``` + ```@example gpuu + y_hat = onecold(m(X_test)) + y = onecold(y_test) -To create the dataframe, we use ```df = DataFrame(errors)```. It prints correctly integers and not strings. We change labels x1 to miss0, ... Similarly, we add the labels as the first column. + errors = zeros(Int, 10, 10) + for i in findall(y_hat .!= y) + errors[y[i], y_hat[i]] += 1 + end + ``` -```@example gpuu -using DataFrames + To create the dataframe, we use ```df = DataFrame(errors)```. 
It prints correctly integers and not strings. We change labels x1 to miss0, ... Similarly, we add the labels as the first column. -df = DataFrame(errors, :auto) + ```@example gpuu + using DataFrames -rename!(df, [Symbol("miss$(i)") for i in 0:9]) -insertcols!(df, 1, :label => string.(0:9)) + df = DataFrame(errors, :auto) -nothing # hide -``` + rename!(df, [Symbol("miss$(i)") for i in 0:9]) + insertcols!(df, 1, :label => string.(0:9)) -```@raw html -
-``` + nothing # hide + ``` ```@example gpuu df # hide @@ -279,38 +234,23 @@ df # hide Surprisingly, the largest number of misclassifications is 9 into 7. One would expect 8 to 0, 5 to 6 or 8 to 9. We investigate this in the next exercise. -```@raw html -
-
Exercise 4:
-
-``` - -Plot all images which are ``9`` but were classified as ``7``. - -```@raw html -
-
-Solution: -
-``` - -To plot all these misclassified images, we find their indices and use the function `imageplot`. Since `y` are stored in the 1:10 format, we need to specify `classes`. +!!! warning "Exercise 4:" + Plot all images which are ``9`` but were classified as ``7``. -```julia -using ImageInspector +!!! details "Solution:" + To plot all these misclassified images, we find their indices and use the function `imageplot`. Since `y` are stored in the 1:10 format, we need to specify `classes`. -classes = 0:9 + ```julia + using ImageInspector -targets = onecold(y_test, classes) -predicts = onecold(m(X_test), classes) + classes = 0:9 -imageplot(1 .- X_test, findall((targets .== 9) .& (predicts .== 7)); nrows=3) -savefig("miss.svg") # hide -``` + targets = onecold(y_test, classes) + predicts = onecold(m(X_test), classes) -```@raw html -
-``` + imageplot(1 .- X_test, findall((targets .== 9) .& (predicts .== 7)); nrows=3) + savefig("miss.svg") # hide + ``` ![](miss.svg) @@ -318,80 +258,65 @@ We see that some of the nines could be recognized as a seven even by humans. The following exercise depicts how images propagate through the network. -```@raw html -
-
Exercise 5: Visualization of neural networks 1
-
-``` - -We know that the output of the convolutional layers has the same number of dimensions as the inputs. If the activation function is the sigmoid, the output values stay within ``[0,1]`` and can also be interpreted as images. Use the same network as before but replace ReLU by sigmoid activation functions. Load the model from ```data/mnist_sigmoid.bson``` (you can check that the model accuracy is 0.9831). - -For all digits, select the first five samples from the training set of this digit. Then create ``5\times 5`` graph (there will be 10 of them for each digit), where each column corresponds to one sample. The rows should be: -- The original image. -- The first channel of the layer after the first pooling layer. -- The last channel of the layer after the first pooling layer. -- The first channel of the layer after the second pooling layer. -- The last channel of the layer after the second pooling layer. -Discuss the images. - -```@raw html -
-
-Solution: -
-``` - -To create the network and to load the data, we use +!!! warning "Exercise 5: Visualization of neural networks 1" + We know that the output of the convolutional layers has the same number of dimensions as the inputs. If the activation function is the sigmoid, the output values stay within ``[0,1]`` and can also be interpreted as images. Use the same network as before but replace ReLU by sigmoid activation functions. Load the model from ```data/mnist_sigmoid.bson``` (you can check that the model accuracy is 0.9831). + + For all digits, select the first five samples from the training set of this digit. Then create ``5\times 5`` graph (there will be 10 of them for each digit), where each column corresponds to one sample. The rows should be: + - The original image. + - The first channel of the layer after the first pooling layer. + - The last channel of the layer after the first pooling layer. + - The first channel of the layer after the second pooling layer. + - The last channel of the layer after the second pooling layer. + Discuss the images. + +!!! details "Solution:" + To create the network and to load the data, we use + + ```@example gpuu + m = Chain( + Conv((2,2), 1=>16, sigmoid), + MaxPool((2,2)), + Conv((2,2), 16=>8, sigmoid), + MaxPool((2,2)), + flatten, + Dense(288, size(y_train,1)), + softmax, + ) -```@example gpuu -m = Chain( - Conv((2,2), 1=>16, sigmoid), - MaxPool((2,2)), - Conv((2,2), 16=>8, sigmoid), - MaxPool((2,2)), - flatten, - Dense(288, size(y_train,1)), - softmax, -) - -file_name = joinpath("data", "mnist_sigmoid.bson") -train_or_load!(file_name, m) -``` + file_name = joinpath("data", "mnist_sigmoid.bson") + train_or_load!(file_name, m) + ``` -Before plotting, we perform a for loop over the digits. Then ```onecold(y_train, classes) .== i``` creates a ```BitArray``` with ones if the condition is satisfied, and zeros if the condition is not satisfied. Then ```findall(???)``` selects all ones, and ```???[1:5]``` finds the first five indices. 
Since we need to plot the original image, and the images after the second and fourth layer (there is always a convolutional layer before the pooling layer), we save these values into ```z1```, ```z2``` and ```z3```. Then we need to access to desired channels and plot then via the `ImageInspector` package. - -```julia -using ImageInspector - -classes = 0:9 -plts = [] -for i in classes - jj = 1:5 - ii = findall(onecold(y_train, classes) .== i)[jj] - - z1 = X_train[:,:,:,ii] - z2 = m[1:2](X_train[:,:,:,ii]) - z3 = m[1:4](X_train[:,:,:,ii]) - - kwargs = (nrows = 1, size = (600, 140)) - plot( - imageplot(1 .- z1[:, :, 1, :], jj; kwargs...), - imageplot(1 .- z2[:, :, 1, :], jj; kwargs...), - imageplot(1 .- z2[:, :, end, :], jj; kwargs...), - imageplot(1 .- z3[:, :, 1, :], jj; kwargs...), - imageplot(1 .- z3[:, :, end, :], jj; kwargs...); - layout = (5,1), - size=(700,800) - ) - savefig("Layers_$(i).svg") -end -``` + Before plotting, we perform a for loop over the digits. Then ```onecold(y_train, classes) .== i``` creates a ```BitArray``` with ones if the condition is satisfied, and zeros if the condition is not satisfied. Then ```findall(???)``` selects all ones, and ```???[1:5]``` finds the first five indices. Since we need to plot the original image, and the images after the second and fourth layer (there is always a convolutional layer before the pooling layer), we save these values into ```z1```, ```z2``` and ```z3```. Then we need to access to desired channels and plot then via the `ImageInspector` package. -We plot and comment on three selected digits below. 
+ ```julia + using ImageInspector + + classes = 0:9 + plts = [] + for i in classes + jj = 1:5 + ii = findall(onecold(y_train, classes) .== i)[jj] + + z1 = X_train[:,:,:,ii] + z2 = m[1:2](X_train[:,:,:,ii]) + z3 = m[1:4](X_train[:,:,:,ii]) + + kwargs = (nrows = 1, size = (600, 140)) + plot( + imageplot(1 .- z1[:, :, 1, :], jj; kwargs...), + imageplot(1 .- z2[:, :, 1, :], jj; kwargs...), + imageplot(1 .- z2[:, :, end, :], jj; kwargs...), + imageplot(1 .- z3[:, :, 1, :], jj; kwargs...), + imageplot(1 .- z3[:, :, end, :], jj; kwargs...); + layout = (5,1), + size=(700,800) + ) + savefig("Layers_$(i).svg") + end + ``` -```@raw html -
-``` + We plot and comment on three selected digits below. Digit 0 diff --git a/docs/src/lecture_11/nn.md b/docs/src/lecture_11/nn.md index 72f281820..4aad91172 100644 --- a/docs/src/lecture_11/nn.md +++ b/docs/src/lecture_11/nn.md @@ -52,107 +52,70 @@ nothing # hide The first two exercises visualize the data and transform it into the correct input shape required by Flux. -```@raw html -
-
Exercise:
-
-``` -Plot the first 15 images of the digit 0 from the training set. +!!! warning "Exercise:" + Plot the first 15 images of the digit 0 from the training set. -**Hint**: The `ImageInspector` package written earlier provides the function `imageplot(X_train, inds; nrows=3)`, where `inds` are the desired indices. + **Hint**: The `ImageInspector` package written earlier provides the function `imageplot(X_train, inds; nrows=3)`, where `inds` are the desired indices. -**Hint**: To find the correct indices, use the function `findall`. -```@raw html -
-
-Solution: -
-``` + **Hint**: To find the correct indices, use the function `findall`. -The unique elements in `y_train` show that it represents the digits. +!!! details "Solution:" + The unique elements in `y_train` show that it represents the digits. -```@example nn -unique(y_train) -``` + ```@example nn + unique(y_train) + ``` -Then we use the `findall` function to find the indices of the first 15 images of the digit zero. + Then we use the `findall` function to find the indices of the first 15 images of the digit zero. -```@example nn -inds = findall(y_train .== 0)[1:15] + ```@example nn + inds = findall(y_train .== 0)[1:15] -nothing # hide -``` + nothing # hide + ``` -We use the `imageplot` function to plot the images. To invert the colours, we need to call it with `1 .- X_train` instead of `X_train`. + We use the `imageplot` function to plot the images. To invert the colours, we need to call it with `1 .- X_train` instead of `X_train`. -```julia -using Plots -using ImageInspector + ```julia + using Plots + using ImageInspector -imageplot(1 .- X_train, inds; nrows=3, size=(800,480)) -``` - -```@raw html -
-``` + imageplot(1 .- X_train, inds; nrows=3, size=(800,480)) + ``` ![](mnist_intro2.svg) +!!! warning "Exercise:" + Write function `reshape_data`, which reshapes `X_train` and `X_test` into the correct size required by Flux. + **Hint**: The function should work only on inputs with the correct size. This can be achieved by specifying the correct input type `X::AbstractArray{<:Real, 3}`. +!!! details "Solution:" + As we have never worked with MLDatasets, we do not know in which format the loading function returns the data. + ```@example nn + typeof(X_train) + ``` + The variable `X_train` stores a three-dimensional array of images. + ```@example nn + size(X_train) + ``` + Its size shows that the first two dimensions are the number of pixels and the last dimension are the samples. Since the images are grayscale, the dimension representing channels is missing. We need to add it. -```@raw html -
-
Exercise:
-
-``` -Write function `reshape_data`, which reshapes `X_train` and `X_test` into the correct size required by Flux. - -**Hint**: The function should work only on inputs with the correct size. This can be achieved by specifying the correct input type `X::AbstractArray{<:Real, 3}`. -```@raw html -
-
-Solution: -
-``` - -As we have never worked with MLDatasets, we do not know in which format the loading function returns the data. - -```@example nn -typeof(X_train) -``` - -The variable `X_train` stores a three-dimensional array of images. - -```@example nn -size(X_train) -``` - -Its size shows that the first two dimensions are the number of pixels and the last dimension are the samples. Since the images are grayscale, the dimension representing channels is missing. We need to add it. - -```@example nn -function reshape_data(X::AbstractArray{<:Real, 3}) - s = size(X) - return reshape(X, s[1], s[2], 1, s[3]) -end - -nothing # hide -``` - -We specify that the input array has three dimensions via `X::AbstractArray{T, 3}`. This may prevent surprises when called with different input size. - -```@raw html -
-``` - - + ```@example nn + function reshape_data(X::AbstractArray{<:Real, 3}) + s = size(X) + return reshape(X, s[1], s[2], 1, s[3]) + end + nothing # hide + ``` + We specify that the input array has three dimensions via `X::AbstractArray{T, 3}`. This may prevent surprises when called with different input size. We write now the function `load_data`, which loads the data and transform it into the correct shape. The keyword argument `onehot` specifies whether the labels should be converted into their one-hot representation. The `dataset` keyword specifies which dataset to load. It can be any dataset from the MLDatasets package, or we can even use datasets outside of this package provided that we define the `traindata` and `testdata` functions for it. @@ -186,72 +149,44 @@ X_train, y_train, X_test, y_test = load_data(MLDatasets.MNIST; T=T, onehot=true) nothing # hide ``` - - - - - - - The previous example mentioned that `load_data` is rather general. The next exercise makes it work for datasets with coloured images. -```@raw html -
-
Exercise:
-
-``` -Try to load the CIFAR10 dataset via the `load_data` function and fix the error in one line of code. - -**Hint**: Use ` dataset = MLDatasets.CIFAR10`. -```@raw html -
-
-Solution: -
-``` - -We first load the data in the same way as before. - -```julia -load_data(MLDatasets.CIFAR10; T=T, onehot=true) -``` -```julia -│ MethodError: no method matching reshape_data(::Array{Float32,4}) -│ Closest candidates are: -│ reshape_data(::AbstractArray{T,3} where T) where T -``` - -It results in an error which states that the `reshape_function` functon is not defined for inputs with 4 dimensions. We did not implement it because MNIST contains grayscale images, which leads to arrays with 3 dimensions. To fix the problem, it suffices to add a method to the `reshape_data` function. - -```@example nn -reshape_data(X::AbstractArray{<:Real, 4}) = X - -nothing # hide -``` - -Now we can load the data. - -```julia -typeof(load_data(MLDatasets.CIFAR10; T=T, onehot=true)) -``` - -```julia -Tuple{Array{Float32,4},Flux.OneHotMatrix{Array{Flux.OneHotVector,1}},Array{Float32,4},Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}} -``` +!!! warning "Exercise:" + Try to load the CIFAR10 dataset via the `load_data` function and fix the error in one line of code. -We see that it correctly returned a tuple of four items. + **Hint**: Use ` dataset = MLDatasets.CIFAR10`. -```@raw html -
-``` +!!! details "Solution:" + We first load the data in the same way as before. + ```julia + load_data(MLDatasets.CIFAR10; T=T, onehot=true) + ``` + ```julia + │ MethodError: no method matching reshape_data(::Array{Float32,4}) + │ Closest candidates are: + │ reshape_data(::AbstractArray{T,3} where T) where T + ``` + It results in an error which states that the `reshape_function` functon is not defined for inputs with 4 dimensions. We did not implement it because MNIST contains grayscale images, which leads to arrays with 3 dimensions. To fix the problem, it suffices to add a method to the `reshape_data` function. + ```@example nn + reshape_data(X::AbstractArray{<:Real, 4}) = X + nothing # hide + ``` + Now we can load the data. + ```julia + typeof(load_data(MLDatasets.CIFAR10; T=T, onehot=true)) + ``` + ```julia + Tuple{Array{Float32,4},Flux.OneHotMatrix{Array{Flux.OneHotVector,1}},Array{Float32,4},Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}} + ``` + We see that it correctly returned a tuple of four items. ## Training and storing the network @@ -263,98 +198,57 @@ L(w) = \frac1n\sum_{i=1}^n \operatorname{loss}(y_i, \operatorname{predict}(w; x_ The gradient descent works with the derivative ``\nabla L(w)``, which contains the mean over all samples. Since the MNIST training set size is 50000, evaluating one full gradient is costly. For this reasons, the gradient is approximated by a mean over a small number of samples. This small set is called a minibatch, and this accelerated method stochastic gradient descent. - - - - - - - The following exercise splits the dataset into minibatches. While we can do it manually, Flux provides a simple way to do so. -```@raw html -
-
Exercise:
-
-``` -Use the help of the function `DataLoader` to split the dataset into minibatches. - -**Hint**: It needs to be imported from Flux via `using Flux.Data: DataLoader`. -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Use the help of the function `DataLoader` to split the dataset into minibatches. -We first load the function `DataLoader`. + **Hint**: It needs to be imported from Flux via `using Flux.Data: DataLoader`. -```@example nn -using Flux.Data: DataLoader -``` +!!! details "Solution:" + We first load the function `DataLoader`. -The in-built help shows us how to call this function. It also includes multiple examples. + ```@example nn + using Flux.Data: DataLoader + ``` -```julia -help?> DataLoader -search: + The in-built help shows us how to call this function. It also includes multiple examples. - DataLoader(data; batchsize=1, shuffle=false, partial=true) -``` + ```julia + help?> DataLoader + search: -We use the following code to split the dataset into minibatches. We need to include both `X_train` and `y_train` to perform the partition for the data and the labels. - -```@example nn -batchsize = 32 -batches = DataLoader((X_train, y_train); batchsize, shuffle = true) - -nothing # hide -``` - -```@raw html -
-``` + DataLoader(data; batchsize=1, shuffle=false, partial=true) + ``` + We use the following code to split the dataset into minibatches. We need to include both `X_train` and `y_train` to perform the partition for the data and the labels. + ```@example nn + batchsize = 32 + batches = DataLoader((X_train, y_train); batchsize, shuffle = true) + nothing # hide + ``` +!!! compat "BONUS: Manually splitting the dataset" + We can do the same procedure manually. To create minibatches, we create a random partition of all indices `randperm(size(y, 2))` and use function `partition` to create an iterator, which creates the minibatches in the form of tuples ``(X,y)``. + ```julia + using Base.Iterators: partition + using Random + batches = map(partition(randperm(size(y, 2)), batchsize)) do inds + return (X[:, :, :, inds], y[:, inds]) + end + ``` + This procedure is equivalent to the `map` function. + ```julia + [(X[:, :, :, inds], y[:, inds]) for inds in partition(randperm(size(y, 2)), batchsize)] + ``` -```@raw html -
-
BONUS: Manually splitting the dataset
-
-``` -We can do the same procedure manually. To create minibatches, we create a random partition of all indices `randperm(size(y, 2))` and use function `partition` to create an iterator, which creates the minibatches in the form of tuples ``(X,y)``. - -```julia -using Base.Iterators: partition -using Random - -batches = map(partition(randperm(size(y, 2)), batchsize)) do inds - return (X[:, :, :, inds], y[:, inds]) -end -``` - -This procedure is equivalent to the `map` function. - -```julia -[(X[:, :, :, inds], y[:, inds]) for inds in partition(randperm(size(y, 2)), batchsize)] -``` - -The type of `batches` is a one-dimensional array (vector) of tuples. -```@raw html -
-``` - - - - - - + The type of `batches` is a one-dimensional array (vector) of tuples. To build the objective ``L``, we first specify the prediction function ``\operatorname{predict}``. We keep the usual convention and denote it by model `m`. It is a composition of seven layers: - Two convolutional layers extract low-level features from the images. @@ -423,56 +317,32 @@ nothing # hide The function `train_model!` first splits the datasets into minibatches `batches` and then uses the optimizer for `n_epochs` epochs. In one epoch, the model `m` evaluates all samples exactly once. Therefore, the optimizer performs the same number of gradient updates as the number of minibatches during one epoch. On the other hand, the standard gradient descent makes only one gradient update during one epoch. The default optimizer is the stochastic gradient descent with stepsize ``0.1``. Since we do not need an index in the loop, we use `_`. Finally, if `file_name` is non-empty, the function saves the trained model `m`. +!!! warning "Exercise:" + Train the model for one epoch and save it to `MNIST_simple.bson`. Print the accuracy on the testing set. +!!! details "Solution:" + To train the model, it suffices to call the previously written function. + ```@example nn + file_name = "mnist_simple.bson" + train_model!(m, L, X_train, y_train; n_epochs=1, file_name=file_name) + nothing # hide + ``` + The accuracy has been computed many times during the course. + ```@example nn + using Statistics + accuracy(x, y) = mean(onecold(m(x)) .== onecold(y)) + "Test accuracy = " * string(accuracy(X_test, y_test)) + nothing # hide + ``` - - -```@raw html -
-
Exercise:
-
-``` -Train the model for one epoch and save it to `MNIST_simple.bson`. Print the accuracy on the testing set. -```@raw html -
-
-Solution: -
-``` - -To train the model, it suffices to call the previously written function. - -```@example nn -file_name = "mnist_simple.bson" -train_model!(m, L, X_train, y_train; n_epochs=1, file_name=file_name) - -nothing # hide -``` - -The accuracy has been computed many times during the course. - -```@example nn -using Statistics - -accuracy(x, y) = mean(onecold(m(x)) .== onecold(y)) - -"Test accuracy = " * string(accuracy(X_test, y_test)) - -nothing # hide -``` - -We defined ```accuracy``` in a different way than before. Can you spot the difference and explain why they are equivalent? - -```@raw html -
-``` + We defined ```accuracy``` in a different way than before. Can you spot the difference and explain why they are equivalent? ```@example nn println("Test accuracy = ", accuracy(X_test, y_test)) # hide @@ -480,60 +350,45 @@ println("Test accuracy = ", accuracy(X_test, y_test)) # hide The accuracy is over 93%, which is not bad for training for one epoch only. Let us recall that training for one epoch means that the classifier evaluates each sample only once. To obtain better accuracy, we need to train the model for more epochs. Since that may take some time, it is not good to train the same model repeatedly. The following exercise determines automatically whether the trained model already exists. If not, it trains it. -```@raw html -
-
Exercise:
-
-``` - -Write a function `train_or_load!(file_name, m, args...; ???)` checking whether the file `file_name` exists. -- If it exists, it loads it and then copies its parameters into `m` using the function `Flux.loadparams!`. -- If it does not exist, it trains it using `train_model!`. -In both cases, the model `m` should be modified inside the `train_or_load!` function. Pay special attention to the optional arguments `???`. - -Use this function to load the model from `data/mnist.bson` and evaluate the performance at the testing set. +!!! warning "Exercise:" + Write a function `train_or_load!(file_name, m, args...; ???)` checking whether the file `file_name` exists. + - If it exists, it loads it and then copies its parameters into `m` using the function `Flux.loadparams!`. + - If it does not exist, it trains it using `train_model!`. + In both cases, the model `m` should be modified inside the `train_or_load!` function. Pay special attention to the optional arguments `???`. -```@raw html -
-
-Solution: -
-``` + Use this function to load the model from `data/mnist.bson` and evaluate the performance at the testing set. -The optional arguments should contain `kwargs...`, which will be passed to `train_model!`. Besides that, we include `force` which enforces that the model is trained even if it already exists. +!!! details "Solution:" + The optional arguments should contain `kwargs...`, which will be passed to `train_model!`. Besides that, we include `force` which enforces that the model is trained even if it already exists. -First, we should check whether the directory exists ```!isdir(dirname(file_name))``` and if not, we create it ```mkpath(dirname(file_name))```. Then we check whether the file exists (or whether we want to enforce the training). If yes, we train the model, which already modifies ```m```. If not, we ```BSON.load``` the model and copy the loaded parameters into ```m``` by ```Flux.loadparams!(m, Flux.params(m_loaded))```. We cannot load directly into ```m``` instead of ```m_loaded``` because that would create a local copy of ```m``` and the function would not modify the external ```m```. + First, we should check whether the directory exists ```!isdir(dirname(file_name))``` and if not, we create it ```mkpath(dirname(file_name))```. Then we check whether the file exists (or whether we want to enforce the training). If yes, we train the model, which already modifies ```m```. If not, we ```BSON.load``` the model and copy the loaded parameters into ```m``` by ```Flux.loadparams!(m, Flux.params(m_loaded))```. We cannot load directly into ```m``` instead of ```m_loaded``` because that would create a local copy of ```m``` and the function would not modify the external ```m```. -```@example nn -function train_or_load!(file_name, m, args...; force=false, kwargs...) + ```@example nn + function train_or_load!(file_name, m, args...; force=false, kwargs...) 
- !isdir(dirname(file_name)) && mkpath(dirname(file_name)) + !isdir(dirname(file_name)) && mkpath(dirname(file_name)) - if force || !isfile(file_name) - train_model!(m, args...; file_name=file_name, kwargs...) - else - m_weights = BSON.load(file_name)[:m] - Flux.loadparams!(m, Flux.params(m_weights)) + if force || !isfile(file_name) + train_model!(m, args...; file_name=file_name, kwargs...) + else + m_weights = BSON.load(file_name)[:m] + Flux.loadparams!(m, Flux.params(m_weights)) + end end -end - -nothing # hide -``` -To load the model, we should use `joinpath` to be compatible with all operating systems. The accuracy is evaluated as before. + nothing # hide + ``` -```@example nn -file_name = joinpath("data", "mnist.bson") -train_or_load!(file_name, m, L, X_train, y_train) + To load the model, we should use `joinpath` to be compatible with all operating systems. The accuracy is evaluated as before. -"Test accuracy = " * string(accuracy(X_test, y_test)) + ```@example nn + file_name = joinpath("data", "mnist.bson") + train_or_load!(file_name, m, L, X_train, y_train) -nothing # hide -``` + "Test accuracy = " * string(accuracy(X_test, y_test)) -```@raw html -
-``` + nothing # hide + ``` ```@example nn println("Test accuracy = " * string(accuracy(X_test, y_test))) # hide diff --git a/docs/src/lecture_11/theory.md b/docs/src/lecture_11/theory.md index 1c5ae0670..5a22545cb 100644 --- a/docs/src/lecture_11/theory.md +++ b/docs/src/lecture_11/theory.md @@ -82,7 +82,7 @@ When an input is an image, the usual structure of the neural network is the foll - Softmax layer. - Cross-entropy loss function. -!!! bonus "BONUS: Additional layers" +!!! compat "BONUS: Additional layers" Practical convolutional layers involve additional complexities such as layers with even size (we showed only even sizes), padding (should zeros be added or should the output image be smaller) or stride (should there be any distance between convolutions). This goes, however, beyond the lecture. #### Recurrent layer diff --git a/docs/src/lecture_12/glm.md b/docs/src/lecture_12/glm.md index e57dc7952..628eef5f1 100644 --- a/docs/src/lecture_12/glm.md +++ b/docs/src/lecture_12/glm.md @@ -63,41 +63,26 @@ nothing # hide The following exercise performs the ``t``-test to check whether the data come from a distribution with zero mean. -```@raw html -
-
Exercise:
-
-``` - -Use the ``t``-test to verify whether the samples were generated from a distribution with zero mean. - -**Hints:** -- The Student's distribution is invoked by `TDist()`. -- The probability ``\mathbb P(T\le t)`` equals to the [distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function) ``F(t)``, which can be called by `cdf`. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Use the ``t``-test to verify whether the samples were generated from a distribution with zero mean. -We compute the statistic ``t``, then define the Student's distribution with ``n-1`` degrees of freedom, evaluate the distribution function at ``t`` and finally compute the ``p``-value. + **Hints:** + - The Student's distribution is invoked by `TDist()`. + - The probability ``\mathbb P(T\le t)`` equals to the [distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function) ``F(t)``, which can be called by `cdf`. -```@example glm -using Distributions +!!! details "Solution:" + We compute the statistic ``t``, then define the Student's distribution with ``n-1`` degrees of freedom, evaluate the distribution function at ``t`` and finally compute the ``p``-value. -t = mean(xs) / std(xs) * sqrt(n) + ```@example glm + using Distributions -prob = cdf(TDist(n-1), t) -p = 2*min(prob, 1-prob) -``` + t = mean(xs) / std(xs) * sqrt(n) -The ``p``-value is significantly larger than ``5\%``. Therefore, we cannot reject the zero hypothesis, which is fortunate because the data were generated from the normal distribution with zero mean. + prob = cdf(TDist(n-1), t) + p = 2*min(prob, 1-prob) + ``` -```@raw html -
-``` + The ``p``-value is significantly larger than ``5\%``. Therefore, we cannot reject the zero hypothesis, which is fortunate because the data were generated from the normal distribution with zero mean. Even though the computation of the ``p``-value is simple, we can use the [HypothesisTests](https://juliastats.org/HypothesisTests.jl/stable/) package. When we run the test, it gives us the same results as we computed. @@ -208,48 +193,33 @@ model = lm(@formula(W ~ 1 + N + Y + I + K + F), wages) The table shows the parameter values and their confidence intervals. Besides that, it also tests the null hypothesis ``H_0: w_j = 0`` whether some of the regression coefficients can be omitted. The ``t`` statistics is in column `t`, while its ``p``-value in column `Pr(>|t|)`. The next exercise checks whether we can achieve the same results with fewer features. -```@raw html -
-
Exercise:
-
-``` - -Check that the solution computed by hand and by `lm` are the same. - -Then remove the feature with the highest ``p``-value and observe whether there was any performance drop. The performance is usually evaluated by the [coeffient of determination](https://en.wikipedia.org/wiki/Coefficient_of_determination) denoted by ``R^2\in[0,1]``. Its higher values indicate a better model. - -**Hint**: Use functions `coef` and `r2`. +!!! warning "Exercise:" + Check that the solution computed by hand and by `lm` are the same. -```@raw html -
-
-Solution: -
-``` + Then remove the feature with the highest ``p``-value and observe whether there was any performance drop. The performance is usually evaluated by the [coeffient of determination](https://en.wikipedia.org/wiki/Coefficient_of_determination) denoted by ``R^2\in[0,1]``. Its higher values indicate a better model. -Since the parameters for both approaches are almost the same, the approaches give the same result. + **Hint**: Use functions `coef` and `r2`. -```@example glm -norm(coef(model) - w0) -``` +!!! details "Solution:" + Since the parameters for both approaches are almost the same, the approaches give the same result. -The table before this exercise shows that the ``p``-value for feature ``K`` is ``3.3\%``. We define the reduced model without this feature. + ```@example glm + norm(coef(model) - w0) + ``` -```@example glm -model_red = lm(@formula(W ~ 1 + N + Y + I + F), wages) -``` + The table before this exercise shows that the ``p``-value for feature ``K`` is ``3.3\%``. We define the reduced model without this feature. -Now we show the performances of both models. + ```@example glm + model_red = lm(@formula(W ~ 1 + N + Y + I + F), wages) + ``` -```@example glm -(r2(model), r2(model_red)) -``` + Now we show the performances of both models. -Since we observe only a small performance drop, we could omit this feature without changing the model prediction capability. + ```@example glm + (r2(model), r2(model_red)) + ``` -```@raw html -
-``` + Since we observe only a small performance drop, we could omit this feature without changing the model prediction capability. The core assumption of this approach is that ``y`` follows the normal distribution. We use the `predict` function for predictions and then use the `plot_histogram` function written earlier to plot the histogram and a density of the normal distribution. For the normal distribution, we need to specify the correct mean and variance. @@ -285,45 +255,30 @@ model = glm(@formula(W ~ 1 + N + Y + I + K + F), wages, InverseGaussian(), SqrtL The following exercise plots the predictions for the generalized linear model. -```@raw html -
-
Exercise:
-
-``` - -Create the scatter plot of predictions and labels. Do not use the `predict` function. - -```@raw html -
-
-Solution: -
-``` - -Due to the construction of the generalized linear model, the prediction equals ``g^{-1}(w^\top x)``. We save it into ``\hat y``. +!!! warning "Exercise:" + Create the scatter plot of predictions and labels. Do not use the `predict` function. -```@example glm -g_inv(z) = z^2 +!!! details "Solution:" + Due to the construction of the generalized linear model, the prediction equals ``g^{-1}(w^\top x)``. We save it into ``\hat y``. -y_hat = g_inv.(X*coef(model)) + ```@example glm + g_inv(z) = z^2 -nothing # hide -``` + y_hat = g_inv.(X*coef(model)) -The scatter plot is now simple. + nothing # hide + ``` -```@example glm -scatter(y, y_hat; - label="", - xlabel="Label", - ylabel="Prediction", -) + The scatter plot is now simple. -savefig("glm_predict.svg") # hide -``` + ```@example glm + scatter(y, y_hat; + label="", + xlabel="Label", + ylabel="Prediction", + ) -```@raw html -
-``` + savefig("glm_predict.svg") # hide + ``` ![](glm_predict.svg) diff --git a/docs/src/lecture_12/monte.md b/docs/src/lecture_12/monte.md index c8d88971a..bd9cec51f 100644 --- a/docs/src/lecture_12/monte.md +++ b/docs/src/lecture_12/monte.md @@ -36,45 +36,30 @@ plot(0:0.1:10, gamma; Machine learning datasets contain many features. Even simple datasets such as MNIST live in ``28\times 28=784`` dimensions. However, we humans are unable to think in more than three dimensions. Working with more-dimensional spaces can bring many surprises. This section computes the volume of ``m``-dimensional balls. Before we start, try to guess the volume of the unit balls in ``\mathbb R^{10}`` and ``\mathbb R^{100}``. -```@raw html -
-
Exercise:
-
-``` - -Use the [formula](https://en.wikipedia.org/wiki/Volume_of_an_n-ball) to compute the volume of a ``m``-dimensional ball. Plot the dependence of the volume on the dimension ``m=1,\dots,100``. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Use the [formula](https://en.wikipedia.org/wiki/Volume_of_an_n-ball) to compute the volume of a ``m``-dimensional ball. Plot the dependence of the volume on the dimension ``m=1,\dots,100``. -The formula can be easily transferred to a function. +!!! details "Solution:" + The formula can be easily transferred to a function. -```@example monte -volume_true(m, R) = π^(m/2) *R^2 / gamma(m/2 + 1) + ```@example monte + volume_true(m, R) = π^(m/2) *R^2 / gamma(m/2 + 1) -nothing # hide -``` + nothing # hide + ``` -Then we create the plot. We use the log-scaling of the ``y``-axis. + Then we create the plot. We use the log-scaling of the ``y``-axis. -```@example monte -plot(1:100, m -> volume_true.(m, 1); - xlabel="dimension", - ylabel="unit ball volume: log scale", - label="", - yscale=:log10, -) - -savefig("dimension1.svg") # hide -``` + ```@example monte + plot(1:100, m -> volume_true.(m, 1); + xlabel="dimension", + ylabel="unit ball volume: log scale", + label="", + yscale=:log10, + ) -```@raw html -
-``` + savefig("dimension1.svg") # hide + ``` ![](dimension1.svg) @@ -82,41 +67,26 @@ This result may be surprising. While the volume of the ``10``-dimensional ball i The following exercise uses the Monte Carlo sampling to estimate this volume. We will sample points in the hypercube ``[-1,1]^m`` and then compute the unit ball volume by realizing that the volume of the ball divided by the volume of the box equals the fraction of sampled points inside the ball. -```@raw html -
-
Exercise:
-
-``` - -Write the function `volume_monte_carlo`, which estimates the volume of the ``m``-dimensional ball based on ``n`` randomly sampled points. +!!! warning "Exercise:" + Write the function `volume_monte_carlo`, which estimates the volume of the ``m``-dimensional ball based on ``n`` randomly sampled points. -**Hint**: function `rand(m,n)` creates a ``m\times n`` matrix, which can be understood as ``n`` randomly sampled points in ``[0,1]^m``. Transform them to ``[-1,1]^m``. + **Hint**: function `rand(m,n)` creates a ``m\times n`` matrix, which can be understood as ``n`` randomly sampled points in ``[0,1]^m``. Transform them to ``[-1,1]^m``. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + To transform the random variable from ``[0,1]`` to ``[-1,1]``, we need to multiply it by two and subtract one. Then we compute the norm of each sampled point. The estimated volume is computed as the fraction of the points whose norm is smaller than one multiplied by the hypercube volume. The latter equals to ``2^m``. -To transform the random variable from ``[0,1]`` to ``[-1,1]``, we need to multiply it by two and subtract one. Then we compute the norm of each sampled point. The estimated volume is computed as the fraction of the points whose norm is smaller than one multiplied by the hypercube volume. The latter equals to ``2^m``. + ```@example monte + using Random + using Statistics -```@example monte -using Random -using Statistics + function volume_monte_carlo(m::Int; n::Int=10000) + X = 2*rand(m, n).-1 + X_norm_sq = sum(X.^2; dims=1) + return 2^m*mean(X_norm_sq .<= 1) + end -function volume_monte_carlo(m::Int; n::Int=10000) - X = 2*rand(m, n).-1 - X_norm_sq = sum(X.^2; dims=1) - return 2^m*mean(X_norm_sq .<= 1) -end - -nothing # hide -``` - -```@raw html -
-``` + nothing # hide + ``` The next figure shows the estimated volume from ``n\in \{10, 1000, 100000\}`` samples for the unit ball in dimension ``m=1,\dots,15``. @@ -147,24 +117,17 @@ savefig("dimension2.svg") # hide It is not surprising that with increasing dimension, we need a much larger number of samples to obtain good estimates. This number grows exponentially with the dimension. This phenomenon explains why machine learning models with large feature spaces need lots of data. Moreover, the number of samples should increase with the complexity of the input and the network. -```@raw html -
-
Generating from the uniform distribution:
-
-``` -While we wrote our function for generating from the uniform distribution, we can also use the Distributions package. +!!! info "Generating from the uniform distribution:" + While we wrote our function for generating from the uniform distribution, we can also use the Distributions package. -```@example -using Distributions + ```@example + using Distributions -rand(Uniform(-1, 1), 10, 5) -nothing # hide -``` + rand(Uniform(-1, 1), 10, 5) + nothing # hide + ``` -We will discuss this topic more in the following section. -```@raw html -
-``` + We will discuss this topic more in the following section. ## Sampling from distributions @@ -226,36 +189,21 @@ We may work with a distribution ``d`` for which we know the density ``f``, but t - the upper bound ``f_{\rm max}`` for the density ``f``. The rejection sampling technique first randomly samples a trial point ``x\in [x_{\rm min}, x_{\rm max}]`` and a scalar ``p\in [0,f_{\rm max}]``. It accepts ``x`` if ``p \le f(x)`` and rejects it otherwise. This technique ensures that a point is accepted with a probability proportional to its density function value. -```@raw html -
-
Exercise:
-
-``` - -Implement the `rejection_sampling` function. It should generate ``n`` trial points and return all accepted points. +!!! warning "Exercise:" + Implement the `rejection_sampling` function. It should generate ``n`` trial points and return all accepted points. -```@raw html -
-
-Solution: -
-``` +!!! details "Solution:" + While it is possible to generate the random points one by one, we prefer to generate them all at once and discard the rejected samples. The function follows precisely the steps summarized before this exercise. -While it is possible to generate the random points one by one, we prefer to generate them all at once and discard the rejected samples. The function follows precisely the steps summarized before this exercise. + ```@example monte + function rejection_sampling(f, f_max, x_min, x_max; n=1000000) + xs = x_min .+ (x_max - x_min)*rand(n) + ps = f_max*rand(n) + return xs[f.(xs) .>= ps] + end -```@example monte -function rejection_sampling(f, f_max, x_min, x_max; n=1000000) - xs = x_min .+ (x_max - x_min)*rand(n) - ps = f_max*rand(n) - return xs[f.(xs) .>= ps] -end - -nothing # hide -``` - -```@raw html -
-``` + nothing # hide + ``` We will now use the rejection sampling technique to generate the random samples from the three distributions from above. Since the density ``f`` of the normal distribution achieves its maximum at the mean, we specify `f_max = f(d.μ)`. @@ -282,142 +230,118 @@ nothing # hide While the rejection sampling provides a good approximation for the first two distributions, it performs subpar for the last distribution. The reason is that the rejection sampling is sensitive to the choice of the interval ``[x_{\rm min}, x_{\rm max}]``. Because we chose the interval ``[-10,10]`` and ``f_3`` has negligible values outside of the interval ``[-0.1,0.1]``, most trial points got rejected. It is not difficult to verify that from the ``1000000`` trial points, only approximately ``1200`` got accepted. The small number of accepted points makes for the poor approximation. If we generated from a narrower interval, the results would be much better. -```@raw html -
-
BONUS: Using rejection sampling to compute expected value
-
-``` - -This exercise computes the expected value - -```math -\mathbb E_3 \cos(100X) = \int_{-\infty}^\infty \cos(100 x) f_3(x) dx, -``` +!!! compat "BONUS: Using rejection sampling to compute expected value" + This exercise computes the expected value -where we consider the expectation ``\mathbb E`` with respect to ``d_3\sim N(0, 0.01)`` with density ``f_3``. The first possibility to compute the expectation is to discretize the integral. + ```math + \mathbb E_3 \cos(100X) = \int_{-\infty}^\infty \cos(100 x) f_3(x) dx, + ``` -```@example monte -h(x) = cos(100*x) + where we consider the expectation ``\mathbb E`` with respect to ``d_3\sim N(0, 0.01)`` with density ``f_3``. The first possibility to compute the expectation is to discretize the integral. -Δx = 0.001 -xs = range(xlims...; step=Δx) -e0 = Δx * sum(f3.(xs) .* h.(xs)) + ```@example monte + h(x) = cos(100*x) -nothing # hide -``` + Δx = 0.001 + xs = range(xlims...; step=Δx) + e0 = Δx * sum(f3.(xs) .* h.(xs)) -The second possibility is to approximate the integral by + nothing # hide + ``` -```math -\mathbb E_3 \cos(100X) \approx \frac 1n\sum_{i=1}^n \cos(x_i), -``` + The second possibility is to approximate the integral by -where ``x_i`` are sampled from ``d_3``. We do this in `expectation1`, and `expectation2`, where the formed generates from the Distributions package while the latter uses our rejection sampling. We use the method of the `mean` function, which takes a function as its first argument. + ```math + \mathbb E_3 \cos(100X) \approx \frac 1n\sum_{i=1}^n \cos(x_i), + ``` -```@example monte -expectation1(h, d; n = 1000000) = mean(h, rand(d, n)) + where ``x_i`` are sampled from ``d_3``. We do this in `expectation1`, and `expectation2`, where the formed generates from the Distributions package while the latter uses our rejection sampling. We use the method of the `mean` function, which takes a function as its first argument. 
-function expectation2(h, f, f_max, xlims; n=1000000) - return mean(h, rejection_sampling(f, f_max, xlims...; n)) -end + ```@example monte + expectation1(h, d; n = 1000000) = mean(h, rand(d, n)) -nothing # hide -``` + function expectation2(h, f, f_max, xlims; n=1000000) + return mean(h, rejection_sampling(f, f_max, xlims...; n)) + end -If it is difficult to sample from ``d_3``, we can use a trick to sample from some other distribution. This is based on the following formula: + nothing # hide + ``` -```math -\mathbb E_3 h(x) = \int_{-\infty}^\infty h(x) f_3(x) dx = \int_{-\infty}^\infty h(x) \frac{f_3(x)}{f_1(x)}f_1(x) dx = \mathbb E_1 \frac{h(x)f_3(x)}{f_1(x)}. -``` + If it is difficult to sample from ``d_3``, we can use a trick to sample from some other distribution. This is based on the following formula: -This gives rise to another implementation of the same thing. + ```math + \mathbb E_3 h(x) = \int_{-\infty}^\infty h(x) f_3(x) dx = \int_{-\infty}^\infty h(x) \frac{f_3(x)}{f_1(x)}f_1(x) dx = \mathbb E_1 \frac{h(x)f_3(x)}{f_1(x)}. + ``` -```@example monte -function expectation3(h, f, d_gen; n=1000000) - g(x) = h(x)*f(x)/pdf(d_gen, x) - return mean(g, rand(d_gen, n)) -end + This gives rise to another implementation of the same thing. -nothing # hide -``` + ```@example monte + function expectation3(h, f, d_gen; n=1000000) + g(x) = h(x)*f(x)/pdf(d_gen, x) + return mean(g, rand(d_gen, n)) + end -We run these three approaches for ``20`` repetitions. + nothing # hide + ``` -```@example monte -n = 100000 -n_rep = 20 + We run these three approaches for ``20`` repetitions. 
-Random.seed!(666) -e1 = [expectation1(h, d3; n=n) for _ in 1:n_rep] -e2 = [expectation2(h, f3, f3(d3.μ), xlims; n=n) for _ in 1:n_rep] -e3 = [expectation3(h, f3, d1; n=n) for _ in 1:n_rep] + ```@example monte + n = 100000 + n_rep = 20 -nothing # hide -``` + Random.seed!(666) + e1 = [expectation1(h, d3; n=n) for _ in 1:n_rep] + e2 = [expectation2(h, f3, f3(d3.μ), xlims; n=n) for _ in 1:n_rep] + e3 = [expectation3(h, f3, d1; n=n) for _ in 1:n_rep] -Finally, we plot the results. Sampling from the package gives the best results because it generates the full amount of points, while the rejection sampling rejects many points. + nothing # hide + ``` -```@example monte -scatter([1], [e0]; label="Integral discretization", legend=:topleft) -scatter!(2*ones(n_rep), e1; label="Generating from Distributions.jl") -scatter!(3*ones(n_rep), e2; label="Generating from rejection sampling") -scatter!(4*ones(n_rep), e3; label="Generating from other distribution") -``` + Finally, we plot the results. Sampling from the package gives the best results because it generates the full amount of points, while the rejection sampling rejects many points. -This exercise considered the computation of a one-dimensional integral. It is important to realize that even for such a simple case, it is necessary to sample a sufficiently large number of points. Even when we sampled ``100000`` points, there is still some variance in the results, as the last three columns show. + ```@example monte + scatter([1], [e0]; label="Integral discretization", legend=:topleft) + scatter!(2*ones(n_rep), e1; label="Generating from Distributions.jl") + scatter!(3*ones(n_rep), e2; label="Generating from rejection sampling") + scatter!(4*ones(n_rep), e3; label="Generating from other distribution") + ``` -```@raw html -
-``` + This exercise considered the computation of a one-dimensional integral. It is important to realize that even for such a simple case, it is necessary to sample a sufficiently large number of points. Even when we sampled ``100000`` points, there is still some variance in the results, as the last three columns show. # How many samples do we need? Previous sections showed that we need many samples to obtain a good approximation of a desired quantity. The natural question is, how exactly many samples do we need? Even though many results estimate such errors, unfortunately, the answer depends on the application. This section will present two examples. The first one shows the distance of sampled points in a more-dimensional space, while the second one computes quantiles. -```@raw html -
-
Exercise:
-
-``` - -Sample ``n=1000`` points in the unit cube in the ``m=9``-dimensional space. What is the minimum distance of these points? Before implementing the exercise, try to guess the answer. +!!! warning "Exercise:" + Sample ``n=1000`` points in the unit cube in the ``m=9``-dimensional space. What is the minimum distance of these points? Before implementing the exercise, try to guess the answer. -```@raw html -
-
-Solution: -
-```
-
-We first sample the points.
-
-```@example monte
-n = 1000
-m = 9
+!!! details "Solution:"
+    We first sample the points.
 
-Random.seed!(666)
-xs = rand(m, n)
+    ```@example monte
+    n = 1000
+    m = 9
 
-nothing # hide
-```
+    Random.seed!(666)
+    xs = rand(m, n)
 
-Then we save the pairwise of points in `dist1`. Since this variable contains zeros on the diagonal, and since lower and upper diagonal are the same, we select only the upper part of the matrix and save it into `dist2`.
+    nothing # hide
+    ```
 
-```@example monte
-using LinearAlgebra
+    Then we save the pairwise distances of points in `dist1`. Since this variable contains zeros on the diagonal, and since lower and upper diagonal are the same, we select only the upper part of the matrix and save it into `dist2`.
 
-dist1 = [norm(x-y) for x in eachcol(xs), y in eachcol(xs)]
-dist2 = [dist1[i,j] for i in 1:n for j in i+1:n]
+    ```@example monte
+    using LinearAlgebra
 
-nothing # hide
-```
+    dist1 = [norm(x-y) for x in eachcol(xs), y in eachcol(xs)]
+    dist2 = [dist1[i,j] for i in 1:n for j in i+1:n]
 
-This approach has the disadvantage that it allocates an ``n\times n`` matrix.
+    nothing # hide
+    ```
 
-```@raw html
-
-``` + This approach has the disadvantage that it allocates an ``n\times n`` matrix. The minimum of these distances is roughly ``0.2``, while the maximum is ``2.2``. The minimum is surprisingly high and shows that sampling even ``1000`` points in ``\mathbb R^9`` forms a very sparse structure. The maximum distance is far away from the distance of two corners of the hypercube, which equals ``\sqrt{m}=3``. @@ -425,105 +349,96 @@ The minimum of these distances is roughly ``0.2``, while the maximum is ``2.2``. extrema(dist2) ``` -```@raw html -
-
BONUS: Approximating the quantiles
-
-```
-
-Quantiles form an important concept in statistics. Its definition is slightly complicated; we will consider only absolutely continuous random variables: one-dimensional variables ``X`` with continuous density ``f``. Then the quantile at a level ``\alpha\in[0,1]`` is the unique point ``x`` such that
-
-```math
-\mathbb P(X\le x) = \int_{-\infty}^x f(x)dx = \alpha.
-```
-
-The quantile at level ``\alpha=0.5`` is the mean. Quantiles play an important role in estimates, where they form upper and lower bounds for confidence intervals. They are also used in hypothesis testing.
-
-This part will investigate how quantiles on a finite sample differ from the true quantile. We will consider two ways of computing the quantile. Both of them sample ``n`` points from some distribution ``d``. The first one follows the statistical definition and selects the index of the ``n\alpha`` smallest observation by the `partialsort` function. The second one uses the function `quantile`, which performs some interpolation.
+!!! compat "BONUS: Approximating the quantiles"
+    Quantiles form an important concept in statistics. Its definition is slightly complicated; we will consider only absolutely continuous random variables: one-dimensional variables ``X`` with continuous density ``f``. Then the quantile at a level ``\alpha\in[0,1]`` is the unique point ``x`` such that
 
-```@example monte
-quantile_sampled1(d, n::Int, α) = partialsort(rand(d, n), floor(Int, α*n))
-quantile_sampled2(d, n::Int, α) = quantile(rand(d, n), α)
+    ```math
+    \mathbb P(X\le x) = \int_{-\infty}^x f(t)\,dt = \alpha.
+    ```
 
-nothing # hide
-```
+    The quantile at level ``\alpha=0.5`` is the median. Quantiles play an important role in estimates, where they form upper and lower bounds for confidence intervals. They are also used in hypothesis testing.
 
-We defined the vectorized version. This is not efficient because for every ``n``, the samples will be randomly generated again. 
+ This part will investigate how quantiles on a finite sample differ from the true quantile. We will consider two ways of computing the quantile. Both of them sample ``n`` points from some distribution ``d``. The first one follows the statistical definition and selects the index of the ``n\alpha`` smallest observation by the `partialsort` function. The second one uses the function `quantile`, which performs some interpolation. -```@example monte -quantile_sampled1(d, ns::AbstractVector, α) = quantile_sampled1.(d, ns, α) -quantile_sampled2(d, ns::AbstractVector, α) = quantile_sampled2.(d, ns, α) + ```@example monte + quantile_sampled1(d, n::Int, α) = partialsort(rand(d, n), floor(Int, α*n)) + quantile_sampled2(d, n::Int, α) = quantile(rand(d, n), α) -nothing # hide -``` + nothing # hide + ``` -We generate the quantile for ``\alpha = 0.99`` and repeat it 20 times. + We defined the vectorized version. This is not efficient because for every ``n``, the samples will be randomly generated again. -```@example monte -α = 0.99 -n_rep = 20 -ns = round.(Int, 10 .^ (1:0.05:5)) + ```@example monte + quantile_sampled1(d, ns::AbstractVector, α) = quantile_sampled1.(d, ns, α) + quantile_sampled2(d, ns::AbstractVector, α) = quantile_sampled2.(d, ns, α) -Random.seed!(666) -qs1 = hcat([quantile_sampled1(d1, ns, α) for _ in 1:n_rep]...) -Random.seed!(666) -qs2 = hcat([quantile_sampled2(d1, ns, α) for _ in 1:n_rep]...) + nothing # hide + ``` -nothing # hide -``` + We generate the quantile for ``\alpha = 0.99`` and repeat it 20 times. -We initialize the plot with the line for the true quantile. Since this will be part of both plots, we create just one and use `deepcopy` to create the other one. 
+ ```@example monte + α = 0.99 + n_rep = 20 + ns = round.(Int, 10 .^ (1:0.05:5)) -```@example monte -plt1 = plot([0.9*minimum(ns); 1.1*maximum(ns)], quantile(d1, α)*ones(2); - xlabel="n: log scale", - ylabel="sampled quantile", - xscale=:log10, - label="True quantile", - line=(4,:black), - ylims=(0,3.5), -) -plt2 = deepcopy(plt1) + Random.seed!(666) + qs1 = hcat([quantile_sampled1(d1, ns, α) for _ in 1:n_rep]...) + Random.seed!(666) + qs2 = hcat([quantile_sampled2(d1, ns, α) for _ in 1:n_rep]...) -nothing # hide -``` + nothing # hide + ``` -Now we add the sampled quantiles and the mean over all repetitions. Since we work with two plots, we specify into which plot we want to add the new data. It would be better to create a function for plotting and call it for `qs1` and `qs2`, but we wanted to show how to work two plots simultaneously. + We initialize the plot with the line for the true quantile. Since this will be part of both plots, we create just one and use `deepcopy` to create the other one. -```@example monte -for i in 1:size(qs1,1) - scatter!(plt1, ns[i]*ones(size(qs1,2)), qs1[i,:]; - label="", - color=:blue, - markersize = 2, + ```@example monte + plt1 = plot([0.9*minimum(ns); 1.1*maximum(ns)], quantile(d1, α)*ones(2); + xlabel="n: log scale", + ylabel="sampled quantile", + xscale=:log10, + label="True quantile", + line=(4,:black), + ylims=(0,3.5), ) - scatter!(plt2, ns[i]*ones(size(qs2,2)), qs2[i,:]; - label="", - color=:blue, - markersize = 2, + plt2 = deepcopy(plt1) + + nothing # hide + ``` + + Now we add the sampled quantiles and the mean over all repetitions. Since we work with two plots, we specify into which plot we want to add the new data. It would be better to create a function for plotting and call it for `qs1` and `qs2`, but we wanted to show how to work two plots simultaneously. 
+ + ```@example monte + for i in 1:size(qs1,1) + scatter!(plt1, ns[i]*ones(size(qs1,2)), qs1[i,:]; + label="", + color=:blue, + markersize = 2, + ) + scatter!(plt2, ns[i]*ones(size(qs2,2)), qs2[i,:]; + label="", + color=:blue, + markersize = 2, + ) + end + + plot!(plt1, ns, mean(qs1; dims=2); + label="Sampled mean", + line=(4,:red), + ) + plot!(plt2, ns, mean(qs2; dims=2); + label="Sampled mean", + line=(4,:red), ) -end - -plot!(plt1, ns, mean(qs1; dims=2); - label="Sampled mean", - line=(4,:red), -) -plot!(plt2, ns, mean(qs2; dims=2); - label="Sampled mean", - line=(4,:red), -) - -display(plt1) -display(plt2) -savefig(plt1, "quantile1.svg") # hide -savefig(plt2, "quantile2.svg") # hide -``` -![](quantile1.svg) -![](quantile2.svg) + display(plt1) + display(plt2) + savefig(plt1, "quantile1.svg") # hide + savefig(plt2, "quantile2.svg") # hide + ``` -Both sampled estimates give a lower estimate than the true quantile. In statistical methodology, these estimates are biased. We observe that the interpolated estimate is closer to the true value and that computing the quantile even on ``10000`` points gives an uncertainty interval of approximately ``0.25``. + ![](quantile1.svg) + ![](quantile2.svg) -```@raw html -
-``` + Both sampled estimates give a lower estimate than the true quantile. In statistical methodology, these estimates are biased. We observe that the interpolated estimate is closer to the true value and that computing the quantile even on ``10000`` points gives an uncertainty interval of approximately ``0.25``. diff --git a/docs/src/lecture_12/sparse.md b/docs/src/lecture_12/sparse.md index 045bd8b1a..df6b78535 100644 --- a/docs/src/lecture_12/sparse.md +++ b/docs/src/lecture_12/sparse.md @@ -134,64 +134,49 @@ nothing # hide The first exercise compares both approaches to solving the ridge regression. -```@raw html -
-
Exercise:
-
-```
+!!! warning "Exercise:"
+    Implement the methods for the `ridge_reg` function. Verify that both approaches give the same result.
 
-Implement the methods for the `ridge_reg` function. Verify that the result in the same result.
+    **Hints:**
+    - The eigendecomposition can be found by `eigen(A)` or `eigen(A).values`.
+    - The identity matrix is implemented by `I` in the `LinearAlgebra` package.
 
-**Hints:**
-- The eigendecomposition can be found by `eigen(A)` or `eigen(A).values`.
-- The identity matrix is implemented by `I` in the `LinearAlgebra` package.
 
+!!! details "Solution:"
+    The simple implementation for the solution is the same as in the case of linear regression. We only need to add `μ*I`.
 
-```@raw html
-
-
-Solution: -
-``` + ```@example sparse + ridge_reg(X, y, μ) = (X'*X + μ*I) \ (X'*y) -The simple implementation for the solution is the same as in the case of linear regression. We only need to add `μ*I`. + nothing # hide + ``` -```@example sparse -ridge_reg(X, y, μ) = (X'*X + μ*I) \ (X'*y) + We first compute the eigendecomposition and save it into `eigen_dec`. Then we extract the eigenvector and eigenvalues. We also transpose the matrix ``Q`` and save it into `Q_inv` so that we do not have to compute it repeatedly. -nothing # hide -``` + ```@example sparse + eigen_dec = eigen(X'*X) + Q = eigen_dec.vectors + Q_inv = Matrix(Q') + λ = eigen_dec.values -We first compute the eigendecomposition and save it into `eigen_dec`. Then we extract the eigenvector and eigenvalues. We also transpose the matrix ``Q`` and save it into `Q_inv` so that we do not have to compute it repeatedly. + nothing # hide + ``` -```@example sparse -eigen_dec = eigen(X'*X) -Q = eigen_dec.vectors -Q_inv = Matrix(Q') -λ = eigen_dec.values + The more sophisticated way of solving the ridge regression contains only matrix-vector multiplication and the inversion of the diagonal matrix ``(\Lambda + \mu I)^{-1}``. We need to properly add paranthesis, to start multiplication from the right and evade matrix-matrix multiplication, which would occur if we started from the left. Since the matrix ``\Lambda + \mu I`` is diagonal, its inverse is the digonal matrix formed from the inverted diagonal. -nothing # hide -``` + ```@example sparse + ridge_reg(X, y, μ, Q, Q_inv, λ) = Q * ((Diagonal(1 ./ (λ .+ μ)) * ( Q_inv * (X'*y)))) -The more sophisticated way of solving the ridge regression contains only matrix-vector multiplication and the inversion of the diagonal matrix ``(\Lambda + \mu I)^{-1}``. We need to properly add paranthesis, to start multiplication from the right and evade matrix-matrix multiplication, which would occur if we started from the left. 
Since the matrix ``\Lambda + \mu I`` is diagonal, its inverse is the digonal matrix formed from the inverted diagonal. + nothing # hide + ``` -```@example sparse -ridge_reg(X, y, μ, Q, Q_inv, λ) = Q * ((Diagonal(1 ./ (λ .+ μ)) * ( Q_inv * (X'*y)))) + When we compare both solution, we see that they are the same. -nothing # hide -``` - -When we compare both solution, we see that they are the same. - -```@example sparse -w1 = ridge_reg(X, y, 10) -w2 = ridge_reg(X, y, 10, Q, Q_inv, λ) - -norm(w1 - w2) -``` + ```@example sparse + w1 = ridge_reg(X, y, 10) + w2 = ridge_reg(X, y, 10, Q, Q_inv, λ) -```@raw html -
-``` + norm(w1 - w2) + ``` To test the speed, we use the `BenchmarkTools` package. The second option is significantly faster. The price to pay is the need to pre-compute the matrix decomposition. diff --git a/docs/src/lecture_13/diff_eq.md b/docs/src/lecture_13/diff_eq.md index c250bbf6d..40218de62 100644 --- a/docs/src/lecture_13/diff_eq.md +++ b/docs/src/lecture_13/diff_eq.md @@ -60,45 +60,31 @@ sol(0.8) The following exercise shows how to specify the interpolation technique and compares the results. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + When calling the `solve` function, we can specify the interpolation way. Solve the ODE with linear interpolation (`dense=false`) and the Runge-Kutta method of the fourth order (`RK4()`). Plot the results and compare them with the default and original solutions. -When calling the `solve` function, we can specify the interpolation way. Solve the ODE with linear interpolation (`dense=false`) and the Runge-Kutta method of the fourth order (`RK4()`). Plot the results and compare them with the default and original solutions. +!!! details "Solution:" + To compute the additional solutions, we add the arguments as specified above. -```@raw html -
-
-Solution: -
-``` + ```@example intro + sol2 = solve(prob, dense=false) + sol3 = solve(prob, RK4()) -To compute the additional solutions, we add the arguments as specified above. + nothing # hide + ``` -```@example intro -sol2 = solve(prob, dense=false) -sol3 = solve(prob, RK4()) + We create a discretization ```ts``` of the time interval and then plot the four functions. -nothing # hide -``` - -We create a discretization ```ts``` of the time interval and then plot the four functions. + ```@example intro + ts = range(tspan...; length = 100) -```@example intro -ts = range(tspan...; length = 100) + plot(ts, t->exp(0.98*t), label="True solution", legend=:topleft) + plot!(ts, t->sol(t), label="Default") + plot!(ts, t->sol2(t), label="Linear") + plot!(ts, t->sol3(t), label="Runge-Kutta") -plot(ts, t->exp(0.98*t), label="True solution", legend=:topleft) -plot!(ts, t->sol(t), label="Default") -plot!(ts, t->sol2(t), label="Linear") -plot!(ts, t->sol3(t), label="Runge-Kutta") - -savefig("Comparison.svg") # hide -``` -```@raw html -
-``` + savefig("Comparison.svg") # hide + ``` ![](Comparison.svg) @@ -185,43 +171,28 @@ plot(traj[1,:], traj[2,:], traj[3,:]; label="") In the introduction, we mentioned chaos theory. We will elaborate on this in the following exercise. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Use the `nextfloat` function to perturb the first parameter of `p` by the smallest possible value. Then solve the Lorenz system again and compare the results by plotting the two trajectories next to each other. -Use the `nextfloat` function to perturb the first parameter of `p` by the smallest possible value. Then solve the Lorenz system again and compare the results by plotting the two trajectories next to each other. +!!! details "Solution:" + We start with the smallest possible perturbation of the initial value. -```@raw html -
-
-Solution: -
-``` + ```@example intro + p0 = (nextfloat(p[1]), p[2:end]...) + ``` -We start with the smallest possible perturbation of the initial value. + Then we plot the graphs as before -```@example intro -p0 = (nextfloat(p[1]), p[2:end]...) -``` - -Then we plot the graphs as before - -```@example intro -prob0 = ODEProblem(lorenz, u0, tspan, p0) -sol0 = solve(prob0) - -plt0 = plot(sol0, vars=(1,2,3), label="") + ```@example intro + prob0 = ODEProblem(lorenz, u0, tspan, p0) + sol0 = solve(prob0) -plot(plt1, plt0; layout=(1,2)) + plt0 = plot(sol0, vars=(1,2,3), label="") -savefig("lorenz4.svg") # hide -``` + plot(plt1, plt0; layout=(1,2)) -```@raw html -
-``` + savefig("lorenz4.svg") # hide + ``` ![](lorenz4.svg) @@ -233,28 +204,14 @@ hcat(sol(tspan[2]), sol0(tspan[2])) shows that they are different by a large margin. This raises a natural question. -```@raw html -
-
Exercise:
-
-``` - -Can we trust the solutions? Why? +!!! warning "Exercise:" + Can we trust the solutions? Why? -```@raw html -
-
-Solution: -
-``` - -Unfortunately, we cannot. Numerical methods always introduce some errors by -- *Rounding errors* due to representing real numbers in machine precision. -- *Discretization errors* for continuous systems when the finite difference method approximates the derivative. -However, if the system itself is unstable and an extremely small perturbation results in big differences in solutions, the numerical method even enhances these errors. The solution could be trusted on some small interval but not after it. +!!! details "Solution:" + Unfortunately, we cannot. Numerical methods always introduce some errors by + - *Rounding errors* due to representing real numbers in machine precision. + - *Discretization errors* for continuous systems when the finite difference method approximates the derivative. + However, if the system itself is unstable and an extremely small perturbation results in big differences in solutions, the numerical method even enhances these errors. The solution could be trusted on some small interval but not after it. -```@raw html -
-``` The following section shows a situation where we try to mitigate this possible effect by using mathematical formulas to compute the exact solution as long as possible. This aproach delays the necessary discretization and may bring better stability. diff --git a/docs/src/lecture_13/ode.md b/docs/src/lecture_13/ode.md index 91b3af232..fcf05e2b3 100644 --- a/docs/src/lecture_13/ode.md +++ b/docs/src/lecture_13/ode.md @@ -43,58 +43,43 @@ A similar formula for the second derivatives reads The following exercise derives the mathematical formulas needed for solving the wave equation. -```@raw html -
-
Exercise:
-
-``` - -Consider equidistant discretizations with stepsizes ``\Delta t`` and ``\Delta x``. Derive mathematical formulas for solving the one-dimensional wave equation on ``[0,T]\times [0,L]`` by applying finite differences in time and space. Do not write any code. - -**Hint**: Start with the initial time and compute the solution after each time step. Use the condition on ``f`` at the first time step, the condition on ``g`` at the second time step and the wave equation at further steps. +!!! warning "Exercise:" + Consider equidistant discretizations with stepsizes ``\Delta t`` and ``\Delta x``. Derive mathematical formulas for solving the one-dimensional wave equation on ``[0,T]\times [0,L]`` by applying finite differences in time and space. Do not write any code. -```@raw html -
-
-Solution: -
-``` - -The wave equation needs to satisfy the boundary conditions + **Hint**: Start with the initial time and compute the solution after each time step. Use the condition on ``f`` at the first time step, the condition on ``g`` at the second time step and the wave equation at further steps. -```math -y(t,0) = f(0),\qquad y(t,L) = f(L) \qquad\text{ for all }t\in\{0,\Delta t,2\Delta t,\dots,T\} -``` +!!! details "Solution:" + The wave equation needs to satisfy the boundary conditions -and the initial conditions + ```math + y(t,0) = f(0),\qquad y(t,L) = f(L) \qquad\text{ for all }t\in\{0,\Delta t,2\Delta t,\dots,T\} + ``` -```math -y(0,x) = f(x) \qquad\text{ for all }x\in\{\Delta x,2\Delta x,\dots,L-\Delta x\}. -``` + and the initial conditions -We exclude ``x\in \{0,L\}`` from the last equation because the boundary conditions already prescribe these values. + ```math + y(0,x) = f(x) \qquad\text{ for all }x\in\{\Delta x,2\Delta x,\dots,L-\Delta x\}. + ``` -Now we start increasing time. For the values at ``\Delta t``, we approximate the initial condition for the derivative by the finite difference and get + We exclude ``x\in \{0,L\}`` from the last equation because the boundary conditions already prescribe these values. -```math -y(\Delta t, x) = y(0, x) + \Delta t g(x). -``` + Now we start increasing time. For the values at ``\Delta t``, we approximate the initial condition for the derivative by the finite difference and get -At further times, we use the finite difference approximation of the second derivative to arrive at + ```math + y(\Delta t, x) = y(0, x) + \Delta t g(x). + ``` -```math -\frac{y(t+\Delta t,x) - 2y(t,x) + y(t-\Delta t,x)}{\Delta t^2} = c^2 \frac{y(t,x+\Delta x) - 2y(t,x) + y(t,x-\Delta x)}{\Delta x^2}. -``` + At further times, we use the finite difference approximation of the second derivative to arrive at -Since we already know the values at ``t`` and ``t - \Delta t``, we rearrange the previous formula to obtain the values at the next time. 
This yields the final formula: + ```math + \frac{y(t+\Delta t,x) - 2y(t,x) + y(t-\Delta t,x)}{\Delta t^2} = c^2 \frac{y(t,x+\Delta x) - 2y(t,x) + y(t,x-\Delta x)}{\Delta x^2}. + ``` -```math -y(t + \Delta t,x) = \frac{c^2\Delta t^2}{\Delta x^2} \Big(y(t,x + \Delta x) - 2y(t,x) + y(t,x - \Delta x)\Big) + 2y(t,x) - y(t - \Delta t,x). -``` + Since we already know the values at ``t`` and ``t - \Delta t``, we rearrange the previous formula to obtain the values at the next time. This yields the final formula: -```@raw html -
-``` + ```math + y(t + \Delta t,x) = \frac{c^2\Delta t^2}{\Delta x^2} \Big(y(t,x + \Delta x) - 2y(t,x) + y(t,x - \Delta x)\Big) + 2y(t,x) - y(t - \Delta t,x). + ``` The most challenging part is done: We have finished the discretization scheme. Now we need to code it. We will employ a structure storing the wave equation parameters. @@ -108,68 +93,53 @@ end The first exercise solves the wave equation. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Write the function `solve_wave(T, L, wave::Wave; n_t=100, n_x=100)` that solves the wave equation. -Write the function `solve_wave(T, L, wave::Wave; n_t=100, n_x=100)` that solves the wave equation. + **Hint**: Follow the procedure from the previous exercise. Discretize time and space, initialize the solution, add the boundary conditions, add the initial conditions and finally, iterate over time. -**Hint**: Follow the procedure from the previous exercise. Discretize time and space, initialize the solution, add the boundary conditions, add the initial conditions and finally, iterate over time. +!!! details "Solution:" + We first discretize both time and space by the `range` function. Then we initialize the matrix `y`. We decide that the first dimension corresponds to time and the second one to space. We set the boundary conditions and fill `y[:,1]` with `wave.f(0)` and `y[:,end]` with `wave.f(L)`. Since the wave at the initial moment equals to ``f``, we set `y[1,2:end-1] = wave.f.(xs[2:end-1])`. Since the condition at ``t=\Delta t`` amount to -```@raw html -
-
-Solution: -
-``` + ```math + y(\Delta t, x) = y(0, x) + \Delta t g(x), + ``` -We first discretize both time and space by the `range` function. Then we initialize the matrix `y`. We decide that the first dimension corresponds to time and the second one to space. We set the boundary conditions and fill `y[:,1]` with `wave.f(0)` and `y[:,end]` with `wave.f(L)`. Since the wave at the initial moment equals to ``f``, we set `y[1,2:end-1] = wave.f.(xs[2:end-1])`. Since the condition at ``t=\Delta t`` amount to + we write `y[2,2:end-1] = y[1,2:end-1] + Δt*wave.g.(xs[2:end-1])`. We must not forget to exclude the boundary points because the string position is attached there. For the remaining times, we use the formula -```math -y(\Delta t, x) = y(0, x) + \Delta t g(x), -``` + ```math + y(t + \Delta t,x) = \frac{c^2\Delta t^2}{\Delta x^2} \Big(y(t,x + \Delta x) - 2y(t,x) + y(t,x - \Delta x)\Big) + 2y(t,x) - y(t - \Delta t,x). + ``` -we write `y[2,2:end-1] = y[1,2:end-1] + Δt*wave.g.(xs[2:end-1])`. We must not forget to exclude the boundary points because the string position is attached there. For the remaining times, we use the formula + This gives rise to the following function. -```math -y(t + \Delta t,x) = \frac{c^2\Delta t^2}{\Delta x^2} \Big(y(t,x + \Delta x) - 2y(t,x) + y(t,x - \Delta x)\Big) + 2y(t,x) - y(t - \Delta t,x). -``` + ```@example wave + function solve_wave(T, L, wave::Wave; n_t=100, n_x=100) + ts = range(0, T; length=n_t) + xs = range(0, L; length=n_x) + Δt = ts[2] - ts[1] + Δx = xs[2] - xs[1] + y = zeros(n_t, n_x) + + # boundary conditions + y[:,1] .= wave.f(0) + y[:,end] .= wave.f(L) -This gives rise to the following function. 
+ # initial conditions + y[1,2:end-1] = wave.f.(xs[2:end-1]) + y[2,2:end-1] = y[1,2:end-1] + Δt*wave.g.(xs[2:end-1]) -```@example wave -function solve_wave(T, L, wave::Wave; n_t=100, n_x=100) - ts = range(0, T; length=n_t) - xs = range(0, L; length=n_x) - Δt = ts[2] - ts[1] - Δx = xs[2] - xs[1] - y = zeros(n_t, n_x) - - # boundary conditions - y[:,1] .= wave.f(0) - y[:,end] .= wave.f(L) - - # initial conditions - y[1,2:end-1] = wave.f.(xs[2:end-1]) - y[2,2:end-1] = y[1,2:end-1] + Δt*wave.g.(xs[2:end-1]) - - # solution for t = 2Δt, 3Δt, ..., T - for t in 2:n_t-1, x in 2:n_x-1 - ∂y_xx = (y[t, x+1] - 2*y[t, x] + y[t, x-1])/Δx^2 - y[t+1, x] = c^2 * Δt^2 * ∂y_xx + 2*y[t, x] - y[t-1, x] - end - - return y -end + # solution for t = 2Δt, 3Δt, ..., T + for t in 2:n_t-1, x in 2:n_x-1 + ∂y_xx = (y[t, x+1] - 2*y[t, x] + y[t, x-1])/Δx^2 + y[t+1, x] = c^2 * Δt^2 * ∂y_xx + 2*y[t, x] - y[t-1, x] + end -nothing # hide -``` + return y + end -```@raw html -
-``` + nothing # hide + ``` The best visualization of the wave equation is via animation. Each frame will be a plot of a row of `y`. We use the keyword arguments `kwargs`, where we store additional arguments for plotting. We run the for loop over all rows, create the animation via the `@animate` macro and save it into `anim`. To save the animation to the hard drive, we use the `gif` function. @@ -198,60 +168,45 @@ nothing # hide Now we can finally plot the solution. -```@raw html -
-
Exercise:
-
-``` +!!! warning "Exercise:" + Solve the wave equation for ``L=\frac32\pi``, ``T=240``, ``c=0.02`` and the initial conditions -Solve the wave equation for ``L=\frac32\pi``, ``T=240``, ``c=0.02`` and the initial conditions + ```math + \begin{aligned} + f(x) &= 2e^{-(x-\frac L2)^2} + \frac{x}{L}, \\ + g(x) &= 0. + \end{aligned} + ``` -```math -\begin{aligned} -f(x) &= 2e^{-(x-\frac L2)^2} + \frac{x}{L}, \\ -g(x) &= 0. -\end{aligned} -``` + Use time discretization with stepsize ``\Delta t=1`` and the space discretization with number of points ``n_x=101`` and ``n_x=7`` steps. Plot two graphs. -Use time discretization with stepsize ``\Delta t=1`` and the space discretization with number of points ``n_x=101`` and ``n_x=7`` steps. Plot two graphs. +!!! details "Solution:" + First, we assign the parameters -```@raw html -
-
-Solution: -
-``` - -First, we assign the parameters + ```@example wave + f(x,L) = 2*exp(-(x-L/2)^2) + x/L + g(x) = 0 -```@example wave -f(x,L) = 2*exp(-(x-L/2)^2) + x/L -g(x) = 0 + L = 1.5*pi + T = 240 + c = 0.02 -L = 1.5*pi -T = 240 -c = 0.02 + nothing # hide + ``` -nothing # hide -``` + Now we create the `wave` structure, compute the solution and plot it for with different values of ``n_x``. -Now we create the `wave` structure, compute the solution and plot it for with different values of ``n_x``. + ```@example wave + wave = Wave(x -> f(x,L), g, c) -```@example wave -wave = Wave(x -> f(x,L), g, c) + y1 = solve_wave(T, L, wave; n_t=241, n_x=101) + plot_wave(y1, "wave1.gif"; ylims=(-2,3), label="") -y1 = solve_wave(T, L, wave; n_t=241, n_x=101) -plot_wave(y1, "wave1.gif"; ylims=(-2,3), label="") + y2 = solve_wave(T, L, wave; n_t=241, n_x=7) + plot_wave(y2, "wave2.gif"; ylims=(-2,3), label="") -y2 = solve_wave(T, L, wave; n_t=241, n_x=7) -plot_wave(y2, "wave2.gif"; ylims=(-2,3), label="") - -nothing # hide -``` - -```@raw html -
-``` + nothing # hide + ``` ![](wave1.gif) diff --git a/docs/src/lecture_13/optimal_control.md b/docs/src/lecture_13/optimal_control.md index c76aace03..d8fb07d2d 100644 --- a/docs/src/lecture_13/optimal_control.md +++ b/docs/src/lecture_13/optimal_control.md @@ -117,114 +117,84 @@ nothing # hide The first exercise checks that we computed the matrix exponential correctly. -```@raw html -
-
Exercise:
-
-``` - -Verify that the matrix exponential is computed correctly and that it is different from the elementwise exponential. - -**Hint**: The matrix exponential can also be computed directly by the `exp` function from the `LinearAlgebra` package. - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Verify that the matrix exponential is computed correctly and that it is different from the elementwise exponential. -A simple way to verify is to fix some ``t`` and evaluate the expressions above. + **Hint**: The matrix exponential can also be computed directly by the `exp` function from the `LinearAlgebra` package. -```@example oc -using LinearAlgebra +!!! details "Solution:" + A simple way to verify is to fix some ``t`` and evaluate the expressions above. -t = 5 -exp0 = exp.(t*ps.A) -exp1 = exp(t*ps.A) -exp2 = expA(ps, t) + ```@example oc + using LinearAlgebra -nothing # hide -``` + t = 5 + exp0 = exp.(t*ps.A) + exp1 = exp(t*ps.A) + exp2 = expA(ps, t) -While `exp1` and `exp2` must be identical, they must differ from ```exp0```. Since there are rounding errors for different methods, the matrices will not be identical, and we need to check whether their norm is almost zero. + nothing # hide + ``` -```@example oc -norm(exp1 - exp0) >= 1e-10 || error("Matrices are wrong") -norm(exp1 - exp2) <= 1e-10 || error("Matrices are wrong") + While `exp1` and `exp2` must be identical, they must differ from ```exp0```. Since there are rounding errors for different methods, the matrices will not be identical, and we need to check whether their norm is almost zero. -nothing # hide -``` + ```@example oc + norm(exp1 - exp0) >= 1e-10 || error("Matrices are wrong") + norm(exp1 - exp2) <= 1e-10 || error("Matrices are wrong") -Since the computation resulted in no error (note the opposite sign for ```exp0```), our computation seems to be correct. + nothing # hide + ``` -```@raw html -
-``` + Since the computation resulted in no error (note the opposite sign for ```exp0```), our computation seems to be correct. Now we can finally plot the trajectories of the electric motor. -```@raw html -
-
Exercise:
-
-``` - -Write two function `trajectory_fin_diff` and `trajectory_exact` which compute the trajectory. The first one should use the finite difference method to discretize the time, while the second one should use the closed-form formula. +!!! warning "Exercise:" + Write two function `trajectory_fin_diff` and `trajectory_exact` which compute the trajectory. The first one should use the finite difference method to discretize the time, while the second one should use the closed-form formula. -Plot both trajectories on time interval ``[0,10]`` with time discretization step ``\Delta t=0.01``. Since ``x(t)`` is a two-dimensional vector, plot each component on one axis. + Plot both trajectories on time interval ``[0,10]`` with time discretization step ``\Delta t=0.01``. Since ``x(t)`` is a two-dimensional vector, plot each component on one axis. -```@raw html -
-
-Solution: -
-``` - -Both functions create an empty structure for the solution and then iterate over time. Since finite differences compute the solution at the next time, the loop is one iteration shorter. We compute the iteration based on the formulas derived above. The exact method does not need values at the previous point, which implies that numerical errors do not accumulate due to discretization errors. +!!! details "Solution:" + Both functions create an empty structure for the solution and then iterate over time. Since finite differences compute the solution at the next time, the loop is one iteration shorter. We compute the iteration based on the formulas derived above. The exact method does not need values at the previous point, which implies that numerical errors do not accumulate due to discretization errors. -```@example oc -function trajectory_fin_diff(p::PMSM, x0, ts, q) - xs = zeros(length(x0), length(ts)) - xs[:, 1] = x0 + ```@example oc + function trajectory_fin_diff(p::PMSM, x0, ts, q) + xs = zeros(length(x0), length(ts)) + xs[:, 1] = x0 - for i in 1:length(ts)-1 - xs[:, i+1] = xs[:, i] + (ts[i+1]-ts[i])*(p.A * xs[:, i] + q) + for i in 1:length(ts)-1 + xs[:, i+1] = xs[:, i] + (ts[i+1]-ts[i])*(p.A * xs[:, i] + q) + end + return xs end - return xs -end -function trajectory_exact(p::PMSM, x0, ts, q) - xs = zeros(length(x0), length(ts)) + function trajectory_exact(p::PMSM, x0, ts, q) + xs = zeros(length(x0), length(ts)) - for (i, t) in enumerate(ts) - xs[:, i] = expA(p, t)*(x0 + p.invA * (I - expA(p, -t))*q) + for (i, t) in enumerate(ts) + xs[:, i] = expA(p, t)*(x0 + p.invA * (I - expA(p, -t))*q) + end + return xs end - return xs -end -nothing # hide -``` + nothing # hide + ``` -For plotting, we create the time discretization, compute both trajectories and then plot them. + For plotting, we create the time discretization, compute both trajectories and then plot them. 
-```@example oc -using Plots + ```@example oc + using Plots -ts = 0:0.01:10 + ts = 0:0.01:10 -xs1 = trajectory_fin_diff(ps, x0, ts, q) -xs2 = trajectory_exact(ps, x0, ts, q) - -plot(xs1[1,:], xs1[2,:], label="Finite differences") -plot!(xs2[1,:], xs2[2,:], label="True value") + xs1 = trajectory_fin_diff(ps, x0, ts, q) + xs2 = trajectory_exact(ps, x0, ts, q) -savefig("Comparison1.svg") # hide -``` + plot(xs1[1,:], xs1[2,:], label="Finite differences") + plot!(xs2[1,:], xs2[2,:], label="True value") -```@raw html -
-``` + savefig("Comparison1.svg") # hide + ``` ![](Comparison1.svg) @@ -268,7 +238,7 @@ u(t) &= U_{\rm max}\frac{p(t)}{||p(t)||}. \end{aligned} ``` -!!! bonus "BONUS: Connection with optimization" +!!! compat "BONUS: Connection with optimization" This part hints at the derivation of the previous result and the connection to constrained optimization. Optimal control forms the Hamiltonian (similar to the [Langrangian](@ref lagrangian)) ```math @@ -333,45 +303,30 @@ Since this relation needs to hold for all ``t\in[0,\tau]``, we set ``t=\tau`` an Since this is one equation for one variable, we can compute the optimal time ``\tau`` from it. -```@raw html -
-
Exercise:
-
-``` - -Solve the optimal time for ``x_{\rm tar}= (0.25, -0.5)`` with the maximum voltage ``U_{\rm max} = 0.1``. - -**Hint**: To solve the equation above for ``t``, use the [bisection method](@ref l7-exercises). - -```@raw html -
-
-Solution: -
-``` +!!! warning "Exercise:" + Solve the optimal time for ``x_{\rm tar}= (0.25, -0.5)`` with the maximum voltage ``U_{\rm max} = 0.1``. -To solve the equation above, we need to find a zero point of + **Hint**: To solve the equation above for ``t``, use the [bisection method](@ref l7-exercises). -```math -f(t) = ||e^{-At}x_{\rm tar} - x_0 - A^{-1}(I-e^{-At})q|| - \frac{U_{\rm max}}{\rho}(e^{\rho t}-1) -``` +!!! details "Solution:" + To solve the equation above, we need to find a zero point of -The graph of the function (plot it) shows a single zero point (for this parameter setting). It can be found by evaluating it at many points at selecting the point with the value closest to zero. A more formal approach is to use the bisection method. + ```math + f(t) = ||e^{-At}x_{\rm tar} - x_0 - A^{-1}(I-e^{-At})q|| - \frac{U_{\rm max}}{\rho}(e^{\rho t}-1) + ``` -```@example oc -U_max = 0.1 -x_t = [0.25;-0.5] + The graph of the function (plot it) shows a single zero point (for this parameter setting). It can be found by evaluating it at many points at selecting the point with the value closest to zero. A more formal approach is to use the bisection method. -f(t) = norm(expA(ps, -t)*x_t - x0 - ps.invA*(I-expA(ps, -t))*q) - U_max/ps.ρ*(exp(ps.ρ*t)-1) + ```@example oc + U_max = 0.1 + x_t = [0.25;-0.5] -τ = bisection(f, minimum(ts), maximum(ts)) + f(t) = norm(expA(ps, -t)*x_t - x0 - ps.invA*(I-expA(ps, -t))*q) - U_max/ps.ρ*(exp(ps.ρ*t)-1) -nothing # hide -``` + τ = bisection(f, minimum(ts), maximum(ts)) -```@raw html -
-``` + nothing # hide + ``` To compute the optimal control and optimal trajectory, we rewrite one of the formulas derived above. @@ -411,28 +366,19 @@ savefig("Optimal.svg") # hide We confirm that the optimal trajectory leads from the starting to the target point. -```@raw html -
-
BONUS: Plotting all optimal trajectories
-
-``` +!!! compat "BONUS: Plotting all optimal trajectories" + The optimal trajectory depends on the normed vector ``p_0``. All such vectors form a unit circle in ``\mathbb R^2``. Therefore, they can be parameterized by an angle ``\alpha\in[0,2\pi]``. We plot eight possible optimal trajectories, each corresponding to a different target ``x_{\rm tar}``, with uniformly distributed ``\alpha``. Since we plot in a loop, we need to initialize an empty plot and then `display` it. -The optimal trajectory depends on the normed vector ``p_0``. All such vectors form a unit circle in ``\mathbb R^2``. Therefore, they can be parameterized by an angle ``\alpha\in[0,2\pi]``. We plot eight possible optimal trajectories, each corresponding to a different target ``x_{\rm tar}``, with uniformly distributed ``\alpha``. Since we plot in a loop, we need to initialize an empty plot and then `display` it. + ```@example oc + ts = 0:0.01:10 -```@example oc -ts = 0:0.01:10 - -plt = plot() -for α = 0:π/4:2*π - trj = trajectory_control(ps, x0, ts, q, U_max, [sin(α); cos(α)]) - plot!(plt, trj[1,:], trj[2,:], label="") -end -display(plt) -savefig("Trajectories.svg") # hide -``` - -![](Trajectories.svg) + plt = plot() + for α = 0:π/4:2*π + trj = trajectory_control(ps, x0, ts, q, U_max, [sin(α); cos(α)]) + plot!(plt, trj[1,:], trj[2,:], label="") + end + display(plt) + savefig("Trajectories.svg") # hide + ``` -```@raw html -
-``` + ![](Trajectories.svg) \ No newline at end of file diff --git a/docs/src/lecture_13/theory.md b/docs/src/lecture_13/theory.md index ecf8f0975..cc22b9c79 100644 --- a/docs/src/lecture_13/theory.md +++ b/docs/src/lecture_13/theory.md @@ -21,17 +21,8 @@ Ordinary differential equations take the form on some interval ``t\in [0,T]``. To obtain a correct definition, the initial value ``y(0)=y_0`` needs to be provided. A solution is a (sufficiently smooth) function ``y(t)`` such that the above formula is satisfied (almost) everywhere on ``[0,T]``. Mild conditions ensure its existence and uniqueness. -```@raw html -
-
Picard–Lindelöf theorem
-
-``` - -Suppose ``f`` is uniformly Lipschitz continuous in ``y`` (the Lipschitz constant is independent of ``t``) and continuous in ``t``. Then for some value ``\varepsilon > 0``, there exists a unique solution ``y(t)`` to the initial value problem on the interval ``[t_0-\varepsilon, t_0+\varepsilon]``. - -```@raw html -
-``` +!!! todo "Picard–Lindelöf theorem" + Suppose ``f`` is uniformly Lipschitz continuous in ``y`` (the Lipschitz constant is independent of ``t``) and continuous in ``t``. Then for some value ``\varepsilon > 0``, there exists a unique solution ``y(t)`` to the initial value problem on the interval ``[t_0-\varepsilon, t_0+\varepsilon]``. However, it may happen that even simple equations do not have a unique solution. diff --git a/docs/src/why.md b/docs/src/why.md index fb5749078..95d9ed6f8 100644 --- a/docs/src/why.md +++ b/docs/src/why.md @@ -2,7 +2,7 @@ There are many established programming languages like Python, Matlab, R, or C. When a new language is introduced, the natural question is why I should learn this new language. What are the advantages and disadvantages of this language? This section introduces significant advantages and disadvantages of Julia and compares it to Python, Matlab, R, and C. -!!! tip "Advantages:" +!!! danger "Advantages:" - **Intuitive and flexible syntax:** Julia was designed to be easy to use and powerful at the same time. Julia provides a very intuitive syntax and supports many useful concepts from other languages such as generators from Python. More details and examples are provided in the separate section below. - **Performance:** Since Julia is a compiled language, code in Julia is generally faster than code written in pure Python or Matlab. More details and examples are provided in the separate section below. - **Type system:** Like Matlab or Python, it is not necessary to use type annotations for variable or function input arguments. However, since everything in Julia has its own type, it is possible to use type annotation. This allows the compiler to optimize the code, and it can also prevent mistakes. @@ -10,7 +10,7 @@ There are many established programming languages like Python, Matlab, R, or C. W To be as objective as possible, we provide a list of Julia disadvantages. -!!! theorem "Disadvantages:" +!!! 
tip "Disadvantages:" - **A limited number of packages:** Even though Julia grows rapidly and there are many packages, it can not compete with the number of available packages in Python or R. However, Julia provides a simple way of interacting with other languages. If there is no adequate package in Julia, it is possible to use packages from other languages. - **Slow first run:** Since Julia uses just-in-time compilation, the first call of every function is slower due to compilation. This slowdown can be significant if multiple functions are called for the first time. This includes creating a plot in a new Julia session because packages for plotting are large and use many functions. It results in a long time to the first plot (~20 s with [Plots.jl](https://github.com/JuliaPlots/Plots.jl)). - **Limited number of job opportunities:** Because Julia is a relatively new language, there is a limited number of job opportunities, especially compared to Python. On the other hand, there is a list of Julia users and Julia Computing customers on the official webpage of [Julia Computing](https://juliacomputing.com/) including Amazon, Google, IBM, Intel and many others.