diff --git a/EXAMPLES.md b/EXAMPLES.md index dae4336..3662be1 100644 --- a/EXAMPLES.md +++ b/EXAMPLES.md @@ -1,30 +1,36 @@ # Installing Eirene -If you haven't done so already, install Eirene by running +If you haven't done so already, install Eirene with these steps: 0) ensure you are running the most recent version of Eirene 1) open a Julia REPL, 2) press `]` to switch to the Pkg REPL, and 3) type `add Eirene` and press return. ``` -julia> Pkg.clone("https://github.com/Eetion/Eirene.jl.git") +(v1.3) pkg> add Eirene ``` It's fairly common to get error messages when you do this. Generally they will want you to install additional (supporting) packages. Read these messages, and follow the instructions. When in doubt, always double check that you are running the latest version of Eirene (it's been observed that installers such as Homebrew install dated versions of the language, so the only way to be sure you're up to date is to visit the Julia homepage), and use ``` -julia> Pkg.update() +(v1.3) pkg> update ``` to ensure that you are using the latest versions of the supporting packages. -You can now run any function in the Eirene package in the REPL. If you want to run a fictional function called `fictionalfunction` that takes zero input arguments, you can enter +You can now run any Eirene function by running `Eirene.`. For example, to run `example_function`, which takes no arguments, you can enter ``` -julia> Eirene.fictionalfunction() +julia> Eirene.example_function() ``` -Typing the prefix `Eirene.` quickly becomes tiresome, so run +This will return the message + +``` +Welcome to Eirene! Great job running the example function. +``` + +Typing the prefix `Eirene.` quickly becomes tiresome. Entering command ``` julia> using Eirene ``` -This allows you to call Eirene functions without the prefix. +will allow you to call Eirene functions without the prefix. # 1: The Noisy Circle @@ -37,7 +43,11 @@ julia> filepath = eirenefilepath("noisycircle") The contents of this file can be loaded via ``` -julia> pointcloud = readcsv(filepath) +julia> using DelimitedFiles +julia> pointcloud = readdlm(filepath,',') +2×300 Array{Float64,2}: + 1.11307 1.13696 1.13899 1.13758 … 1.06921 1.09202 1.18673 + 0.0750878 0.263837 0.397422 0.529262 0.0550878 0.182685 0.17544 ``` This stores the points in the cloud as the columns of matrix `pointcloud`. To see this cloud for yourself, call @@ -46,6 +56,8 @@ This stores the points in the cloud as the columns of matrix `pointcloud`. To s julia> ezplot_pjs(pointcloud) ``` +![](images/circle_pc.png) + To compute persistent homology in dimensions 0 and 1, run ``` @@ -60,6 +72,9 @@ julia> plotbarcode_pjs(C,dim=1) julia> plotpersistencediagram_pjs(C,dim=1) ``` + +![](images/circle_pdiagram.png) + If you hover the mouse over a point in the persistence diagram you'll see some information about the associated persistent homology class: * `(x coordinate, y coordinate)`: birth and death times @@ -77,18 +92,35 @@ representative involved 153 cells (wow!). To plot this class, call julia> plotclassrep_pjs(C,class=50,dim=1) ``` -To plot the class without point labels, call +![](images/circle_h1rep.png) + +Note that every point on the cycle representative has a number; these numbers correspond to the columns of the `pointcloud` matrix. To plot the class without point labels, call ``` julia> plotclassrep_pjs(C,class=50,dim=1,showlabels=false) ``` +![](images/circle_h1rep_nolabel.png) + and to plot only the points incident to the class representative, call ``` julia> plotclassrep_pjs(C,class=50,dim=1,showlabels=false,showcloud = false) ``` +![](images/circle_h1rep_nolabel_nocloud.png) + + +Sometimes you don't have natural coordinates to work with (eg, if your data comes from a similarity matrix). In that case you can generate coordinates with the built-in MDS system: + +``` +julia> plotclassrep_pjs(C,class=50,dim=1,showlabels=false,showcloud = false,coords="mds",embeddingobj="hop") +``` + +![](images/circle_h1rep_mds.png) + + + # 2: Noisy Torus Let's try a larger point cloud. Run @@ -96,12 +128,14 @@ Let's try a larger point cloud. Run ``` julia> filepath = eirenefilepath("noisytorus") -julia> pointcloud = readcsv(filepath) +julia> pointcloud = readdlm(filepath,',') julia> ezplot_pjs(pointcloud) ``` +![](images/torus_pc.png) + and checkout the new point cloud (it's a noisy torus). Note that ``` @@ -128,6 +162,15 @@ julia> plotclassrep_pjs(C,class=768,dim=1) julia> plotclassrep_pjs(C,class=769,dim=1) ``` +![](images/torus_h1_pdiagram.png) ![](images/torus_h1_barcode.png) + +Red points on the diagonal represent classes that "never die". Technically they should be infinitely high up on the y-axis, but since that's not possible, we color them red and place them on the diagonal. + + + +![](images/torus_h1rep1.png) ![](images/torus_h1rep2.png) + + For H2, use the following. Make sure to set showlabels=false, otherwise the graphics will be expensive to render! @@ -139,6 +182,13 @@ julia> plotpersistencediagram_pjs(C,dim=2) julia> plotclassrep_pjs(C,class=12,dim=2,showlabels=false) ``` +![](images/torus_h2_pdiagram.png) ![](images/torus_h2_barcode.png) + + + +![](images/torus_h2_rep.png) + + # 3: Clouds You can generate several other point clouds and distance matrices via built-in @@ -170,60 +220,57 @@ Suppose you want to explore world geography vis-a-vis networks of neighboring ci ``` julia> filepath = eirenefilepath("simplecity") -``` +julia> a = ezread(filepath) + 7323x9 Array{Any,2}: + "city" "city_ascii" "lat" "lng" … "country" "iso2" "iso3" "province" + "Qal eh-ye Now" "Qal eh-ye" 34.983 63.1333 "Afghanistan" "AF" "AFG" "Badghis" + "Chaghcharan" "Chaghcharan" 34.5167 65.25 "Afghanistan" "AF" "AFG" "Ghor" + "Lashkar Gah" "Lashkar Gah" 31.583 64.36 "Afghanistan" "AF" "AFG" "Hilmand" + "Zaranj" "Zaranj" 31.112 61.887 "Afghanistan" "AF" "AFG" "Nimroz" + "Tarin Kowt" "Tarin Kowt" 32.6333 65.8667 … "Afghanistan" "AF" "AFG" "Uruzgan" + "Zareh Sharan" "Zareh Sharan" 32.85 68.4167 "Afghanistan" "AF" "AFG" "Paktika" + ⋮ ⋱ ⋮ + "Gweru" "Gweru" -19.45 29.82 "Zimbabwe" "ZW" "ZWE" "Midlands" + "Mutare" "Mutare" -18.97 32.65 "Zimbabwe" "ZW" "ZWE" "Manicaland" + "Kadoma" "Kadoma" -18.33 29.9099 "Zimbabwe" "ZW" "ZWE" "Mashonaland West" + "Chitungwiza" "Chitungwiza" -18.0 31.1 … "Zimbabwe" "ZW" "ZWE" "Harare" + "Harare" "Harare" -17.8178 31.0447 "Zimbabwe" "ZW" "ZWE" "Harare" + "Bulawayo" "Bulawayo" -20.17 28.58 "Zimbabwe" "ZW" "ZWE" "Bulawayo" ``` -julia> a = ezread(filepath) -7323x9 Array{Any,2}: - "city" "city_ascii" "lat" "lng" … "country" "iso2" "iso3" "province" - "Qal eh-ye Now" "Qal eh-ye" 34.983 63.1333 "Afghanistan" "AF" "AFG" "Badghis" - "Chaghcharan" "Chaghcharan" 34.5167 65.25 "Afghanistan" "AF" "AFG" "Ghor" - "Lashkar Gah" "Lashkar Gah" 31.583 64.36 "Afghanistan" "AF" "AFG" "Hilmand" - "Zaranj" "Zaranj" 31.112 61.887 "Afghanistan" "AF" "AFG" "Nimroz" - "Tarin Kowt" "Tarin Kowt" 32.6333 65.8667 … "Afghanistan" "AF" "AFG" "Uruzgan" - "Zareh Sharan" "Zareh Sharan" 32.85 68.4167 "Afghanistan" "AF" "AFG" "Paktika" - ⋮ ⋱ ⋮ - "Gweru" "Gweru" -19.45 29.82 "Zimbabwe" "ZW" "ZWE" "Midlands" - "Mutare" "Mutare" -18.97 32.65 "Zimbabwe" "ZW" "ZWE" "Manicaland" - "Kadoma" "Kadoma" -18.33 29.9099 "Zimbabwe" "ZW" "ZWE" "Mashonaland West" - "Chitungwiza" "Chitungwiza" -18.0 31.1 … "Zimbabwe" "ZW" "ZWE" "Harare" - "Harare" "Harare" -17.8178 31.0447 "Zimbabwe" "ZW" "ZWE" "Harare" - "Bulawayo" "Bulawayo" -20.17 28.58 "Zimbabwe" "ZW" "ZWE" "Bulawayo" - ``` + A comment on the meaning of `Array{Any,2}`, which appears in the second line: Julia differentiates arrays based on the type (or class) of elements they can contain. An n-dimensional array that is formally declared to have elements of type T is called an array of type `Array{T,n}`. A 5x5 matrix with 64-bit floating point entries can be realized either as an array of type `Array{Float64,2}` or, as the name suggests, as an array of type `Array{Any,2}`. Many functions in Julia are specialized to work with numerical arrays, so it's in our interest to extract the numeric part we are interested in, namely columns 3 and 4, sans headers, and convert to an array of type `Array{Float64,2}` before proceeding. -``` -julia> b = a[2:end,3:4] -7322x2 Array{Any,2}: - 34.983 63.1333 - 34.5167 65.25 - 31.583 64.36 - ⋮ - -17.8178 31.0447 - -20.17 28.58 -``` -``` -julia> b = convert(Array{Float64,2},b) -7322x2 Array{Float64,2}: - 34.983 63.1333 - 34.5167 65.25 - 31.583 64.36 - ⋮ - -17.8178 31.0447 - -20.17 28.58 - ``` + julia> b = a[2:end,3:4] + 7322x2 Array{Any,2}: + 34.983 63.1333 + 34.5167 65.25 + 31.583 64.36 + ⋮ + -17.8178 31.0447 + -20.17 28.58 + + julia> b = convert(Array{Float64,2},b) + 7322x2 Array{Float64,2}: + 34.983 63.1333 + 34.5167 65.25 + 31.583 64.36 + ⋮ + -17.8178 31.0447 + -20.17 28.58 + Eirene has a built-in function to convert spherical coordinates (in degrees, with fixed radius 1) to 3D Euclidean coordinates. The `rowsare` keyword argument determines wether rows are treated as points or dimensions. -``` -julia> c = latlon2euc(b,model = "points") -3x7322 Array{Float64,2}: - 0.370265 0.344959 0.368622 0.403432 0.344318 0.309031 … 0.796253 0.822829 0.814358 0.81567 0.824296 - 0.730885 0.748275 0.767998 0.755149 0.768533 0.781189 0.510205 0.473338 0.491252 0.490971 0.449048 - 0.573333 0.566646 0.523733 0.516713 0.53926 0.542442 -0.325073 -0.31449 -0.309017 -0.305991 -0.344807 - ``` + + julia> c = latlon2euc(b,model = "points") + 3x7322 Array{Float64,2}: + 0.370265 0.344959 0.368622 0.403432 0.344318 0.309031 … 0.796253 0.822829 0.814358 0.81567 0.824296 + 0.730885 0.748275 0.767998 0.755149 0.768533 0.781189 0.510205 0.473338 0.491252 0.490971 0.449048 + 0.573333 0.566646 0.523733 0.516713 0.53926 0.542442 -0.325073 -0.31449 -0.309017 -0.305991 -0.344807 + Now that we have a point cloud we can take a preliminary look at its shape and scale with the `ezplot_pjs` wrapper. Note you can always select "deny" when PlotlyJS asks for web access, without penalty. @@ -231,34 +278,36 @@ Now that we have a point cloud we can take a preliminary look at its shape and s julia> ezplot_pjs(c) ``` +![](images/globe_pc.png) + A cursory inspection shows that a number of interesting features appear at or below epsilon = 0.15, so we'll use that as our initial cutoff. To pass city names to the `eirene` function, extract column 2 of array a, sans header, and store in a 1-dimensional array of type `Array{Any}`. Due to potential errors resulting from nonstandard character strings, it's good practice to use the built-in label sanitzer ezlabel to clean this column before assigning to d. The wrapper will replace any element of the column that cannot be expressed as an ACIIString with the number corresponding to its row. Aside: n can be omitted from the expression `Array{T,n}` when n=1. -``` -julia> d = ezlabel(a[2:end,2]) -7322-element Array{Any,1}: - "Qal eh-ye" - "Chaghcharan" - "Lashkar Gah" - ⋮ - "Harare" - "Bulawayo" - ``` + + julia> d = ezlabel(a[2:end,2]) + 7322-element Array{Any,1}: + "Qal eh-ye" + "Chaghcharan" + "Lashkar Gah" + ⋮ + "Harare" + "Bulawayo" + With our inputs in order, it's time to call eirene. The calculation should take around 2.5GB of memory and 2 min to complete. -``` -julia> C = eirene(c,model="pc",maxrad = 0.15,pointlabels=d) -elapsed time: 118.869523387 seconds -Dict{ASCIIString,Any} with 14 entries: - "symmat" => 7322x7322 Array{Int64,2}:… - "filtration" => Any[[515055,515055,515055,515055,515055,515055,515055,515055,515055,515055 … 515055,51505… - "lowlab0" => Any[Int64[],[1,2,3,4,5,6,7,8,9,10 … 7313,7314,7315,7316,7317,7318,7319,7320,7321,7322],[3… - "firstv" => Any[[1,2,3,4,5,6,7,8,9,10 … 7314,7315,7316,7317,7318,7319,7320,7321,7322,7323],[1,346,688… - "filtrationtranslator" => [0.149999,0.149999,0.149999,0.149999,0.149999,0.149999,0.149999,0.149998,0.149998,0.149998 … - ⋮ => ⋮ - ``` -The package JLD can be used to save this output. Run `Pkg.add("JLD")` if you have not done so already, and enter + julia> C = eirene(c,model="pc",maxrad = 0.15,pointlabels=d) + elapsed time: 118.869523387 seconds + Dict{ASCIIString,Any} with 14 entries: + "symmat" => 7322x7322 Array{Int64,2}:… + "filtration" => Any[[515055,515055,515055,515055,515055,515055,515055,515055,515055,515055 … 515055,51505… + "lowlab0" => Any[Int64[],[1,2,3,4,5,6,7,8,9,10 … 7313,7314,7315,7316,7317,7318,7319,7320,7321,7322],[3… + "firstv" => Any[[1,2,3,4,5,6,7,8,9,10 … 7314,7315,7316,7317,7318,7319,7320,7321,7322,7323],[1,346,688… + "filtrationtranslator" => [0.149999,0.149999,0.149999,0.149999,0.149999,0.149999,0.149999,0.149998,0.149998,0.149998 … + ⋮ => ⋮ + + +The package JLD can be used to save this output. Install this package, and enter ``` julia> JLD.save("demography.jld","C",C) @@ -284,6 +333,8 @@ We did not specify `bettimax`, so only the persistence modules in dimensions 0 a julia> plotpersistencediagram_pjs(C) ``` +![](images/globe_pdiagram.png) + Hovering over a point in the diagram will display the identification number of the corresponding persistent homology class, together with the size the cycle representative Eirene computed for it. Fix a feature of interest - say number 1757 - and plot the corresponding cycle, noting that only vertices will appear in the figure (cells of dimension 1 and higher are generally too numerous to plot efficiently). Note: The cost of displaying text labels for individual points in the Plotly platform is higher than that of displaying points alone. Consider using `showlabels=false`, if your graphics card has any difficulties. @@ -292,6 +343,8 @@ Note: The cost of displaying text labels for individual points in the Plotly pla julia> plotclassrep_pjs(C,class = 1757) ``` +![](images/globe_rep.png) + Class 1757, shown above, is among my favorites: the Himalayan branch of the silk road is clearly visible on its South West arc; to the North it follows the Trans-Siberian railroad from Moscow in the west to Vladivostok on the Sea of Japan, by way of Omsk, Irktsk, and Chita; from there, it follows the connecting route from Beijing to Hong Kong, passing Nanning, Hanoi, and close to Ho Chi Min on its way to Bangkok (not all of these appear in the cycle itself, but they are easily spotted nearby). With a little exploration you can find large features formed by the Sahara Desert, Tapajos River, Falkland Islands, South China Sea, Guam, and Hudson Bay, and smaller ones shaped by the Andes mountains and Gulf of Mexico - all through the proxy of urban development. To plot the same class-representative without text labels, use @@ -312,6 +365,8 @@ To plot these using MDS, use julia> plotclassrep_pjs(C,class = 1757,showcloud=false,coords="mds",embeddingobj="hop") ``` +![](images/globe_rep_mds.png) + To make any further changes to the appearance of your plot, share it with others, or export to still-frames, you can upload to the interactive, open-source Plotly web API. This requires the setup of a personal account beforehand (see the github documentation for Plotly.jl for instructions), but once this has been done and the connection between your Julia REPL and Plotly account has been established, uploading is quite simple: create an object p for the PlotlyJS figure, and call Plotly.post ``` diff --git a/Project.toml b/Project.toml index 593ae29..8db616d 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "9c0f25c4-2ca1-5870-89f6-52640788da1d" keywords = ["topological data analysis", "persistent homology"] license = "GNU" desc = "High-performance persistent homology calculation, with generators." -version = "1.3.3" +version = "1.3.4" author = "Gregory Henselman-Petrusek " [deps] diff --git a/images/.DS_Store b/images/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/images/.DS_Store differ diff --git a/images/circle_h1rep.png b/images/circle_h1rep.png new file mode 100644 index 0000000..61b766b Binary files /dev/null and b/images/circle_h1rep.png differ diff --git a/images/circle_h1rep_mds.png b/images/circle_h1rep_mds.png new file mode 100644 index 0000000..bff093c Binary files /dev/null and b/images/circle_h1rep_mds.png differ diff --git a/images/circle_h1rep_nolabel.png b/images/circle_h1rep_nolabel.png new file mode 100644 index 0000000..5ea25c4 Binary files /dev/null and b/images/circle_h1rep_nolabel.png differ diff --git a/images/circle_h1rep_nolabel_nocloud.png b/images/circle_h1rep_nolabel_nocloud.png new file mode 100644 index 0000000..68c03ac Binary files /dev/null and b/images/circle_h1rep_nolabel_nocloud.png differ diff --git a/images/circle_pc.png b/images/circle_pc.png new file mode 100644 index 0000000..a764f6f Binary files /dev/null and b/images/circle_pc.png differ diff --git a/images/circle_pdiagram.png b/images/circle_pdiagram.png new file mode 100644 index 0000000..a7773c1 Binary files /dev/null and b/images/circle_pdiagram.png differ diff --git a/images/globe_pc.png b/images/globe_pc.png new file mode 100644 index 0000000..87aeb60 Binary files /dev/null and b/images/globe_pc.png differ diff --git a/images/globe_pdiagram.png b/images/globe_pdiagram.png new file mode 100644 index 0000000..3ee3711 Binary files /dev/null and b/images/globe_pdiagram.png differ diff --git a/images/globe_rep.png b/images/globe_rep.png new file mode 100644 index 0000000..e0ac478 Binary files /dev/null and b/images/globe_rep.png differ diff --git a/images/globe_rep_mds.png b/images/globe_rep_mds.png new file mode 100644 index 0000000..2b80112 Binary files /dev/null and b/images/globe_rep_mds.png differ diff --git a/images/torus_h1_barcode.png b/images/torus_h1_barcode.png new file mode 100644 index 0000000..d2b6371 Binary files /dev/null and b/images/torus_h1_barcode.png differ diff --git a/images/torus_h1_pdiagram.png b/images/torus_h1_pdiagram.png new file mode 100644 index 0000000..c4b5785 Binary files /dev/null and b/images/torus_h1_pdiagram.png differ diff --git a/images/torus_h1rep1.png b/images/torus_h1rep1.png new file mode 100644 index 0000000..87d4501 Binary files /dev/null and b/images/torus_h1rep1.png differ diff --git a/images/torus_h1rep2.png b/images/torus_h1rep2.png new file mode 100644 index 0000000..90391dc Binary files /dev/null and b/images/torus_h1rep2.png differ diff --git a/images/torus_h2_barcode.png b/images/torus_h2_barcode.png new file mode 100644 index 0000000..119841f Binary files /dev/null and b/images/torus_h2_barcode.png differ diff --git a/images/torus_h2_pdiagram.png b/images/torus_h2_pdiagram.png new file mode 100644 index 0000000..96b6ba5 Binary files /dev/null and b/images/torus_h2_pdiagram.png differ diff --git a/images/torus_h2_rep.png b/images/torus_h2_rep.png new file mode 100644 index 0000000..b15a1af Binary files /dev/null and b/images/torus_h2_rep.png differ diff --git a/images/torus_pc.png b/images/torus_pc.png new file mode 100644 index 0000000..e1b4931 Binary files /dev/null and b/images/torus_pc.png differ diff --git a/src/Eirene.jl b/src/Eirene.jl index 12722d8..62752ff 100644 --- a/src/Eirene.jl +++ b/src/Eirene.jl @@ -49,6 +49,16 @@ using DelimitedFiles using CSV using Hungarian #added for the Wasserstein distances +########################################################################################## + +#### USER TEST FUNCTION + +########################################################################################## + +function example_function() + print("Welcome to Eirene! Great job running the example function.") +end + ########################################################################################## @@ -5339,14 +5349,6 @@ function classrep_pjs( cloudedges_orderverts = vetexinverter[cloudedges] end - ############################################################################################## - # WAYPOINT 1 - # printval(coords,"coords") - # printval(coords==coords,"coords==coords") - # printval(D["input"]["pc"] == "n/a","D[\"input\"][\"pc\"] == \"n/a\"") - # printval(D["input"]["pc"],"D[\"input\"][\"pc\"]") -############################################################################################## - if coords == [] if D["input"]["pc"] == "n/a" print("No point cloud is available. Please consider using the mds keyword argument to generate a Euclidean embedding from the distance matrix (see documentation).") @@ -5385,7 +5387,7 @@ function classrep_pjs( metricmatrix = hopdistance(edges_orderverts,inputis = "edges") end end - coords = classical_mds(metricmatrix,embeddingdim) + coords = transform(fit(MDS, float.(metricmatrix), maxoutdim = embeddingdim, distances=true)) #classical_mds(metricmatrix,embeddingdim) # coords = round.(coords,10) model = "pc" end @@ -5440,7 +5442,22 @@ function classrep_pjs( if showedges faces = classrep(D,dim=dim,class=class) edges = d1faces(faces) - T3 = edgetrace_pjs(coords,edges,model=model ) + if showcloud + T3 = edgetrace_pjs(coords,edges,model=model ) + else + # by default vertices in the edge list index into the original + # point cloud; if we're not showing the ambient cloud, then we have + # by this point deleted the "irrelevant" columns from the coordinate + # matrix, which changes the indices. padding with zeros corrects + # for this difference + # note that for design reasons we re-defined classvinoldspace above, + # so must re-define it + classvinoldspace_original = D["nvl2ovl"][classvinnewspace] + coords_padded = zeros(size(coords,1),maximum(classvinoldspace_original)) + coords_padded[:,classvinoldspace_original] = coords + T3 = edgetrace_pjs(coords_padded,edges,model=model ) + end + append!(data,T3) end @@ -6335,7 +6352,7 @@ function hopdistance_sparse(rv,cp) fringelist = findall(fringenodes) fringenodes[:].= false end - H[.!metnodes,i]=m+1 + H[.!metnodes,i].=m+1 end return H end