diff --git a/packages/example/content/docs/command-reference/diff.md b/packages/example/content/docs/command-reference/diff.md index de086536..94873aec 100644 --- a/packages/example/content/docs/command-reference/diff.md +++ b/packages/example/content/docs/command-reference/diff.md @@ -119,7 +119,7 @@ $ dvc diff ## Example: Comparing workspace with arbitrary commits -
+
### Click and expand to set up the example @@ -149,7 +149,7 @@ files summary: 1 added, 0 deleted, 0 modified ## Example: Comparing tags or branches -
+
### Click and expand to set up the example @@ -223,7 +223,7 @@ It outputs: ## Example: Renamed files -
+
### Click and expand to set up the example diff --git a/packages/example/content/docs/command-reference/exp/pull.md b/packages/example/content/docs/command-reference/exp/pull.md index 39109c2e..e768aa33 100644 --- a/packages/example/content/docs/command-reference/exp/pull.md +++ b/packages/example/content/docs/command-reference/exp/pull.md @@ -6,8 +6,8 @@ data from a `dvc remote`. ## Synopsis ```usage -usage: dvc exp pull [-h] [-q | -v] [-f] [--no-cache] - [-r <name>] [-j <number>] [--run-cache] +usage: dvc exp pull [-h] [-q | -v] [-A] [--rev <commit>] [-n <num>] [-f] + [--no-cache] [-r <name>] [-j <number>] [--run-cache] git_remote experiment positional arguments: @@ -43,6 +43,16 @@ all cached data associated with the experiment to DVC ## Options +- `-A`, `--all-commits` - pull all experiments in the repository (overrides + `--rev` and `--num`). + +- `--rev <commit>` - pull experiments derived from the specified `<commit>` as + baseline. + +- `-n <num>`, `--num <num>` - pull experiments from the last `num` commits + (first parents) starting from the `--rev` baseline. Give a negative value to + include all first-parent commits (similar to `git log -n`). + - `-f`, `--force` - rewrite the `experiment` commit if it already exists in the local repo. Equivalent to `git push --force` (rewrites history) diff --git a/packages/example/content/docs/command-reference/exp/push.md b/packages/example/content/docs/command-reference/exp/push.md index 4b57370a..9ff02bb5 100644 --- a/packages/example/content/docs/command-reference/exp/push.md +++ b/packages/example/content/docs/command-reference/exp/push.md @@ -6,8 +6,8 @@ to a `dvc remote`. 
## Synopsis ```usage -usage: dvc exp push [-h] [-q | -v] [-f] [--no-cache] - [-r <name>] [-j <number>] [--run-cache] +usage: dvc exp push [-h] [-q | -v] [-A] [--rev <commit>] [-n <num>] [-f] + [--no-cache] [-r <name>] [-j <number>] [--run-cache] git_remote experiment positional arguments: @@ -40,6 +40,16 @@ This command will also try to [push](/doc/command-reference/push) all ## Options +- `-A`, `--all-commits` - push all experiments in the repository (overrides + `--rev` and `--num`). + +- `--rev <commit>` - push experiments derived from the specified `<commit>` as + baseline. + +- `-n <num>`, `--num <num>` - push experiments from the last `num` commits + (first parents) starting from the `--rev` baseline. Give a negative value to + include all first-parent commits (similar to `git log -n`). + - `-f`, `--force` - rewrite the `experiment` commit if it already exists in the Git remote. Equivalent to `git push --force` (rewrites history) diff --git a/packages/example/content/docs/command-reference/exp/remove.md b/packages/example/content/docs/command-reference/exp/remove.md index d9fc9ec7..1565f7e6 100644 --- a/packages/example/content/docs/command-reference/exp/remove.md +++ b/packages/example/content/docs/command-reference/exp/remove.md @@ -5,7 +5,8 @@ Delete specific experiments from the project. ## Synopsis ```usage -usage: dvc exp remove [-h] [-q | -v] [--queue | -A | -g <git_remote>] +usage: dvc exp remove [-h] [-q | -v] [-A] [--rev <commit>] [-n <num>] + [--queue | -g <git_remote>] [<experiment> [<experiment> ...]] positional arguments: @@ -29,6 +30,13 @@ With `--queue`, the list of experiments awaiting execution is cleared instead. - `-A`, `--all` - remove all experiments that have been run. Use `--queue` to remove queued ones. +- `--rev <commit>` - remove experiments derived from the specified `<commit>` as + baseline. + +- `-n <num>`, `--num <num>` - remove experiments from the last `num` commits + (first parents) starting from the `--rev` baseline. Give a negative value to + include all first-parent commits (similar to `git log -n`). 
+ - `-g`, `--git-remote` - Name or URL of the Git remote to remove the experiment from diff --git a/packages/example/content/docs/command-reference/exp/run.md b/packages/example/content/docs/command-reference/exp/run.md index 2265e03e..9e1aeb38 100644 --- a/packages/example/content/docs/command-reference/exp/run.md +++ b/packages/example/content/docs/command-reference/exp/run.md @@ -7,7 +7,7 @@ Run or resume a ```usage usage: dvc exp run [-h] [-q | -v] [-f] - { repro options ... } + { repro options ... } [-n ] [-S [:]] [--queue] [--run-all] [-j ] [--temp] [-r ] [--reset] diff --git a/packages/example/content/docs/command-reference/gc.md b/packages/example/content/docs/command-reference/gc.md index a5e3a576..5154e624 100644 --- a/packages/example/content/docs/command-reference/gc.md +++ b/packages/example/content/docs/command-reference/gc.md @@ -44,8 +44,29 @@ If the `--cloud` (`-c`) flag is used, this command deletes unused data from the to deleting it from the local DVC cache. To specify a DVC remote to delete from, use the `--remote` (`-r`) option. -> ⚠️ Danger: cloud deletion is irreversible unless there is another DVC remote -> or a manual backup with the same data. + + +Cloud deletion is irreversible unless there is another DVC remote or a manual +backup with the same data. + + + +### Cleaning shared cache (or remote) + +If a [cache is shared] among different projects that track some of the same +files, using `dvc gc` in one project will break those overlapping data links in +the other projects. + +To prevent this, use the `--projects` (`-p`) option. It takes one or more paths +to the DVC project(s) whose data should be preserved. Make sure that all the +commits and branches that reference files you want to keep have been pulled in +those other projects first. + +For example, if we have several projects with some overlapping files and we'd +like to collect all the data that's only used in one of them (e.g. 
if we no +longer need that project), we would first clone all the other projects, fetch +all their branches, and pass their paths to the `dvc gc -p` command from the +project we want to clear. ## Options @@ -75,17 +96,15 @@ use the `--remote` (`-r`) option. > \* Not including [DVC experiments] -[dvc experiments]: /doc/user-guide/experiment-management#experiments - - `--all-experiments` keep cached objects referenced in all [DVC experiments], as well as in the workspace (implying `-w`). This preserves the project's [experimental](/doc/user-guide/experiment-management) data (including checkpoints). See also `dvc exp gc`. -- `-p `, `--projects ` - if a single remote or a single - [cache is shared](/doc/user-guide/how-to/share-a-dvc-cache) among different - projects, this option can be used to specify a list of them (each project is a - path) to keep data that is currently referenced from them. +- `-p `, `--projects ` - if a single remote or a single [cache is + shared] among different projects, this option can be used to specify a list of + them (each project is a path) to keep data that is currently referenced from + them. - `-c`, `--cloud` - remove files in remote storage in addition to local cache. **This option is dangerous.** The default remote is used unless a specific one @@ -112,6 +131,9 @@ use the `--remote` (`-r`) option. - `-v`, `--verbose` - displays detailed tracing information. +[cache is shared]: /doc/user-guide/how-to/share-a-dvc-cache +[dvc experiments]: /doc/user-guide/experiment-management#experiments + ## Examples Basic example of cleaning up the cache: diff --git a/packages/example/content/docs/command-reference/import.md b/packages/example/content/docs/command-reference/import.md index 0afc32b0..d5e71a4e 100644 --- a/packages/example/content/docs/command-reference/import.md +++ b/packages/example/content/docs/command-reference/import.md @@ -38,10 +38,14 @@ working directory with its original file name e.g. 
`data.txt` (or to a location provided with `--out`). An _import `.dvc` file_ is created in the same location e.g. `data.txt.dvc` – similar to using `dvc add` after downloading the data. -(ℹ️) DVC won't push data imported from other DVC repos to + + +DVC won't push data imported from other DVC repos to [remote storage](/doc/command-reference/remote). `dvc pull` will download from the original source. + + The `url` argument specifies the address of the DVC or Git repository containing the data source. Both HTTP and SSH protocols are supported (e.g. `[user@]server:project.git`). `url` can also be a local file system path @@ -59,7 +63,7 @@ targets must be found in a `dvc.yaml` or `.dvc` file of the repo. a Git server). In such a `.dvc` file, the `deps` field specifies the `url` and data `path`, and the `outs` field contains the corresponding local path in the workspace. It records enough metadata about the imported data to -enable DVC efficiently determining whether the local copy is out of date. +enable DVC to efficiently determine whether the local copy is out of date. To actually [version the data](/doc/start/data-and-model-versioning), `git add` (and `git commit`) the import `.dvc` file. 
@@ -339,7 +343,7 @@ This will result in the following directory structure, which contains a chained import and a regular one: ``` -/repo/d +/repo/c ├── training │ ├── data.csv │ └── labels diff --git a/packages/example/content/docs/command-reference/plots/index.md b/packages/example/content/docs/command-reference/plots/index.md index 5e1989c2..388b8731 100644 --- a/packages/example/content/docs/command-reference/plots/index.md +++ b/packages/example/content/docs/command-reference/plots/index.md @@ -216,7 +216,7 @@ Difference in this metric between the current project version and the previous commit: ```dvc -$ dvc plots diff -d logs.csv HEAD^ +$ dvc plots diff HEAD^ --targets logs.csv file:///Users/usr/src/dvc_plots/index.html ``` diff --git a/packages/example/content/docs/command-reference/repro.md b/packages/example/content/docs/command-reference/repro.md index 3168b641..327d11f4 100644 --- a/packages/example/content/docs/command-reference/repro.md +++ b/packages/example/content/docs/command-reference/repro.md @@ -9,8 +9,9 @@ correct order. 
```usage usage: dvc repro [-h] [-q | -v] [-f] [-i] [-s] [-p] [-P] [-R] [-m] - [--downstream] [--force-downstream] [--glob] - [--dry] [--no-commit] [--no-run-cache] [--pull] + [--downstream] [--force-downstream] + [--pull] [--dry] + [--glob] [--no-commit] [--no-run-cache] [targets [ ...]] positional arguments: diff --git a/packages/example/content/docs/user-guide/contributing/blog.md b/packages/example/content/docs/contributing/blog.md similarity index 100% rename from packages/example/content/docs/user-guide/contributing/blog.md rename to packages/example/content/docs/contributing/blog.md diff --git a/packages/example/content/docs/user-guide/contributing/core.md b/packages/example/content/docs/contributing/core.md similarity index 100% rename from packages/example/content/docs/user-guide/contributing/core.md rename to packages/example/content/docs/contributing/core.md diff --git a/packages/example/content/docs/user-guide/contributing/docs.md b/packages/example/content/docs/contributing/docs.md similarity index 100% rename from packages/example/content/docs/user-guide/contributing/docs.md rename to packages/example/content/docs/contributing/docs.md diff --git a/packages/example/content/docs/install/linux.md b/packages/example/content/docs/install/linux.md index fa704b10..1e9de57f 100644 --- a/packages/example/content/docs/install/linux.md +++ b/packages/example/content/docs/install/linux.md @@ -22,7 +22,7 @@ plan to use, you might need to install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, `[hdfs]`, `[webdav]`, `[oss]`. Use `[all]` to include them all. -
+
### Example: with support for Amazon S3 storage @@ -53,7 +53,7 @@ Depending on the type of the [remote storage](/doc/command-reference/remote) you plan to use, you might need to install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, `dvc-ssh`. -
+
### Example: with support for Amazon S3 storage @@ -79,7 +79,7 @@ $ snap install --classic dvc ## Install from repository -
+
### On Debian/Ubuntu @@ -94,7 +94,7 @@ $ sudo apt install dvc
-
+
### On Fedora/CentOS @@ -115,7 +115,7 @@ Get the binary package from the big "Download" button on the [home page](/), or from the [release page](https://github.com/iterative/dvc/releases/) on GitHub. Then install it with the following command. -
+
### On Debian/Ubuntu @@ -125,7 +125,7 @@ $ sudo apt install ./dvc_0.62.1_amd64.deb
-
+
### On Fedora/CentOS diff --git a/packages/example/content/docs/install/macos.md b/packages/example/content/docs/install/macos.md index 186996f3..e209f4e9 100644 --- a/packages/example/content/docs/install/macos.md +++ b/packages/example/content/docs/install/macos.md @@ -43,7 +43,7 @@ plan to use, you might need to install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, `[hdfs]`, `[webdav]`, `[oss]`. Use `[all]` to include them all. -
+
### Example: with support for Amazon S3 storage @@ -69,7 +69,7 @@ Depending on the type of the [remote storage](/doc/command-reference/remote) you plan to use, you might need to install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, `dvc-ssh`. -
+
### Example: with support for Amazon S3 storage diff --git a/packages/example/content/docs/install/pre-release.md b/packages/example/content/docs/install/pre-release.md index 65241719..49361552 100644 --- a/packages/example/content/docs/install/pre-release.md +++ b/packages/example/content/docs/install/pre-release.md @@ -10,9 +10,6 @@ releases, you can install it from our code repository GitHub. > (on Python 3.7+) to encapsulate your local environment. ```dvc -# Should be installed before DVC. See details below. -$ pip install gitpython - # Basic version. $ pip install git+https://github.com/iterative/dvc diff --git a/packages/example/content/docs/install/windows.md b/packages/example/content/docs/install/windows.md index 471fefb2..1b99ce91 100644 --- a/packages/example/content/docs/install/windows.md +++ b/packages/example/content/docs/install/windows.md @@ -35,7 +35,7 @@ Depending on the type of the [remote storage](/doc/command-reference/remote) you plan to use, you might need to install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, `dvc-ssh`. -
+
### Example: with support for Amazon S3 storage @@ -61,7 +61,7 @@ Depending on the type of the [remote storage](/doc/command-reference/remote) you plan to use, you might need to install optional dependencies: `[s3]`, `[azure]`, `[gdrive]`, `[gs]`, `[oss]`, `[ssh]`. Use `[all]` to include them all. -
+
### Example: with support for Amazon S3 storage diff --git a/packages/example/content/docs/sidebar.json b/packages/example/content/docs/sidebar.json index 3b873c0e..230f3cb2 100644 --- a/packages/example/content/docs/sidebar.json +++ b/packages/example/content/docs/sidebar.json @@ -35,36 +35,49 @@ }, "children": [ { - "slug": "data-and-model-versioning", - "tutorials": { - "katacoda": "https://katacoda.com/dvc/courses/get-started/versioning" - } - }, - { - "slug": "data-and-model-access", - "tutorials": { - "katacoda": "https://katacoda.com/dvc/courses/get-started/accessing" - } - }, - { - "slug": "data-pipelines", - "tutorials": { - "katacoda": "https://katacoda.com/dvc/courses/get-started/stages" - } - }, - { - "label": "Metrics, Parameters, and Plots", - "slug": "metrics-parameters-plots", - "tutorials": { - "katacoda": "https://katacoda.com/dvc/courses/get-started/params-metrics-plots" - } + "slug": "data-management", + "label": "Data Management", + "source": "data-management/index.md", + "children": [ + { + "label": "Data and Model Access", + "slug": "access", + "tutorials": { + "katacoda": "https://katacoda.com/dvc/courses/get-started/accessing" + } + }, + { + "label": "Data Pipelines", + "slug": "pipelines", + "tutorials": { + "katacoda": "https://katacoda.com/dvc/courses/get-started/stages" + } + }, + { + "label": "Metrics, Parameters, and Plots", + "slug": "metrics-parameters-plots", + "tutorials": { + "katacoda": "https://katacoda.com/dvc/courses/get-started/params-metrics-plots" + } + } + ] }, { "slug": "experiments", "label": "Experiments", + "source": "experiments/index.md", "tutorials": { "katacoda": "https://katacoda.com/dvc/courses/get-started/experiments" - } + }, + "children": [ + { + "label": "Visualization", + "slug": "visualization", + "tutorials": { + "katacoda": "https://katacoda.com/dvc/courses/get-started/params-metrics-plots" + } + } + ] } ] }, @@ -161,25 +174,7 @@ "label": "Managing External Data", "slug": "managing-external-data" }, - { 
- "label": "Contributing", - "slug": "contributing", - "source": false, - "children": [ - { - "label": "DVC Core Project", - "slug": "core" - }, - { - "label": "Docs and Website", - "slug": "docs" - }, - { - "label": "Writing Blog Posts", - "slug": "blog" - } - ] - }, + "running-dvc-on-windows", "troubleshooting", "related-technologies", @@ -503,6 +498,25 @@ } ] }, + { + "label": "Contributing", + "slug": "contributing", + "source": false, + "children": [ + { + "label": "DVC Core Project", + "slug": "core" + }, + { + "label": "Docs and Website", + "slug": "docs" + }, + { + "label": "Writing Blog Posts", + "slug": "blog" + } + ] + }, { "label": "Changelog", "url": "https://github.com/iterative/dvc/releases", @@ -582,7 +596,8 @@ }, "teams", "account-management", - "install-github-app" + "install-github-app", + "connect-custom-gitlab-server" ] }, "troubleshooting" diff --git a/packages/example/content/docs/start/data-and-model-access.md b/packages/example/content/docs/start/data-management/access.md similarity index 100% rename from packages/example/content/docs/start/data-and-model-access.md rename to packages/example/content/docs/start/data-management/access.md diff --git a/packages/example/content/docs/start/data-and-model-versioning.md b/packages/example/content/docs/start/data-management/index.md similarity index 98% rename from packages/example/content/docs/start/data-and-model-versioning.md rename to packages/example/content/docs/start/data-management/index.md index 87fd915b..8493d54b 100644 --- a/packages/example/content/docs/start/data-and-model-versioning.md +++ b/packages/example/content/docs/start/data-management/index.md @@ -59,7 +59,7 @@ $ git commit -m "Add raw data" The data, meanwhile, is listed in `.gitignore`. -
+
### 💡 Expand to see what happens under the hood. @@ -145,7 +145,7 @@ $ dvc push Usually, we also want to `git commit` and `git push` the corresponding `.dvc` files. -
+
### 💡 Expand to see what happens under the hood. diff --git a/packages/example/content/docs/start/metrics-parameters-plots.md b/packages/example/content/docs/start/data-management/metrics-parameters-plots.md similarity index 100% rename from packages/example/content/docs/start/metrics-parameters-plots.md rename to packages/example/content/docs/start/data-management/metrics-parameters-plots.md diff --git a/packages/example/content/docs/start/data-pipelines.md b/packages/example/content/docs/start/data-management/pipelines.md similarity index 98% rename from packages/example/content/docs/start/data-pipelines.md rename to packages/example/content/docs/start/data-management/pipelines.md index 8cdec0e2..af5d0d5e 100644 --- a/packages/example/content/docs/start/data-pipelines.md +++ b/packages/example/content/docs/start/data-management/pipelines.md @@ -79,7 +79,7 @@ DVC uses these metafiles to track the data used and produced by the stage, so there's no need to use `dvc add` on `data/prepared` [manually](/doc/start/data-and-model-versioning). -
+
### 💡 Expand to see what happens under the hood. @@ -172,7 +172,7 @@ $ dvc stage add -n featurize \ The `dvc.yaml` file is updated automatically and should include two stages now. -
+
### 💡 Expand to see what happens under the hood. @@ -271,7 +271,7 @@ it also doesn't rerun `train`! The previous run with the same set of inputs
-
+
### 💡 Expand to see what happens under the hood. diff --git a/packages/example/content/docs/start/experiments.md b/packages/example/content/docs/start/experiments/index.md similarity index 99% rename from packages/example/content/docs/start/experiments.md rename to packages/example/content/docs/start/experiments/index.md index eddbff5f..040d0089 100644 --- a/packages/example/content/docs/start/experiments.md +++ b/packages/example/content/docs/start/experiments/index.md @@ -2,7 +2,7 @@ title: 'Get Started: Experiments' --- -# Get Started with Experiments +# Get Started: Experiments In machine learning projects, the number of experiments grows rapidly. DVC can track these experiments, list and compare their most relevant diff --git a/packages/example/content/docs/start/experiments/visualization.md b/packages/example/content/docs/start/experiments/visualization.md new file mode 100644 index 00000000..d3d8d527 --- /dev/null +++ b/packages/example/content/docs/start/experiments/visualization.md @@ -0,0 +1,109 @@ +--- +title: 'Get Started: Visualization with Plots' +--- + +# Get Started: Visualization with Plots + +In this section, we'll add visualization to the [`example-dvc-experiments`][ede] +project (explored [previously](/doc/start/experiments)). If you would like to +try this yourself, please refer to the project's [README] for setup instructions. + +[ede]: https://github.com/iterative/example-dvc-experiments +[readme]: + https://github.com/iterative/example-dvc-experiments/blob/main/README.md + +## Creating plots from tabular data + +A useful plot to show the classification performance is the [confusion matrix]. +In order to produce it, DVC expects a CSV **plots file** in the form: + +```csv +actual,predicted +0,0 +0,2 +... +``` + +> We added a [loop] comparing the results to generate this file from the +> predictions. 
+ +[loop]: + https://github.com/iterative/example-dvc-experiments/blob/main/src/train.py#L123 +[confusion matrix]: https://en.wikipedia.org/wiki/Confusion_matrix + +Running the experiment with `dvc exp run` will produce `plots/confusion.csv`. +Use `dvc plots show` to present it as an HTML file, and open it in the browser: + +```dvc +$ dvc plots show plots/confusion.csv --template confusion \ + -x actual -y predicted +file:///.../example-dvc-experiments/plots/confusion.json.html +``` + +![confusion matrix](/img/start_visualization_confusion1.png) + +## Displaying user-generated plot images + +Let's produce another plot to see misclassified examples from each class. This +procedure generates the misclassification examples from the validation data and +arranges them into a _confusion table_ that shows the correct label, and +misclassification sample. The code to generate an image from a set of training +images is omitted here but you can find the code in [the example +project.][misclassified-example-code] + +[misclassified-example-code]: + https://github.com/iterative/example-dvc-experiments/blob/48b1e5078c957f71674c00f416290eaa3b20b559/src/util.py#L49 + +```dvc +$ dvc plots show plots/misclassified.png +``` + +![Misclassification table](/img/start_visualization_misclassification.png) + +## Autogenerating plots from deep learning code + +An important task in deep learning projects is to observe at which epoch the +training and validation losses start to diverge. DVC helps in that regard with its Python +integrations to deep learning libraries via [DVCLive]. + +The example project uses Keras to train a classifier, and we have a DVCLive +callback that visualizes the training and validation loss for each epoch. We +first import the callback from DVCLive. + +```python +from dvclive.keras import DvcLiveCallback +``` + +Then we add this callback to the +[`fit` method](https://keras.io/api/models/model_training_apis/#fit-method) +call. + +```python +model.fit( + ... 
+ callbacks=[DvcLiveCallback()], + ...) +``` + +With these two changes, the model metrics are automatically logged to +`dvclive.json` and plotted in `training_metrics/index.html`: + +![dvclive](/img/start_visualization_dvclive.png) + +DVCLive has other capabilities, like saving the model every epoch or modifying +these default values. + +In summary, DVC provides more than one option to use visualization in your +workflow: + +- DVC can generate HTML files that include interactive plots from data series + in JSON, YAML, CSV, or TSV format. + +- DVC can keep track of image files produced as [plot outputs] from the + training/evaluation scripts. + +- [DVCLive] integrations can produce plots automatically during training. + +[plot outputs]: + /doc/user-guide/project-structure/pipelines-files#metrics-and-plots-outputs +[dvclive]: /doc/dvclive/dvclive-with-dvc diff --git a/packages/example/content/docs/start/index.md b/packages/example/content/docs/start/index.md index d25ae77f..431fcf71 100644 --- a/packages/example/content/docs/start/index.md +++ b/packages/example/content/docs/start/index.md @@ -50,8 +50,10 @@ $ git commit -m "Initialize DVC" Now you're ready to DVC! -DVC's features can be grouped into functional components. We'll explore them one -by one in the next few pages: +DVC's features can be grouped into functional components. You can explore them +in two independent trails: + +### Data Management Trail - [**Data and model versioning**](/doc/start/data-and-model-versioning) (try this next) is the base layer of DVC for large files, datasets, and machine @@ -72,11 +74,17 @@ by one in the next few pages: be attached to pipelines. These let you capture, navigate, and evaluate ML projects without leaving Git. Think "Git for machine learning". +### Experiments Trail + - [**Experiments**](/doc/start/experiments) enable exploration, iteration, and comparison across many ML experiments. Track your experiments with automatic versioning and checkpoint logging. 
Compare differences in parameters, metrics, code, and data. Apply, drop, roll back, resume, or share any experiment. +- [**Visualization**](/doc/start/experiments/visualization) lets you compare + experiment results visually, track your plots, and generate them with library + integrations. + **New!** Once you set up your DVC repository, you can also interact with it using Iterative Studio, the online UI for DVC. [Here's a demo](https://studio.iterative.ai/team/Iterative/views/example-get-started-zde16i6c4g) diff --git a/packages/example/content/docs/studio/get-started.md b/packages/example/content/docs/studio/get-started.md index 2cc39003..ac32ab86 100644 --- a/packages/example/content/docs/studio/get-started.md +++ b/packages/example/content/docs/studio/get-started.md @@ -6,9 +6,15 @@ Studio. https://www.youtube.com/watch?v=hKf4twg832g 1. Sign in to your [Iterative Studio](https://studio.iterative.ai/) dashboard - using GitHub, GitLab or Bitbucket. + using your GitHub.com, GitLab.com or Bitbucket.org account, or your email + address. - The Iterative Studio views dashboard opens. + + + When you sign up with your email address, Studio will send you a verification + email before you can log in. + + 2. If this is the first time you are signing in to Iterative Studio, you will see that there already exists a `Demo` view that connects to an example DVC @@ -26,10 +32,24 @@ more views._ 4. To create a new view, click on `Add a View`. All the organizations that you have access to will be listed. -> To create views from your GitHub repositories, you must install the Iterative -> Studio GitHub app. Refer to the section on -> [GitHub app installation](/doc/studio/user-guide/install-github-app) for more -> details. + + + If you do not see your desired organizations or Git repositories, make sure + that + [the connection to your Git server has been set up](/doc/studio/user-guide/account-management#git-integrations). 
+ + To create views from your GitHub repositories, you must install the Iterative + Studio GitHub app. Refer to the section on + [GitHub app installation](/doc/studio/user-guide/install-github-app) for more + details. + + To create views from repositories on your self-hosted GitLab server, you must + first add a connection to this server and create a team. Refer to the section + on + [self-hosted GitLab server support](/doc/studio/user-guide/connect-custom-gitlab-server) + for more details. + + 5. Open the organization whose repository you want to connect to. You can also use the search bar to directly look for a repository. diff --git a/packages/example/content/docs/studio/user-guide/account-management.md b/packages/example/content/docs/studio/user-guide/account-management.md index 1ab611c5..430c5592 100644 --- a/packages/example/content/docs/studio/user-guide/account-management.md +++ b/packages/example/content/docs/studio/user-guide/account-management.md @@ -5,24 +5,59 @@ Iterative Studio. In the menu that comes up, click on `Profile`. Your user profile page will open. This page has multiple sections that are described below. +## Profile + +Here, you can see your name and profile picture. If you signed up with a +GitHub.com, GitLab.com or Bitbucket.org account, these details are fetched from +your connected Git hosting account. + +You can edit your name. + ## Account -Here, you can see your avatar, user name and email address. These details are -fetched from your connected Git account and cannot be changed. +Here, you can see your username, password and email addresses. If you signed up +with a GitHub.com, GitLab.com or Bitbucket.org account, the username and email +address are fetched from your connected Git hosting account. + +You can update your username and password. + +**Managing email addresses:** + +You can add multiple email addresses to a single Studio account. You can log in +to the account with any of your verified email addresses. 
+ +One of your email addresses must be designated as primary. This is the address +to which Studio will send all your account notification emails. + +You can change your primary email address by clicking on the `Primary` button +next to the email address which you want to designate as primary. + +You can delete your non-primary email addresses. ## Git integrations In this section, you can, -- Connect to additional Git providers. For instance, if you are currently - connected only to GitHub, you can connect to GitLab and/or Bitbucket from - here. When you connect to the additional Git providers, you may be prompted to - Grant access to your account to Iterative Studio. Please grant the required - access to enable Iterative Studio to connect to your Git repositories. -- Disconnect from your GitHub, GitLab or Bitbucket accounts. -- Configure your GitHub account. That is, install the Iterative Studio GitHub - app on additional organizations or repositories, or even remove the app from - organizations or repositories where you no longer need it. +- Connect to GitHub.com, GitLab.com or Bitbucket.org. + + When you connect to a Git provider, you may be prompted to grant access to + your account to Iterative Studio. Please grant the required access to enable + Iterative Studio to connect to your Git repositories. + + If you signed up to use Studio with an email address, you will not have any of + the Git connections when you first sign in to Studio, and you can set up the + required Git connections in this section. + + Note that **connections to self-hosted GitLab servers** are not managed in + this section. If you want to connect to a self-hosted GitLab server, you + should create a team and set up the GitLab server connection in the team + settings. For more details, refer + [here](/doc/studio/user-guide/connect-custom-gitlab-server). + +- Disconnect from your GitHub.com, GitLab.com or Bitbucket.org accounts. +- Configure your GitHub account connection. 
That is, install the Iterative + Studio GitHub app on additional organizations or repositories, or even remove + the app from organizations or repositories where you no longer need it. ## Cloud credentials diff --git a/packages/example/content/docs/studio/user-guide/connect-custom-gitlab-server.md b/packages/example/content/docs/studio/user-guide/connect-custom-gitlab-server.md new file mode 100644 index 00000000..363e714c --- /dev/null +++ b/packages/example/content/docs/studio/user-guide/connect-custom-gitlab-server.md @@ -0,0 +1,50 @@ +# Custom GitLab Server Connection + +If your team’s Git repositories are on a self-hosted GitLab server, you can set +up a connection to this server such that all your team members can connect to +the Git repositories on this server. Refer to the +[GitLab docs](https://about.gitlab.com/install/) for more details about +self-hosted GitLab servers. + +To connect to your self-hosted GitLab server, you will need the following: + +- A URL for your GitLab server. The URL should either be publicly accessible, or + in the same private network as Studio if Studio is deployed on-premises. +- A personal access token with the following scopes: `api`, `read_user`, + `read_repository`. If you do not have a personal access token yet, you can + create one at + + `<gitlab-server-url>/-/profile/personal_access_tokens?name=Iterative+Studio+Access+token&scopes=api,read_user,read_repository` + +Once you have obtained the URL and Personal Access Token for your GitLab server, +do the following: + +- Go to your team’s settings at + `https://studio.iterative.ai/team/<team-name>/settings` +- In the `GitLab connections` section, click on the `Connect GitLab server` + button +- Enter the URL and token in the form that opens up +- Click on `Connect` + +Once the connection is successful, all the repositories in this GitLab server +will become available when you try to create a view in your team workspace. 
If +you have any trouble setting up the connection to your GitLab server, please +[contact us](https://dvc.org/doc/studio/troubleshooting#support). + + + +Connecting to a self-hosted GitLab server is different from connecting to your +account on GitLab SaaS ([GitLab.com](http://gitlab.com/)). To connect to +[gitlab.com](http://gitlab.com/) go to the `Git integrations` section in your +[personal profile settings](https://studio.iterative.ai/user/_/profile). If you +signed up to use Iterative Studio using your GitLab account, you may already +have a connection to [gitlab.com](http://gitlab.com/). + + + + + +Custom GitLab server support is available only within a team workspace, and is +available to all Studio teams (free and paid). + + diff --git a/packages/example/content/docs/studio/user-guide/index.md b/packages/example/content/docs/studio/user-guide/index.md index e55c44c5..5a5c2de6 100644 --- a/packages/example/content/docs/studio/user-guide/index.md +++ b/packages/example/content/docs/studio/user-guide/index.md @@ -7,7 +7,8 @@ The topics here range from the basic (how to connect to your Git repositories) to more advanced things you can do such as running experiments. We also include guides for getting your Git repositories ready for using with Iterative Studio, with or without DVC. This includes guides on how to prepare your repositories, -how to install and authorize the Iterative Studio GitHub app, etc. +how to install and authorize the Iterative Studio GitHub app, how to connect to +repositories on self-hosted GitLab servers, etc. 
Please choose from the navigation sidebar to the left, or click the `Next` button below ↘ diff --git a/packages/example/content/docs/studio/user-guide/teams.md b/packages/example/content/docs/studio/user-guide/teams.md index 5b75c532..412acbd7 100644 --- a/packages/example/content/docs/studio/user-guide/teams.md +++ b/packages/example/content/docs/studio/user-guide/teams.md @@ -4,12 +4,13 @@ You can define teams with one or more team members. The team members are also called collaborators, and you can assign different roles to them. The views that you create in your team's page will be accessible to all members of the team. -In this section, you will: +In this section, you will learn about: -- [Learn how to create a team](#create-a-team) -- [Learn how to invite collaborators (team members)](#invite-collaborators) -- [Understand the privileges (access permissions) of different roles](#roles) -- [Learn how to manage the team and its views](#manage-your-team-and-its-views) +- [How to create a team](#create-a-team) +- [How to invite collaborators (team members)](#invite-collaborators) +- [The privileges (access permissions) of different roles](#roles) +- [How to manage the team and its views](#manage-your-team-and-its-views) +- [How to manage connections to self-hosted GitLab servers](#manage-connections-to-self-hosted-gitlab-servers) ## Create a team @@ -95,24 +96,46 @@ same as that of any other collaborator who has been assigned the `Admin` role. ## Manage your team and its views -Once you have created the team, the team's page opens up. +Once you have created the team, the team's workspace opens up. -![](https://static.iterative.ai/img/studio/team_page.png) +![](https://static.iterative.ai/img/studio/team_page_v3.png) -On this page, you can perform three types of tasks: +In this workspace, there are 2 pages - [Views](#views) and +[Settings](#settings). -- **Add a view.** Click on the `View` menu item to add views to the team's page. 
- The process for adding a view is the same as that for adding personal views - ([instructions](/doc/studio/user-guide/views/create-view)). However, the views - that you create within the team will be accessible to all members - (collaborators) of the team. +### Views -- **Edit collaborators.** You can click on the `Team` menu item to edit the - collaborators in the team. +This is the views dashboard for the team. All the views on this dashboard are +accessible to all members (collaborators) of the team. -- **Change settings.** Finally, you can click on the `Settings` menu item to - change the team name, add credentials for the data remotes, and delete the - team. Note that these settings are applicable to the team and are thus - different from [view settings](/doc/studio/user-guide/views/view-settings). +To add a view to this dashboard, click on `Add a view`. The process for adding a +view is the same as that for adding personal views +([instructions](/doc/studio/user-guide/views/create-view)). - ![](https://static.iterative.ai/img/studio/team_settings.png) +### Settings + +In the team settings page, you can change the team name, add credentials for the +data remotes, and delete the team. Note that these settings are applicable to +the team and are thus different from +[view settings](/doc/studio/user-guide/views/view-settings). + +Additionally, you can also +[manage connections to self-hosted GitLab servers](#manage-connections-to-self-hosted-gitlab-servers) +and [edit collaborators](#edit-collaborators). + +#### **Manage connections to self-hosted GitLab servers** + +If your team’s Git repositories are on a self-hosted GitLab server, you can go +to the `GitLab connections` section of the team settings page to set up a +connection to this server. Once you set up the connection, all your team members +can connect to the Git repositories on this server. For more details, refer +[here](/doc/studio/user-guide/connect-custom-gitlab-server). 
+ +#### **Edit collaborators** + +To manage the collaborators (team members) of your team, go to the +`Collaborators` section of the team settings page. Here you can invite new team +members as well as remove or change the [roles](#roles) of existing team +members. + +![](https://static.iterative.ai/img/studio/team_settings_v3.png) diff --git a/packages/example/content/docs/studio/user-guide/views/create-view.md b/packages/example/content/docs/studio/user-guide/views/create-view.md index 66de0763..50dc8227 100644 --- a/packages/example/content/docs/studio/user-guide/views/create-view.md +++ b/packages/example/content/docs/studio/user-guide/views/create-view.md @@ -3,16 +3,51 @@ To create a new view, follow these steps. 1. Sign in to your [Iterative Studio](https://studio.iterative.ai/) dashboard - using GitHub, GitLab or Bitbucket. + using your GitHub.com, GitLab.com or Bitbucket.org account, or your email + address. + + + + When you sign up with your email address, Studio will send you a verification + email before you can log in. + + 2. Click on `Add a View`. All the organizations that you have access to will be listed. +> If you do not see your desired organizations or Git repositories, make sure +> that +> [the connection to your Git server has been set up](/doc/studio/user-guide/account-management#git-integrations). + > To create views from your GitHub repositories, you must install the Iterative > Studio GitHub app. Refer to the section on > [GitHub app installation](/doc/studio/user-guide/install-github-app) for more > details. +> To create views from repositories on your self-hosted GitLab server, you must +> first add a connection to this server and create a team. Refer to the section +> on +> [self-hosted GitLab server support](/doc/studio/user-guide/connect-custom-gitlab-server) +> for more details. 
+ + + +If you do not see your desired organizations or Git repositories, make sure that +[the connection to your Git server has been set up](/doc/studio/user-guide/account-management#git-integrations). + +To create views from your GitHub repositories, you must install the Iterative +Studio GitHub app. Refer to the section on +[GitHub app installation](/doc/studio/user-guide/install-github-app) for more +details. + +To create views from repositories on your self-hosted GitLab server, you must +first add a connection to this server and create a team. Refer to the section on +[self-hosted GitLab server support](/doc/studio/user-guide/connect-custom-gitlab-server) +for more details. + + + 3. Open the organization whose repository you want to connect to. You can also use the search bar to directly look for a repository. diff --git a/packages/example/content/docs/user-guide/external-dependencies.md b/packages/example/content/docs/user-guide/external-dependencies.md index 15809de7..da46fec4 100644 --- a/packages/example/content/docs/user-guide/external-dependencies.md +++ b/packages/example/content/docs/user-guide/external-dependencies.md @@ -189,7 +189,7 @@ Importing 'https://data.dvc.org/get-started/data.xml' -> 'data.xml' The command above creates the import `.dvc` file `data.xml.dvc`, that contains an external dependency (in this case an HTTPs URL). -
+
### Expand to see resulting `.dvc` file @@ -227,7 +227,7 @@ Importing 'model.pkl (git@github.com:iterative/example-get-started)' The command above creates `model.pkl.dvc`, where the external dependency is specified (with the `repo` field). -
+
### Expand to see resulting `.dvc` file diff --git a/packages/example/content/docs/user-guide/how-to/share-a-dvc-cache.md b/packages/example/content/docs/user-guide/how-to/share-a-dvc-cache.md index 136e3230..80aa8b38 100644 --- a/packages/example/content/docs/user-guide/how-to/share-a-dvc-cache.md +++ b/packages/example/content/docs/user-guide/how-to/share-a-dvc-cache.md @@ -77,8 +77,12 @@ enable symlinks to avoid having copies from the external cache to the > See `dvc config cache` and > [File link types](/doc/user-guide/large-dataset-optimization) for more info. -⚠️ Note that enabling soft/hard links causes DVC to protect the linked data -because editing them in-place would corrupt the cache. See `dvc unprotect`. + + +Note that enabling soft/hard links causes DVC to protect the linked data because +editing them in-place would corrupt the cache. See `dvc unprotect`. + + If you're using Git, commit the changes to your project's config file (usually `.dvc/config`): @@ -87,3 +91,12 @@ If you're using Git, commit the changes to your project's config file (usually $ git add .dvc/config $ git commit -m "config external/shared DVC cache" ``` + + + +Using `dvc gc` with a shared cache may delete data needed in another project! +See more info. about +[cleaning a shared cache](/doc/command-reference/gc#cleaning-shared-cache-or-shared-remote) +safely. + + diff --git a/packages/example/content/docs/user-guide/setup-google-drive-remote.md b/packages/example/content/docs/user-guide/setup-google-drive-remote.md index 2d7052fa..07aa42ca 100644 --- a/packages/example/content/docs/user-guide/setup-google-drive-remote.md +++ b/packages/example/content/docs/user-guide/setup-google-drive-remote.md @@ -178,11 +178,13 @@ authentication is needed. 1. To [create a service account](https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account), navigate to **IAM & Admin** in the left sidebar, and select **Service - Accounts**. 
Click **+ CREATE SERVICE ACCOUNT**, on the next screen, enter - **Service account name** e.g. "My DVC project", and click **Create**. Select - **Continue** at the next **Service account permissions** page, click at **+ - CREATE KEY**, select **JSON** and **Create**. Download the generated `.json` - key file to a safe location. + Accounts**. Click **+ CREATE SERVICE ACCOUNT**, enter a **Service account + name** e.g. "My DVC project", and optionally provide a custom **Service + account ID** and description. Then click **CREATE AND CONTINUE**. You can + skip the two optional sections. Click **DONE** and you will be returned to + the overview page. Select your service account and go to the **Keys** tab. + Under **Add key** select **Create new key**, choose **JSON**, and click + **CREATE**. Download the generated `.json` key file to a safe location. ⚠️ Be careful about sharing the key file with others. @@ -195,6 +197,14 @@ authentication is needed. gdrive_service_account_json_file_path path/to/file.json ``` + Alternatively, the `GDRIVE_CREDENTIALS_DATA` environment variable can be set to + pass the service account key in CI/CD systems, production setup, read-only file systems, etc. The + content of this variable should be a string with JSON that has the same + format as in the key file described above. If both this variable and + `gdrive_service_account_json_file_path` are provided, + `GDRIVE_CREDENTIALS_DATA` takes priority and + `gdrive_service_account_json_file_path` is ignored. + 3. Share the Google Drive folders that you want to use with the service account. Navigate to your Google Drive folder's sharing options and add the service account as an editor (read/write) or viewer (read-only): @@ -240,7 +250,7 @@ Alternatively, a `GDRIVE_CREDENTIALS_DATA` can be set to pass user credentials in CI/CD systems, production setup, read-only file systems, etc. 
The content of this variable should be a string with JSON that has the same format as in the credentials files described above, and usually you get it going through the same -authentication process. DVC reads this variable first, before the credentials -file. +authentication process. If `GDRIVE_CREDENTIALS_DATA` is set, the +`gdrive_user_credentials_file` value (if provided) is ignored. > Please note our [Privacy Policy (Google APIs)](/doc/user-guide/privacy). diff --git a/packages/example/static/img/start_visualization_confusion1.png b/packages/example/static/img/start_visualization_confusion1.png new file mode 100644 index 00000000..fe5b20e7 Binary files /dev/null and b/packages/example/static/img/start_visualization_confusion1.png differ diff --git a/packages/example/static/img/start_visualization_dvclive.png b/packages/example/static/img/start_visualization_dvclive.png new file mode 100644 index 00000000..5ad0c2ca Binary files /dev/null and b/packages/example/static/img/start_visualization_dvclive.png differ diff --git a/packages/example/static/img/start_visualization_misclassification.png b/packages/example/static/img/start_visualization_misclassification.png new file mode 100644 index 00000000..83241740 Binary files /dev/null and b/packages/example/static/img/start_visualization_misclassification.png differ diff --git a/packages/gatsby-theme-iterative/src/components/Documentation/Markdown/Admonition/index.tsx b/packages/gatsby-theme-iterative/src/components/Documentation/Markdown/Admonition/index.tsx index df30cb7f..176393a0 100644 --- a/packages/gatsby-theme-iterative/src/components/Documentation/Markdown/Admonition/index.tsx +++ b/packages/gatsby-theme-iterative/src/components/Documentation/Markdown/Admonition/index.tsx @@ -2,14 +2,15 @@ import React from 'react' import cn from 'classnames' import * as styles from './styles.module.css' -const icons = { +const icons: { [key: string]: string } = { tip: '💡', info: 'ℹ️', warn: '⚠️', fire: '🔥', exclamation: '❗', 
lady_beetle: '🐞', - bug: '🐛' + bug: '🐛', + none: '' } const typeOptions = ['info', 'tip', 'warn'] const defaultType = 'info' @@ -25,10 +26,10 @@ const Admonition: React.FC<{ | 'exclamation' | 'lady_beetle' | 'bug' -}> = ({ title, type = defaultType, children, icon = type }) => { + | 'none' +}> = ({ title, type = defaultType, children, icon = '' }) => { const setType = typeOptions.includes(type) ? type : defaultType - const iconContent = icons[icon] || '' - + const iconContent = icon in icons ? icons[icon] : icons[setType] return (
= ({ +const Details: React.FC<{ slugger: Slugger; id: string }> = ({ slugger, - children + children, + id }) => { const [isOpen, setIsOpen] = useState(false) const location = useLocation() @@ -52,9 +52,7 @@ const Details: React.FC<{ slugger: GithubSlugger }> = ({ firstChild.props.children.length - 1 ) as ReactNode[] - const title: string = ( - triggerChildren as (string | ReactChild)[] - ).reduce((acc, cur) => { + const title = (triggerChildren as any[]).reduce((acc, cur) => { return (acc += typeof cur === 'string' ? cur @@ -62,10 +60,9 @@ const Details: React.FC<{ slugger: GithubSlugger }> = ({ ? cur?.props?.children?.toString() : '') }, '') - - let slug = slugger.slug(title) - slug = slug.replaceAll('️', '').replaceAll('ℹ', '') - const id = slug.replace(/(^\-+|\-+$)/g, '') + id = useMemo(() => { + return id ? slugger.slug(id) : slugger.slug(title) + }, [id, title]) useEffect(() => { if (location.hash === `#${id}`) { @@ -248,8 +245,8 @@ const Tab: React.FC = ({ children }) => { } // Rehype's typedefs don't allow for custom components, even though they work -const renderAst = (slugger: GithubSlugger) => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const renderAst = (slugger: Slugger) => { return new (rehypeReact as any)({ createElement: React.createElement, Fragment: React.Fragment, @@ -258,9 +255,7 @@ const renderAst = (slugger: GithubSlugger) => { abbr: Abbr, card: Card, cards: Cards, - details: (props: PropsWithChildren>) => ( -
- ), + details: (props: any) =>
, toggle: Toggle, tab: Tab, admon: Admonition, @@ -271,7 +266,7 @@ const renderAst = (slugger: GithubSlugger) => { interface IMarkdownProps { htmlAst: Node githubLink: string - tutorials?: { [type: string]: string } + tutorials: { [type: string]: string } prev?: string next?: string } @@ -283,7 +278,7 @@ const Markdown: React.FC = ({ tutorials, githubLink }) => { - const slugger = new GithubSlugger() + const slugger = new Slugger() return (
{renderAst(slugger)(htmlAst)} diff --git a/packages/gatsby-theme-iterative/src/utils/front/Slugger.ts b/packages/gatsby-theme-iterative/src/utils/front/Slugger.ts new file mode 100644 index 00000000..6aca79f3 --- /dev/null +++ b/packages/gatsby-theme-iterative/src/utils/front/Slugger.ts @@ -0,0 +1,37 @@ +class Slugger { + separator: string + lowercase: boolean + slugs: Array + + constructor(options?: { separator?: string; lowercase?: boolean }) { + this.separator = options?.separator || '-' + this.lowercase = options?.lowercase === false ? false : true + this.slugs = [] + } + + slug(str: string) { + str = typeof str === 'string' ? str : '' + let slug = this.slugify(str) + + if (this.lowercase) { + slug = slug.toLowerCase() + } + if (this.slugs.includes(slug)) { + throw new Error(`Duplicate slug: ${slug}`) + } + this.slugs.push(slug) + return slug + } + slugify(str: string) { + return str + .replace(/[^\w\s-]/g, '') + .trim() + .replace(/[-\s]+/g, this.separator) + .replace(this.separator + this.separator, this.separator) + .replace(/(^\-+|\-+$)/g, '') + } + reset() { + this.slugs = [] + } +} +export default Slugger