From 658bc81bf2ea7481ae6a52d67b099b297b8c0e82 Mon Sep 17 00:00:00 2001 From: Kurian Benoy Date: Tue, 14 May 2019 00:56:57 +0530 Subject: [PATCH 1/3] Docs:Add CSS padding for codeblocks in Installation page --- src/Documentation/Markdown/Markdown.js | 2 +- static/docs/get-started/install.md | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Documentation/Markdown/Markdown.js b/src/Documentation/Markdown/Markdown.js index 2dff72877b..2816336a38 100644 --- a/src/Documentation/Markdown/Markdown.js +++ b/src/Documentation/Markdown/Markdown.js @@ -61,7 +61,7 @@ const HtmlRenderer = props => { const CodeBlock = ({ value, language }) => { const dvcStyle = Object.assign({}, docco) dvcStyle['hljs-comment'] = { color: '#999' } - dvcStyle['hljs-meta'] = { color: '#333', fontSize: '14px' } + dvcStyle['hljs-meta'] = { color: '#333', fontSize: '14px', paddingLeft: '8em' } return ( {value} diff --git a/static/docs/get-started/install.md b/static/docs/get-started/install.md index b2b1bf1752..e620c5a468 100644 --- a/static/docs/get-started/install.md +++ b/static/docs/get-started/install.md @@ -6,7 +6,7 @@ There are three ways to install DVC: `pip`, OS-specific package, and Homebrew To install DVC from terminal, run: ```dvc - $ pip install dvc +$ pip install dvc ``` > Depending on the [remote storage](/doc/commands-reference/remote) type you @@ -27,9 +27,9 @@ and `homebrew` repositories: ### Expand to install from deb repository (Ubuntu, Debian) ```dvc - $ sudo wget https://dvc.org/deb/dvc.list -O /etc/apt/sources.list.d/dvc.list - $ sudo apt-get update - $ sudo apt-get install dvc +$ sudo wget https://dvc.org/deb/dvc.list -O /etc/apt/sources.list.d/dvc.list +$ sudo apt-get update +$ sudo apt-get install dvc ``` @@ -40,9 +40,9 @@ and `homebrew` repositories: ### Expand to install from rpm repository (Fedora, CentOS) ```dvc - $ sudo wget https://dvc.org/rpm/dvc.repo -O /etc/yum.repos.d/dvc.repo - $ sudo yum update - $ sudo yum install dvc +$ sudo wget https://dvc.org/rpm/dvc.repo -O /etc/yum.repos.d/dvc.repo +$ sudo yum update +$ sudo yum install dvc ``` @@ -53,12 +53,12 @@ and `homebrew` repositories: ### Expand to install via Homebrew (Mac OS) ```dvc - $ brew install iterative/homebrew-dvc/dvc +$ brew install iterative/homebrew-dvc/dvc ``` or: ```dvc - $ brew cask install iterative/homebrew-dvc/dvc +$ brew cask install iterative/homebrew-dvc/dvc ``` From e4544c02f48b0ca8cfba3a2844f42c860bcec492 Mon Sep 17 00:00:00 2001 From: Kurian Benoy Date: Tue, 14 May 2019 03:43:47 +0530 Subject: [PATCH 2/3] Remove leading spaces for coding blocks in Getting started section --- static/docs/get-started/add-files.md | 16 +-- .../docs/get-started/compare-experiments.md | 20 +-- static/docs/get-started/configure.md | 6 +- .../docs/get-started/connect-code-and-data.md | 66 +++++----- static/docs/get-started/example-pipeline.md | 106 ++++++++-------- static/docs/get-started/example-versioning.md | 70 +++++----- static/docs/get-started/experiments.md | 6 +- static/docs/get-started/initialize.md | 8 +- static/docs/get-started/metrics.md | 18 +-- static/docs/get-started/older-versions.md | 8 +- static/docs/get-started/pipeline.md | 26 ++-- static/docs/get-started/reproduce.md | 2 +- static/docs/get-started/retrieve-data.md | 8 +- static/docs/get-started/share-data.md | 2 +- static/docs/get-started/visualize.md | 120 +++++++++--------- 15 files changed, 241 insertions(+), 241 deletions(-) diff --git a/static/docs/get-started/add-files.md b/static/docs/get-started/add-files.md index e1890e097e..8d8709dfb0 100644 --- a/static/docs/get-started/add-files.md +++ b/static/docs/get-started/add-files.md @@ -16,15 +16,15 @@ link as`(Chrome) or `Save object as`(Firefox). ```dvc - $ mkdir data - $ wget https://dvc.org/s3/get-started/data.xml -O data/data.xml +$ mkdir data +$ wget https://dvc.org/s3/get-started/data.xml -O data/data.xml ``` To take a file (or a directory) under DVC control just run `dvc add`, it accepts any **file** or a **directory**: ```dvc - $ dvc add data/data.xml +$ dvc add data/data.xml ``` DVC stores information about your data file in a special `.dvc` file, that has a @@ -32,8 +32,8 @@ human-readable [description](/doc/user-guide/dvc-file-format) and can be committed to Git to track versions of your file: ```dvc - $ git add data/.gitignore data/data.xml.dvc - $ git commit -m "add source data to DVC" +$ git add data/.gitignore data/data.xml.dvc +$ git commit -m "add source data to DVC" ```
@@ -44,9 +44,9 @@ You can see that actual data file has been moved to the `.dvc/cache` directory (usually hardlink or reflink is created, so no physical copying is happening). ```dvc - $ ls -R .dvc/cache - .dvc/cache/a3: - 04afb96060aad90176268345e10355 +$ ls -R .dvc/cache + .dvc/cache/a3: + 04afb96060aad90176268345e10355 ``` where `a304afb96060aad90176268345e10355` is an MD5 hash of the `data.xml` file. diff --git a/static/docs/get-started/compare-experiments.md b/static/docs/get-started/compare-experiments.md index efd8207e11..914e8d91cb 100644 --- a/static/docs/get-started/compare-experiments.md +++ b/static/docs/get-started/compare-experiments.md @@ -10,9 +10,9 @@ Let's run evaluate for the latest `bigram` experiment we created in one of the previous steps. It mostly takes just running the `dvc repro`: ```dvc - $ git checkout master - $ dvc checkout - $ dvc repro evaluate.dvc +$ git checkout master +$ dvc checkout +$ dvc repro evaluate.dvc ``` `git checkout master` and `dvc checkout` commands ensure that we have the latest @@ -21,19 +21,19 @@ experiment code and data respectively. And `dvc repro`, as we discussed in the commands to build the model and measure its performance. ```dvc - $ git commit -a -m "evaluate bigram model" - $ git tag -a "bigram-experiment" -m "bigrams" +$ git commit -a -m "evaluate bigram model" +$ git tag -a "bigram-experiment" -m "bigrams" ``` Now, we can use `-T` option of the `dvc metrics show` command to see the difference between the `baseline` and `bigrams` experiments: ```dvc - $ dvc metrics show -T +$ dvc metrics show -T - baseline-experiment: - auc.metric: 0.588765 - bigram-experiment: - auc.metric: 0.620421 +baseline-experiment: + auc.metric: 0.588765 +bigram-experiment: + auc.metric: 0.620421 ``` DVC provides built-in support to track and navigate `JSON`, `TSV` or `CSV` diff --git a/static/docs/get-started/configure.md b/static/docs/get-started/configure.md index 72c6e10a46..6f395908a0 100644 --- a/static/docs/get-started/configure.md +++ b/static/docs/get-started/configure.md @@ -23,8 +23,8 @@ project/repository itself.
```dvc - $ dvc remote add -d myremote /tmp/dvc-storage - $ git commit .dvc/config -m "initialize DVC local remote" +$ dvc remote add -d myremote /tmp/dvc-storage +$ git commit .dvc/config -m "initialize DVC local remote" ``` > We only use a local remote in this guide for simplicity's sake in following > these basic steps as you are learning to use DVC. We realize that for most @@ -53,7 +53,7 @@ for all remotes. For example, to setup an S3 remote we would use something like: ```dvc - $ dvc remote add -d s3remote s3://mybucket/myproject +$ dvc remote add -d s3remote s3://mybucket/myproject ``` > This command is only shown for informational purposes. No need to actually run > it in order to continue with this guide. diff --git a/static/docs/get-started/connect-code-and-data.md b/static/docs/get-started/connect-code-and-data.md index 084046dc33..0bd6bfffc3 100644 --- a/static/docs/get-started/connect-code-and-data.md +++ b/static/docs/get-started/connect-code-and-data.md @@ -12,9 +12,9 @@ to get the sample code: > On Windows just use your browser to download the archive instead. ```dvc - $ wget https://dvc.org/s3/get-started/code.zip - $ unzip code.zip - $ rm -f code.zip +$ wget https://dvc.org/s3/get-started/code.zip +$ unzip code.zip +$ rm -f code.zip ``` You'll also need to install its dependencies: Python packages like `pandas` and @@ -27,34 +27,34 @@ You'll also need to install its dependencies: Python packages like `pandas` and After downloading the sample code, your project structure should look like this: ```dvc - $ tree - . - ├── data - │   ├── data.xml - │   └── data.xml.dvc - ├── requirements.txt - └── src -    ├── evaluate.py -    ├── featurization.py -    ├── prepare.py -  └── train.py +$ tree +. +├── data +│   ├── data.xml +│   └── data.xml.dvc +├── requirements.txt +└── src +    ├── evaluate.py +    ├── featurization.py +    ├── prepare.py +  └── train.py ``` We **strongly** recommend using `virtualenv` or a similar tool to isolate your environment: ```dvc - $ virtualenv .env - $ echo ".env/" >> .gitignore - $ source .env/bin/activate +$ virtualenv .env +$ echo ".env/" >> .gitignore +$ source .env/bin/activate ``` Now, we are ready to install dependencies to run the code: ```dvc - $ pip install -U -r requirements.txt - $ git add . - $ git commit -m "add code" +$ pip install -U -r requirements.txt +$ git add . +$ git commit -m "add code" ``` @@ -64,10 +64,10 @@ command transforms it into a reproducible **stage** for the ML **pipeline** (describes in the next chapter). ```dvc - $ dvc run -f prepare.dvc \ - -d src/prepare.py -d data/data.xml \ - -o data/prepared \ - python src/prepare.py data/data.xml +$ dvc run -f prepare.dvc \ + -d src/prepare.py -d data/data.xml \ + -o data/prepared \ + python src/prepare.py data/data.xml ``` `dvc run` generates the `prepare.dvc` file. It has the same @@ -86,18 +86,18 @@ This is how the result should look like now: ```diff . ├── data - │   ├── data.xml - │   ├── data.xml.dvc -+ │   └── prepared -+ │   ├── test.tsv -+ │   └── train.tsv + │ ├── data.xml + │ ├── data.xml.dvc ++ │ └── prepared ++ │ ├── test.tsv ++ │ └── train.tsv + ├── prepare.dvc ├── requirements.txt └── src -    ├── evaluate.py -    ├── featurization.py -    ├── prepare.py -  └── train.py + ├── evaluate.py + ├── featurization.py + ├── prepare.py + └── train.py ``` This is how `prepare.dvc` looks like internally: diff --git a/static/docs/get-started/example-pipeline.md b/static/docs/get-started/example-pipeline.md index c7c1d764cb..55a85bdadd 100644 --- a/static/docs/get-started/example-pipeline.md +++ b/static/docs/get-started/example-pipeline.md @@ -35,29 +35,29 @@ just use your browser to download `code.zip`. ```dvc - $ mkdir example - $ cd example - $ git init - $ wget https://dvc.org/s3/examples/so/code.zip - $ unzip code.zip - $ rm -f code.zip - $ git add code/ - $ git commit -m "download and initialize code" +$ mkdir example +$ cd example +$ git init +$ wget https://dvc.org/s3/examples/so/code.zip +$ unzip code.zip +$ rm -f code.zip +$ git add code/ +$ git commit -m "download and initialize code" ``` (Optional) It's highly recommended to initialize a virtual environment to keep your global packages clean and untouched: ```dvc - $ virtualenv .env - $ source .env/bin/activate - $ echo ".env/" >> .gitignore +$ virtualenv .env +$ source .env/bin/activate +$ echo ".env/" >> .gitignore ``` Install the required dependencies: ```dvc - $ pip install -r code/requirements.txt +$ pip install -r code/requirements.txt ``` Then, we are creating the pipeline step-by-step, utilizing the same set of @@ -66,17 +66,17 @@ commands that are described in the [get started](/doc/get-started) chapters. * Initialize DVC repository (run it inside your Git repository): ```dvc - $ dvc init - $ git commit -m "initialize DVC" +$ dvc init +$ git commit -m "initialize DVC" ``` * Download an input data set to the `data` directory and take it under DVC control: ```dvc - $ mkdir data - $ wget -P data https://dvc.org/s3/examples/so/Posts.xml.zip - $ dvc add data/Posts.xml.zip +$ mkdir data +$ wget -P data https://dvc.org/s3/examples/so/Posts.xml.zip +$ dvc add data/Posts.xml.zip ```
@@ -114,8 +114,8 @@ It's enough to run `dvc checkout` or `dvc pull` to restore data files. * Commit the data file meta-information to Git repository: ```dvc - $ git add data/Posts.xml.zip.dvc data/.gitignore - $ git commit -m "add dataset" +$ git add data/Posts.xml.zip.dvc data/.gitignore +$ git commit -m "add dataset" ``` ## Define steps @@ -129,10 +129,10 @@ run `dvc add` on `Posts.xml`, `dvc run` saves (commits into the cache, takes the file under DVC control) automatically: ```dvc - $ dvc run -d data/Posts.xml.zip \ - -o data/Posts.xml \ - -f extract.dvc \ - unzip data/Posts.xml.zip -d data +$ dvc run -d data/Posts.xml.zip \ + -o data/Posts.xml \ + -f extract.dvc \ + unzip data/Posts.xml.zip -d data ```
@@ -174,50 +174,50 @@ your actual workspace without copying every time object from/to the cache. * Next step, let's convert XML into TSV to make feature extraction easier: ```dvc - $ dvc run -d code/xml_to_tsv.py -d data/Posts.xml \ - -o data/Posts.tsv \ - -f prepare.dvc \ - python code/xml_to_tsv.py data/Posts.xml data/Posts.tsv +$ dvc run -d code/xml_to_tsv.py -d data/Posts.xml \ + -o data/Posts.tsv \ + -f prepare.dvc \ + python code/xml_to_tsv.py data/Posts.xml data/Posts.tsv ``` * Split training and test data sets. Here `0.2` is a test dataset split ratio, `20170426` is a seed for randomization. There are two output files: ```dvc - $ dvc run -d code/split_train_test.py -d data/Posts.tsv \ - -o data/Posts-train.tsv -o data/Posts-test.tsv \ - -f split.dvc \ - python code/split_train_test.py data/Posts.tsv 0.2 20170426 \ - data/Posts-train.tsv data/Posts-test.tsv +$ dvc run -d code/split_train_test.py -d data/Posts.tsv \ + -o data/Posts-train.tsv -o data/Posts-test.tsv \ + -f split.dvc \ + python code/split_train_test.py data/Posts.tsv 0.2 20170426 \ + data/Posts-train.tsv data/Posts-test.tsv ``` * Extract features and labels from the data. Two TSV as inputs with two pickle matrices as outputs: ```dvc - $ dvc run -d code/featurization.py -d data/Posts-train.tsv -d data/Posts-test.tsv \ - -o data/matrix-train.pkl -o data/matrix-test.pkl \ - -f featurize.dvc \ - python code/featurization.py data/Posts-train.tsv data/Posts-test.tsv \ +$ dvc run -d code/featurization.py -d data/Posts-train.tsv -d data/Posts-test.tsv \ + -o data/matrix-train.pkl -o data/matrix-test.pkl \ + -f featurize.dvc \ + python code/featurization.py data/Posts-train.tsv data/Posts-test.tsv \ data/matrix-train.pkl data/matrix-test.pkl ``` * Train ML model on the training data set. 20170426 is a seed value here: ```dvc - $ dvc run -d code/train_model.py -d data/matrix-train.pkl \ - -o data/model.pkl \ - -f train.dvc \ - python code/train_model.py data/matrix-train.pkl 20170426 data/model.pkl +$ dvc run -d code/train_model.py -d data/matrix-train.pkl \ + -o data/model.pkl \ + -f train.dvc \ + python code/train_model.py data/matrix-train.pkl 20170426 data/model.pkl ``` * Finally, evaluate the model on the test data set and get the metrics file: ```dvc - $ dvc run -d code/evaluate.py -d data/model.pkl -d data/matrix-test.pkl \ - -M auc.metric \ - -f evaluate.dvc \ - python code/evaluate.py data/model.pkl data/matrix-test.pkl auc.metric +$ dvc run -d code/evaluate.py -d data/model.pkl -d data/matrix-test.pkl \ + -M auc.metric \ + -f evaluate.dvc \ + python code/evaluate.py data/model.pkl data/matrix-test.pkl auc.metric ```
@@ -286,9 +286,9 @@ depending on Python version you are using and other environment parameters. * An easy way to see metrics across different branches: ```dvc - $ dvc metrics show +$ dvc metrics show - auc.metric: 0.620091 + auc.metric: 0.620091 ``` It's time to save the pipeline. You can check using `git status` command that we @@ -297,8 +297,8 @@ a snapshot of the DVC files that describe data and code versions and relationships between them. ```dvc - $ git add *.dvc auc.metric - $ git commit -am "create pipeline" +$ git add *.dvc auc.metric +$ git commit -am "create pipeline" ``` ## Reproduce @@ -310,7 +310,7 @@ files have been modified. For example: the `code/featurization.py`: ```dvc - $ vi code/featurization.py +$ vi code/featurization.py ``` Specify `ngram` parameter in `CountVectorizer` (lines 72–73): @@ -324,7 +324,7 @@ Specify `ngram` parameter in `CountVectorizer` (lines 72–73): * Reproduce all required steps to get our target metrics file: ```dvc - $ dvc repro evaluate.dvc +$ dvc repro evaluate.dvc ``` > Since the data set for this example is extremely simplified to make it @@ -334,9 +334,9 @@ depending on the Python version you are using and other environment parameters. * Take a look at the target metric improvement: ```dvc - $ dvc metrics show -a - master: - auc.metric: 0.666618 +$ dvc metrics show -a +master: + auc.metric: 0.666618 ``` ## Conclusion diff --git a/static/docs/get-started/example-versioning.md b/static/docs/get-started/example-versioning.md index 9dfa6b190d..057f5cfc92 100644 --- a/static/docs/get-started/example-versioning.md +++ b/static/docs/get-started/example-versioning.md @@ -40,8 +40,8 @@ Okay, let's first download the code and set up a Git repository. This step has nothing to do with DVC so far, it's just a simple preparation: ```dvc - $ git clone https://github.com/iterative/example-versioning.git - $ cd example-versioning +$ git clone https://github.com/iterative/example-versioning.git +$ cd example-versioning ``` This command pulls a repository with a single script `train.py` that runs the @@ -51,15 +51,15 @@ training. your global packages clean and untouched: ```dvc - $ virtualenv .env - $ source .env/bin/activate - $ echo ".env/" >> .gitignore +$ virtualenv .env +$ source .env/bin/activate +$ echo ".env/" >> .gitignore ``` Install required dependencies: ```dvc - $ pip install -r requirements.txt +$ pip install -r requirements.txt ``` **Note!** As of October 2018, Tensorflow does not support Python 3.7. Model @@ -99,9 +99,9 @@ install `wget` and `tar` with the following command in the `Command Prompt`:
```dvc - $ wget https://dvc.org/s3/examples/versioning/data.zip - $ unzip data.zip - $ rm -f data.zip +$ wget https://dvc.org/s3/examples/versioning/data.zip +$ unzip data.zip +$ rm -f data.zip ``` This command downloads and extracts our initial dataset - **1000 labeled @@ -133,7 +133,7 @@ it's a 43 MB dataset, with a directory structure like this: Let's capture the current state of this dataset with `dvc add`: ```dvc - $ dvc add data +$ dvc add data ``` This command should be used instead of `git add` on files or directories that @@ -152,17 +152,17 @@ metrics history. The simplest way to capture the current version of the model is to use `dvc add` again: ```dvc - $ python train.py - $ dvc add model.h5 +$ python train.py +$ dvc add model.h5 ``` The recommended way of capturing script outputs is using `dvc run`. We'll touch it a little bit later. For now, let's commit the current state: ```dvc - $ git add .gitignore model.h5.dvc data.dvc metrics.json - $ git commit -m "model first version, 1000 images" - $ git tag -a "v1.0" -m "model v1.0, 1000 images" +$ git add .gitignore model.h5.dvc data.dvc metrics.json +$ git commit -m "model first version, 1000 images" +$ git tag -a "v1.0" -m "model v1.0, 1000 images" ```
@@ -187,9 +187,9 @@ Let's imagine that our images dataset is growing, we were able to double it. Next command extracts 500 cat and 500 dog images into `data/train`: ```dvc - $ wget https://dvc.org/s3/examples/versioning/new-labels.zip - $ unzip new-labels.zip - $ rm -f new-labels.zip +$ wget https://dvc.org/s3/examples/versioning/new-labels.zip +$ unzip new-labels.zip +$ rm -f new-labels.zip ``` For simplicity we keep the validation dataset the same. Now our dataset has @@ -221,10 +221,10 @@ For simplicity we keep the validation dataset the same. Now our dataset has Of course, we want to leverage these new labels and train the model again. ```dvc - $ dvc add data - $ dvc remove model.h5.dvc - $ python train.py - $ dvc add model.h5 +$ dvc add data +$ dvc remove model.h5.dvc +$ python train.py +$ dvc add model.h5 ``` Note! `dvc remove` or `dvc unprotect` is required, otherwise `python train.py` @@ -234,9 +234,9 @@ will overwrite the existing file and may corrupt the cached version. Check this Let's commit the second version: ```dvc - $ git add model.h5.dvc data.dvc metrics.json - $ git commit -m "model second version, 2000 images" - $ git tag -a "v2.0" -m "model v2.0, 2000 images" +$ git add model.h5.dvc data.dvc metrics.json +$ git commit -m "model second version, 2000 images" +$ git tag -a "v2.0" -m "model v2.0, 2000 images" ``` That's it, we have a second model and dataset saved and pointers to them @@ -258,8 +258,8 @@ specific data or mode file. Let's consider the full checkout first. It's quite straightforward: ```dvc - $ git checkout v1.0 - $ dvc checkout +$ git checkout v1.0 +$ dvc checkout ``` These commands will restore the working tree to the first snapshot we made - @@ -272,8 +272,8 @@ the previous dataset only, we can do something like this (make sure that you don't have some uncommitted changes in the `data.dvc`): ```dvc - $ git checkout v1.0 data.dvc - $ dvc checkout data.dvc +$ git checkout v1.0 data.dvc +$ dvc checkout data.dvc ``` If you run `git status` you will see that `data.dvc` is modified and currently @@ -312,12 +312,12 @@ example, you should have noticed, that `train.py` produces binary files (e.g. produces other data files, a better way to capture them is to use `dvc run`:** ```dvc - $ dvc remove -p model.h5.dvc - $ dvc run -f Dvcfile \ - -d train.py -d data \ - -M metrics.json \ - -o model.h5 -o bottleneck_features_train.npy -o bottleneck_features_validation.npy \ - python train.py +$ dvc remove -p model.h5.dvc +$ dvc run -f Dvcfile \ + -d train.py -d data \ + -M metrics.json \ + -o model.h5 -o bottleneck_features_train.npy -o bottleneck_features_validation.npy \ + python train.py ``` Similar to `dvc add`, `dvc run` creates a single DVC file (`Dvcfile` in this diff --git a/static/docs/get-started/experiments.md b/static/docs/get-started/experiments.md index b181d55ee3..4a6586f8b5 100644 --- a/static/docs/get-started/experiments.md +++ b/static/docs/get-started/experiments.md @@ -24,9 +24,9 @@ and increase number of features: ```dvc - $ vi featurization.py # edit to use bigrams (see above) - $ dvc repro train.dvc # get and save the new model.pkl - $ git commit -a -m "bigram model" +$ vi featurization.py # edit to use bigrams (see above) +$ dvc repro train.dvc # get and save the new model.pkl +$ git commit -a -m "bigram model" ``` Now, we have a new `model.pkl` captured and saved. To get back to the initial diff --git a/static/docs/get-started/initialize.md b/static/docs/get-started/initialize.md index 1034fa66ba..fd4235fd5d 100644 --- a/static/docs/get-started/initialize.md +++ b/static/docs/get-started/initialize.md @@ -9,16 +9,16 @@ If you don't have a directory for your project already, create it now with these commands: ```dvc - $ mkdir example-get-started && cd example-get-started - $ git init +$ mkdir example-get-started && cd example-get-started +$ git init ``` Run DVC initialization in a repository directory to create DVC metafiles and directories: ```dvc - $ dvc init - $ git commit -m "initialize DVC" +$ dvc init +$ git commit -m "initialize DVC" ``` After DVC initialization, a new directory `.dvc/` will be created with `config` diff --git a/static/docs/get-started/metrics.md b/static/docs/get-started/metrics.md index 6a493b97f2..ecfe7f6082 100644 --- a/static/docs/get-started/metrics.md +++ b/static/docs/get-started/metrics.md @@ -8,11 +8,11 @@ code to use some API, all is tracked by Git and is stored in Git or DVC remote storage: ```dvc - $ dvc run -f evaluate.dvc \ - -d src/evaluate.py -d model.pkl -d data/features \ - -M auc.metric \ - python src/evaluate.py model.pkl \ - data/features auc.metric +$ dvc run -f evaluate.dvc \ + -d src/evaluate.py -d model.pkl -d data/features \ + -M auc.metric \ + python src/evaluate.py model.pkl \ + data/features auc.metric ``` `evaluate.py` calculates AUC value using the test data set. It reads features @@ -26,9 +26,9 @@ options and details. Let's again commit and save results: ```dvc - $ git add evaluate.dvc auc.metric - $ git commit -m "add evaluation step to the pipeline" - $ dvc push +$ git add evaluate.dvc auc.metric +$ git commit -m "add evaluation step to the pipeline" +$ dvc push ``` Let's also assign a Git tag, it will serve as a checkpoint for us to compare @@ -36,7 +36,7 @@ experiments in the future, or if we need to go back and checkout it and the corresponding data: ```dvc - $ git tag -a "baseline-experiment" -m "baseline" +$ git tag -a "baseline-experiment" -m "baseline" ``` The `dvc metrics show` command provides a way to compare different experiments, diff --git a/static/docs/get-started/older-versions.md b/static/docs/get-started/older-versions.md index 8e727ca667..54429eb312 100644 --- a/static/docs/get-started/older-versions.md +++ b/static/docs/get-started/older-versions.md @@ -11,8 +11,8 @@ process of switching between different data versions in the Let's say we want to get the previous `model.pkl` file. The short answer is: ```dvc - $ git checkout baseline-experiment train.dvc - $ dvc checkout train.dvc +$ git checkout baseline-experiment train.dvc +$ dvc checkout train.dvc ``` These two commands will bring the previous model file to its place in the @@ -44,8 +44,8 @@ To fully restore the previous experiment we just run `git checkout` and `dvc checkout` without specifying a target: ```dvc - $ git checkout baseline-experiment - $ dvc checkout +$ git checkout baseline-experiment +$ dvc checkout ``` Read `dvc checkout` command reference and a dedicated data versioning diff --git a/static/docs/get-started/pipeline.md b/static/docs/get-started/pipeline.md index 2c09d08f20..219beebbe4 100644 --- a/static/docs/get-started/pipeline.md +++ b/static/docs/get-started/pipeline.md @@ -10,29 +10,29 @@ The second stage (after the `prepare.dvc` that we created during the previous step), feature extraction: ```dvc - $ dvc run -f featurize.dvc \ - -d src/featurization.py -d data/prepared \ - -o data/features \ - python src/featurization.py \ - data/prepared data/features +$ dvc run -f featurize.dvc \ + -d src/featurization.py -d data/prepared \ + -o data/features \ + python src/featurization.py \ + data/prepared data/features ``` The third stage, training: ```dvc - $ dvc run -f train.dvc \ - -d src/train.py -d data/features \ - -o model.pkl \ - python src/train.py data/features model.pkl +$ dvc run -f train.dvc \ + -d src/train.py -d data/features \ + -o model.pkl \ + python src/train.py data/features model.pkl ``` Let's commit DVC files that describe our pipeline so far: ```dvc - $ git add data/.gitignore .gitignore featurize.dvc train.dvc - $ git commit -m "add featurization and train steps to the pipeline" - $ dvc push -``` +$ git add data/.gitignore .gitignore featurize.dvc train.dvc +$ git commit -m "add featurization and train steps to the pipeline" +$ dvc push + ``` This example is simplified just to show you an idea of the pipeline, check [example](/doc/get-started/example-pipeline) or complete diff --git a/static/docs/get-started/reproduce.md b/static/docs/get-started/reproduce.md index d0c15366f8..8e8f1e17f4 100644 --- a/static/docs/get-started/reproduce.md +++ b/static/docs/get-started/reproduce.md @@ -12,7 +12,7 @@ It's now extremely easy for you or anyone in your team to reproduce the result end-to-end: ```dvc - $ dvc repro train.dvc +$ dvc repro train.dvc ``` `train.dvc` file internally describes what data files and code we should take diff --git a/static/docs/get-started/retrieve-data.md b/static/docs/get-started/retrieve-data.md index a307211ae5..21611a2991 100644 --- a/static/docs/get-started/retrieve-data.md +++ b/static/docs/get-started/retrieve-data.md @@ -3,7 +3,7 @@ To retrieve data files to your local machine and your project's workspace run: ```dvc - $ dvc pull +$ dvc pull ``` This command retrieves data files that are referenced in _all_ `.dvc` files in @@ -13,8 +13,8 @@ the current workspace. So, you usually run it after `git clone`, `git pull`, or As an easy way to test it: ```dvc - $ rm -f data/data.xml - $ dvc pull +$ rm -f data/data.xml +$ dvc pull ``` > Note, make sure that the steps described in [initialization](/doc/get-started/initialize) and @@ -25,7 +25,7 @@ repository. Alternatively, if you want to retrieve a single dataset or a file: ```dvc - $ dvc pull data.xml.dvc +$ dvc pull data.xml.dvc ``` DVC remotes, `dvc push`, and `dvc pull` provide a basic collaboration workflow, diff --git a/static/docs/get-started/share-data.md b/static/docs/get-started/share-data.md index e04688a254..6c274d8d08 100644 --- a/static/docs/get-started/share-data.md +++ b/static/docs/get-started/share-data.md @@ -5,7 +5,7 @@ Now, that your data files are managed by DVC (see to the default [remote](/doc/commands-reference/remote) storage*: ```dvc - $ dvc push +$ dvc push ``` The same way as with Git remote, it ensures that your data files and your models diff --git a/static/docs/get-started/visualize.md b/static/docs/get-started/visualize.md index 6a73bbb969..1f6baa6a26 100644 --- a/static/docs/get-started/visualize.md +++ b/static/docs/get-started/visualize.md @@ -11,74 +11,74 @@ command supports (e.g. `.dot` files that can be used then in other tools). ## Stages ```dvc - $ dvc pipeline show --ascii train.dvc - +-------------------+ - | data/data.xml.dvc | - +-------------------+ - * - * - * - +-------------+ - | prepare.dvc | - +-------------+ - * - * - * - +---------------+ - | featurize.dvc | - +---------------+ - * - * - * - .---------------. - | model.pkl.dvc | - `---------------' +$ dvc pipeline show --ascii train.dvc + +-------------------+ + | data/data.xml.dvc | + +-------------------+ + * + * + * + +-------------+ + | prepare.dvc | + +-------------+ + * + * + * + +---------------+ + | featurize.dvc | + +---------------+ + * + * + * + .---------------. + | model.pkl.dvc | + `---------------' ``` ## Commands ```dvc - $ dvc pipeline show --ascii train.dvc --commands - +-------------------------------------+ - | python src/prepare.py data/data.xml | - +-------------------------------------+ - * - * - * - +---------------------------------------------------------+ - | python src/featurization.py data/prepared data/features | - +---------------------------------------------------------+ - * - * - * - +---------------------------------------------+ - | python src/train.py data/features model.pkl | - +---------------------------------------------+ +$ dvc pipeline show --ascii train.dvc --commands + +-------------------------------------+ + | python src/prepare.py data/data.xml | + +-------------------------------------+ + * + * + * + +---------------------------------------------------------+ + | python src/featurization.py data/prepared data/features | + +---------------------------------------------------------+ + * + * + * + +---------------------------------------------+ + | python src/train.py data/features model.pkl | + +---------------------------------------------+ ``` ## Outputs ```dvc - $ dvc pipeline show --ascii train.dvc --outs - +---------------+ - | data/data.xml | - +---------------+ - * - * - * - +---------------+ - | data/prepared | - +---------------+ - * - * - * - +---------------+ - | data/features | - +---------------+ - * - * - * - +-----------+ - | model.pkl | - +-----------+ +$ dvc pipeline show --ascii train.dvc --outs + +---------------+ + | data/data.xml | + +---------------+ + * + * + * + +---------------+ + | data/prepared | + +---------------+ + * + * + * + +---------------+ + | data/features | + +---------------+ + * + * + * + +-----------+ + | model.pkl | + +-----------+ ``` From f247b58f560662c5ca7d839634180498e84cc004 Mon Sep 17 00:00:00 2001 From: Kurian Benoy Date: Tue, 14 May 2019 06:59:57 +0530 Subject: [PATCH 3/3] Change Padding size attribute for code blocks --- src/Documentation/Markdown/Markdown.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Documentation/Markdown/Markdown.js b/src/Documentation/Markdown/Markdown.js index 2816336a38..fcaa584c7c 100644 --- a/src/Documentation/Markdown/Markdown.js +++ b/src/Documentation/Markdown/Markdown.js @@ -61,7 +61,8 @@ const HtmlRenderer = props => { const CodeBlock = ({ value, language }) => { const dvcStyle = Object.assign({}, docco) dvcStyle['hljs-comment'] = { color: '#999' } - dvcStyle['hljs-meta'] = { color: '#333', fontSize: '14px', paddingLeft: '8em' } + dvcStyle['hljs-meta'] = { color: '#333', fontSize: '14px' } + dvcStyle['hljs']['padding'] = '0.5em 0.5em 0.5em 2em' return ( {value}