diff --git a/.travis/test.sh b/.travis/test.sh index 60356a7d74ac..40ab63bbd65b 100644 --- a/.travis/test.sh +++ b/.travis/test.sh @@ -30,8 +30,10 @@ if [[ ${TASK} == "check-docs" ]]; then sudo apt-get install linkchecker pip install rstcheck # html5validator pip install -r requirements.txt - rstcheck --ignore-directives=autoclass,autofunction `find . -type f -name "*.rst"` || exit -1 + rstcheck --report warning --ignore-directives=autoclass,autofunction `find . -type f -name "*.rst"` || exit -1 make html || exit -1 + find ./_build/html/ -type f -name '*.html' -exec \ + sed -i -e 's#\(\.\/[^.]*\.\)\(md\|rst\)#\1html#g' {} \; # Emulate js function # html5validator --root ./_build/html/ || exit -1 For future (Sphinx 1.6) usage linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit -1 exit 0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 076e03895a26..30fcf4299307 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(FATAL_ERROR "Insufficient Clang version") endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - message(FATAL_ERROR "AppleClang wasn't supported. Please see https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#osx") + message(FATAL_ERROR "AppleClang wasn't supported. Please see https://github.com/Microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#osx") endif() if(APPLE) diff --git a/R-package/README.md b/R-package/README.md index aeecf541acf1..25ade4070db5 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -24,7 +24,7 @@ For users who wants to install online with GPU or want to choose a specific comp #### Mac OS X Preparation -gcc with OpenMP support must be installed first. Refer to [wiki](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#osx) for installing gcc with OpenMP support. +gcc with OpenMP support must be installed first. Refer to [Installation-Guide](https://github.com/Microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#osx) for installing gcc with OpenMP support. ### Install @@ -51,7 +51,7 @@ Note: for the build with Visual Studio/MSBuild in Windows, you should use the Wi Windows users may need to run with administrator rights (either R or the command prompt, depending on the way you are installing this package). Linux users might require the appropriate user write permissions for packages. -Set `use_gpu` to `TRUE` in `R-package/src/install.libs.R` to enable the build with GPU support. You will need to install Boost and OpenCL first: details for installation can be found in [gpu-support](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#with-gpu-support). +Set `use_gpu` to `TRUE` in `R-package/src/install.libs.R` to enable the build with GPU support. You will need to install Boost and OpenCL first: details for installation can be found in [Installation-Guide](https://github.com/Microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-gpu-version). You can also install directly from R using the repository with `devtools`: @@ -74,7 +74,7 @@ params <- list(objective="regression", metric="l2") model <- lgb.cv(params, dtrain, 10, nfold=5, min_data=1, learning_rate=1, early_stopping_rounds=10) ``` -Installation with precompiled dll/lib from R using GitHub +Installation with Precompiled dll/lib from R Using GitHub --------------------------------------------------------- You can install LightGBM R-package from GitHub with devtools thanks to a helper package for LightGBM. 
@@ -122,7 +122,9 @@ lgb.dl(commit = "master", use_gpu = TRUE) ``` -For more details about options, please check [Laurae2/lgbdl](https://github.com/Laurae2/lgbdl/) R-package. You may also read [Microsoft/LightGBM#912](https://github.com/Microsoft/LightGBM/issues/912#issuecomment-329496254) for a visual example for LightGBM in Windows with Visual Studio. +For more details about options, please check [Laurae2/lgbdl](https://github.com/Laurae2/lgbdl/) R-package. + +You may also read [Microsoft/LightGBM#912](https://github.com/Microsoft/LightGBM/issues/912#issuecomment-329496254) for a visual example for LightGBM installation in Windows with Visual Studio. Examples -------- diff --git a/README.md b/README.md index 728381cba3b3..c0965ca763cf 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ LightGBM is a gradient boosting framework that uses tree based learning algorith - Parallel and GPU learning supported - Capable of handling large-scale data -For more details, please refer to [Features](https://github.com/Microsoft/LightGBM/wiki/Features). +For more details, please refer to [Features](https://github.com/Microsoft/LightGBM/blob/master/docs/Features.md). -[Experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings. +[Comparison experiments](https://github.com/Microsoft/LightGBM/blob/master/docs/Experiments.rst#comparison-experiment) on public datasets show that LightGBM can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. What's more, the [parallel experiments](https://github.com/Microsoft/LightGBM/blob/master/docs/Experiments.rst#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings. News ---- @@ -45,7 +45,7 @@ News 12/05/2016 : **Categorical Features as input directly** (without one-hot coding). -12/02/2016 : Release [**python-package**](https://github.com/Microsoft/LightGBM/tree/master/python-package) beta version, welcome to have a try and provide feedback. +12/02/2016 : Release [**Python-package**](https://github.com/Microsoft/LightGBM/tree/master/python-package) beta version, welcome to have a try and provide feedback. More detailed update logs : [Key Events](https://github.com/Microsoft/LightGBM/blob/master/docs/Key-Events.md). @@ -61,16 +61,16 @@ JPMML: https://github.com/jpmml/jpmml-lightgbm Get Started and Documentation ----------------------------- -Install by following the guide for the [command line program](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide), [Python package](https://github.com/Microsoft/LightGBM/tree/master/python-package) or [R-package](https://github.com/Microsoft/LightGBM/tree/master/R-package). Then please see the [Quick Start](https://github.com/Microsoft/LightGBM/wiki/Quick-Start) guide. 
+Install by following the [guide](https://github.com/Microsoft/LightGBM/blob/master/docs/Installation-Guide.rst) for the command line program, [Python-package](https://github.com/Microsoft/LightGBM/tree/master/python-package) or [R-package](https://github.com/Microsoft/LightGBM/tree/master/R-package). Then please see the [Quick Start](https://github.com/Microsoft/LightGBM/blob/master/docs/Quick-Start.md) guide. Our primary documentation is at https://lightgbm.readthedocs.io/ and is generated from this repository. -Next you will want to read: +Next you may want to read: * [**Examples**](https://github.com/Microsoft/LightGBM/tree/master/examples) showing command line usage of common tasks -* [**Features**](https://github.com/Microsoft/LightGBM/wiki/Features) and algorithms supported by LightGBM +* [**Features**](https://github.com/Microsoft/LightGBM/blob/master/docs/Features.md) and algorithms supported by LightGBM * [**Parameters**](https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.md) is an exhaustive list of customization you can make -* [**Parallel Learning**](https://github.com/Microsoft/LightGBM/wiki/Parallel-Learning-Guide) and [**GPU Learning**](https://github.com/Microsoft/LightGBM/blob/master/docs/GPU-Tutorial.md) can speed up computation +* [**Parallel Learning**](https://github.com/Microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst) and [**GPU Learning**](https://github.com/Microsoft/LightGBM/blob/master/docs/GPU-Tutorial.md) can speed up computation * [**Laurae++ interactive documentation**](https://sites.google.com/view/lauraepp/parameters) is a detailed guide for hyperparameters Documentation for contributors: @@ -83,7 +83,7 @@ Support * Ask a question [on Stack Overflow with the `lightgbm` tag ](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions. * Discuss on the [LightGBM Gitter](https://gitter.im/Microsoft/LightGBM). -* Open **bug reports** and **feature requests** (not questions) on [Github issues](https://github.com/Microsoft/LightGBM/issues). +* Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/Microsoft/LightGBM/issues). How to Contribute ----------------- diff --git a/docker/README.md b/docker/README.md index 1a2491b049b2..3d90c3537985 100644 --- a/docker/README.md +++ b/docker/README.md @@ -8,14 +8,14 @@ Follow the general installation instructions [on the Docker site](https://docs.docker.com/installation/): * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) -* [ubuntu](https://docs.docker.com/installation/ubuntulinux/) +* [Ubuntu](https://docs.docker.com/installation/ubuntulinux/) -## Running the container +## Running the Container -Build the container, for python users: +Build the container, for python users: - $ docker build -t lightgbm -f dockerfile-python . + docker build -t lightgbm -f dockerfile-python . After build finished, run the container: - $ docker run --rm -it lightgbm + docker run --rm -it lightgbm diff --git a/docker/gpu/README.md b/docker/gpu/README.md index 928269e1a2a8..33de9d5ec894 100644 --- a/docker/gpu/README.md +++ b/docker/gpu/README.md @@ -1,6 +1,9 @@ -# Dockerfile for LightGBM supporting GPU with Python -A docker file with lightgbm utilizing nvidia-docker. The file is based on the nvidia/cuda:8.0 image. 
lightgbm can be utilized in gpu and cpu modes and via python (2.7 & 3.5) -### Contents +# Dockerfile for LightGBM GPU Version with Python + +A docker file with LightGBM utilizing nvidia-docker. The file is based on the nvidia/cuda:8.0 image. LightGBM can be utilized in GPU and CPU modes and via Python (2.7 & 3.5) + +## Contents + - LightGBM (cpu + gpu) - Python 2.7 (Conda) + scikit-learn notebooks pandas matplotlib - Python 3.5 (Conda) + scikit-learn notebooks pandas matplotlib @@ -8,29 +11,36 @@ A docker file with lightgbm utilizing nvidia-docker. The file is based on the nv Running the container starts a jupyter notebook at localhost:8888 jupyter password: keras -### Requirements + +## Requirements + Requires docker and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) on host machine. -### Quickstart -##### Build Docker Image +## Quickstart + +### Build Docker Image + ```sh mkdir lightgbm-docker cd lightgbm-docker wget https://github.com/Microsoft/LightGBM/blob/master/docker/gpu/dockerfile.gpu -cd lightgbm-docker docker build -f dockerfile.gpu -t lightgbm-gpu . ``` -##### Run Image + +### Run Image + ```sh nvidia-docker run --rm -d --name lightgbm-gpu -p 8888:8888 -v /home:/home lightgbm-gpu ``` -##### Attach with Command Line Access (if required) +### Attach with Command Line Access (if required) + ```sh docker exec -it lightgbm-gpu bash ``` -##### Jupyter Notebook + +### Jupyter Notebook + ```sh localhost:8888 ``` - diff --git a/docs/.linkcheckerrc b/docs/.linkcheckerrc index f4d578cb2b2e..61a97253bd7b 100644 --- a/docs/.linkcheckerrc +++ b/docs/.linkcheckerrc @@ -7,7 +7,7 @@ sslverify=0 ignorewarnings=http-robots-denied,https-certificate-error [output] -# Set to 0 if you want see only warnings and errors -verbose=1 +# Set to 1 if you want see the full output, not only warnings and errors +verbose=0 [AnchorCheck] diff --git a/docs/Advanced-Topic.md b/docs/Advanced-Topic.md index ec311a9ffbad..850be77fd6f2 100644 --- a/docs/Advanced-Topic.md +++ b/docs/Advanced-Topic.md @@ -31,4 +31,4 @@ ## Parallel Learning -* Refer to [Parallel Learning Guide](https://github.com/Microsoft/LightGBM/wiki/Parallel-Learning-Guide). +* Refer to [Parallel Learning Guide](./Parallel-Learning-Guide.rst). diff --git a/docs/Experiments.rst b/docs/Experiments.rst new file mode 100644 index 000000000000..69c10363aabc --- /dev/null +++ b/docs/Experiments.rst @@ -0,0 +1,243 @@ +Experiments +=========== + +Comparison Experiment +--------------------- + +For the detailed experiment scripts and output logs, please refer to this `repo`_. + +Data +^^^^ + +We use 4 datasets to conduct our comparison experiments. 
Details of data are listed in the following table: + ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ +| **Data** | **Task** | **Link** | **#Train\_Set** | **#Feature** | **Comments** | ++=============+=========================+========================================================================+===================+================+=============================================+ +| Higgs | Binary classification | `link `__ | 10,500,000 | 28 | use last 500,000 samples as test set | ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ +| Yahoo LTR | Learning to rank | `link `__ | 473,134 | 700 | set1.train as train, set1.test as test | ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ +| MS LTR | Learning to rank | `link `__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set | ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ +| Expo | Binary classification | `link `__ | 11,000,000 | 700 | use last 1,000,000 as test set | ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ +| Allstate | Binary classification | `link `__ | 13,184,290 | 4228 | use last 1,000,000 as test set | ++-------------+-------------------------+------------------------------------------------------------------------+-------------------+----------------+---------------------------------------------+ + +Environment +^^^^^^^^^^^ + +We use one Linux server as experiment platform, details are listed in the following table: + ++--------------------+-------------------+-----------------------+ +| **OS** | **CPU** | **Memory** | ++====================+===================+=======================+ +| Ubuntu 14.04 LTS | 2 \* E5-2670 v3 | DDR4 2133Mhz, 256GB | ++--------------------+-------------------+-----------------------+ + +Baseline +^^^^^^^^ + +We use `xgboost`_ as a baseline. + +Both xgboost and LightGBM are built with OpenMP support. + +Settings +^^^^^^^^ + +We set up total 3 settings for experiments, the parameters of these settings are: + +1. xgboost: + + .. code:: + + eta = 0.1 + max_depth = 8 + num_round = 500 + nthread = 16 + tree_method = exact + min_child_weight = 100 + +2. xgboost\_hist (using histogram based algorithm): + + .. code:: + + eta = 0.1 + num_round = 500 + nthread = 16 + tree_method = approx + min_child_weight = 100 + tree_method = hist + grow_policy = lossguide + max_depth = 0 + max_leaves = 255 + +3. LightGBM: + + .. code:: + + learning_rate = 0.1 + num_leaves = 255 + num_trees = 500 + num_threads = 16 + min_data_in_leaf = 0 + min_sum_hessian_in_leaf = 100 + +xgboost grows tree depth-wise and controls model complexity by ``max_depth``. +LightGBM uses leaf-wise algorithm instead and controls model complexity by ``num_leaves``. +So we cannot compare them in the exact same model setting. 
For the tradeoff, we use xgboost with ``max_depth=8``, which will have max number leaves to 255, to compare with LightGBM with ``num_leves=255``. + +Other parameters are default values. + +Result +^^^^^^ + +Speed +''''' + +For speed comparison, we only run the training task, which is without any test or metric output. And we don't count the time for IO. + +The following table is the comparison of time cost: + ++-------------+---------------+---------------------+------------------+ +| **Data** | **xgboost** | **xgboost\_hist** | **LightGBM** | ++=============+===============+=====================+==================+ +| Higgs | 3794.34 s | 551.898 s | **238.505513 s** | ++-------------+---------------+---------------------+------------------+ +| Yahoo LTR | 674.322 s | 265.302 s | **150.18644 s** | ++-------------+---------------+---------------------+------------------+ +| MS LTR | 1251.27 s | 385.201 s | **215.320316 s** | ++-------------+---------------+---------------------+------------------+ +| Expo | 1607.35 s | 588.253 s | **138.504179 s** | ++-------------+---------------+---------------------+------------------+ +| Allstate | 2867.22 s | 1355.71 s | **348.084475 s** | ++-------------+---------------+---------------------+------------------+ + +We found LightGBM is faster than xgboost on all experiment data sets. + +Accuracy +'''''''' + +For accuracy comparison, we use the accuracy on test data set to have a fair comparison. + ++-------------+-----------------+---------------+---------------------+----------------+ +| **Data** | **Metric** | **xgboost** | **xgboost\_hist** | **LightGBM** | ++=============+=================+===============+=====================+================+ +| Higgs | AUC | 0.839593 | 0.845605 | 0.845154 | ++-------------+-----------------+---------------+---------------------+----------------+ +| Yahoo LTR | NDCG\ :sub:`1` | 0.719748 | 0.720223 | 0.732466 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`3` | 0.717813 | 0.721519 | 0.738048 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`5` | 0.737849 | 0.739904 | 0.756548 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`10` | 0.78089 | 0.783013 | 0.796818 | ++-------------+-----------------+---------------+---------------------+----------------+ +| MS LTR | NDCG\ :sub:`1` | 0.483956 | 0.488649 | 0.524255 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`3` | 0.467951 | 0.473184 | 0.505327 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`5` | 0.472476 | 0.477438 | 0.510007 | +| +-----------------+---------------+---------------------+----------------+ +| | NDCG\ :sub:`10` | 0.492429 | 0.496967 | 0.527371 | ++-------------+-----------------+---------------+---------------------+----------------+ +| Expo | AUC | 0.756713 | 0.777777 | 0.777543 | ++-------------+-----------------+---------------+---------------------+----------------+ +| Allstate | AUC | 0.607201 | 0.609042 | 0.609167 | ++-------------+-----------------+---------------+---------------------+----------------+ + +Memory Consumption +'''''''''''''''''' + +We monitor RES while running training task. And we set ``two_round=true`` (will increase data-loading time, but reduce peak memory usage, not affect training speed or accuracy) in LightGBM to reduce peak memory usage. 
+
++-------------+---------------+---------------------+----------------+
+| **Data**    | **xgboost**   | **xgboost\_hist**   | **LightGBM**   |
++=============+===============+=====================+================+
+| Higgs       | 4.853GB       | 3.784GB             | **0.868GB**    |
++-------------+---------------+---------------------+----------------+
+| Yahoo LTR   | 1.907GB       | 1.468GB             | **0.831GB**    |
++-------------+---------------+---------------------+----------------+
+| MS LTR      | 5.469GB       | 3.654GB             | **0.886GB**    |
++-------------+---------------+---------------------+----------------+
+| Expo        | 1.553GB       | 1.393GB             | **0.543GB**    |
++-------------+---------------+---------------------+----------------+
+| Allstate    | 6.237GB       | 4.990GB             | **1.027GB**    |
++-------------+---------------+---------------------+----------------+
+
+Parallel Experiment
+-------------------
+
+Data
+^^^^
+
+We use a terabyte click log dataset to conduct parallel experiments. Details are listed in the following table:
+
++------------+-------------------------+------------+-----------------+----------------+
+| **Data**   | **Task**                | **Link**   | **#Data**       | **#Feature**   |
++============+=========================+============+=================+================+
+| Criteo     | Binary classification   | `link`_    | 1,700,000,000   | 67             |
++------------+-------------------------+------------+-----------------+----------------+
+
+This data contains 13 integer features and 26 categorical features from 24 days of click logs.
+We compute the CTR and count statistics for these 26 categorical features from the first ten days,
+then use the next ten days' data, with the categorical features replaced by the corresponding CTR and count values, as training data.
+The processed training data have a total of 1.7 billion records and 67 features.
+
+Environment
+^^^^^^^^^^^
+
+We use 16 Windows servers as the experiment platform; details are listed in the following table:
+
++----------------------+-----------------+----------------------+-------------------------------+
+| **OS**               | **CPU**         | **Memory**           | **Network Adapter**           |
++======================+=================+======================+===============================+
+| Windows Server 2012  | 2 * E5-2670 v2  | DDR3 1600Mhz, 256GB  | Mellanox ConnectX-3, 54Gbps,  |
+|                      |                 |                      | RDMA support                  |
++----------------------+-----------------+----------------------+-------------------------------+
+
+Settings
+^^^^^^^^
+
+.. code::
+
+    learning_rate = 0.1
+    num_leaves = 255
+    num_trees = 100
+    num_thread = 16
+    tree_learner = data
+
+We use data parallel here, since this data is large in ``#data`` but small in ``#feature``.
+
+Other parameters are default values.
+
+Result
+^^^^^^
+
++----------------+---------------------+---------------------------------+
+| **#Machine**   | **Time per Tree**   | **Memory Usage (per Machine)**  |
++================+=====================+=================================+
+| 1              | 627.8 s             | 176GB                           |
++----------------+---------------------+---------------------------------+
+| 2              | 311 s               | 87GB                            |
++----------------+---------------------+---------------------------------+
+| 4              | 156 s               | 43GB                            |
++----------------+---------------------+---------------------------------+
+| 8              | 80 s                | 22GB                            |
++----------------+---------------------+---------------------------------+
+| 16             | 42 s                | 11GB                            |
++----------------+---------------------+---------------------------------+
+
+From the results, we find that LightGBM achieves a linear speed-up in parallel learning.
+
+GPU Experiments
+---------------
+
+Refer to `GPU Performance <./GPU-Performance.rst>`__.
+
+..
_repo: https://github.com/guolinke/boosting_tree_benchmarks + +.. _xgboost: https://github.com/dmlc/xgboost + +.. _link: http://labs.criteo.com/2013/12/download-terabyte-click-logs/ diff --git a/docs/FAQ.md b/docs/FAQ.md index 440fe4598bca..c3ac9126e771 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -1,7 +1,7 @@ LightGBM FAQ ============ -### Catalog +### Contents - [Critical](#critical) - [LightGBM](#lightgbm) @@ -27,9 +27,11 @@ If it is a critical issue, identify first what error you have: Depending on the answers, while opening your issue, feel free to ping (just mention them with the arobase (@) symbol) appropriately so we can attempt to solve your problem faster: -* [@guolinke](https://github.com/guolinke) (C++ code / R package / Python package) -* [@Laurae2](https://github.com/Laurae2) (R package) -* [@wxchan](https://github.com/wxchan) (Python package) +* [@guolinke](https://github.com/guolinke) (C++ code / R-package / Python-package) +* [@Laurae2](https://github.com/Laurae2) (R-package) +* [@wxchan](https://github.com/wxchan) (Python-package) +* [@henry0312](https://github.com/henry0312) (Python-package) +* [@StrikerRUS](https://github.com/StrikerRUS) (Python-package) * [@huanzhang12](https://github.com/huanzhang12) (GPU support) Remember this is a free/open community support. We may not be available 24/7 to provide support. @@ -111,7 +113,7 @@ Remember this is a free/open community support. We may not be available 24/7 to setup.py directory, *never* absolute paths. ``` -- **Solution 1**: this error should be solved in latest version. If you still meet this error, try to remove lightgbm.egg-info folder in your python-package and reinstall, or check [this thread on stackoverflow](http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path). +- **Solution 1**: this error should be solved in latest version. If you still meet this error, try to remove lightgbm.egg-info folder in your Python-package and reinstall, or check [this thread on stackoverflow](http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path). --- @@ -128,7 +130,7 @@ Remember this is a free/open community support. We may not be available 24/7 to Cannot set predictor/reference/categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this. ``` -- **Solution 2**: Because LightGBM constructs bin mappers to build trees, and train and valid Datasets within one Booster share the same bin mappers, categorical features and feature names etc., the Dataset objects are constructed when construct a Booster. And if you set `free_raw_data=True` (default), the raw data (with python data struct) will be freed. So, if you want to: +- **Solution 2**: Because LightGBM constructs bin mappers to build trees, and train and valid Datasets within one Booster share the same bin mappers, categorical features and feature names etc., the Dataset objects are constructed when construct a Booster. And if you set `free_raw_data=True` (default), the raw data (with Python data struct) will be freed. 
So, if you want to:
+ get label(or weight/init_score/group) before construct dataset, it's same as get `self.label`
+ set label(or weight/init_score/group) before construct dataset, it's same as `self.label=some_label_array`
diff --git a/docs/Features.md b/docs/Features.md
new file mode 100644
index 000000000000..c5876bf0a2cb
--- /dev/null
+++ b/docs/Features.md
@@ -0,0 +1,183 @@
+# Features
+
+This is a short introduction to the features and algorithms used in LightGBM.
+
+This page doesn't describe the algorithms in detail; please refer to the cited papers or source code if you are interested.
+
+## Optimization in Speed and Memory Usage
+
+Many boosting tools use pre-sorted based algorithms[[1, 2]](#references) (e.g. the default algorithm in xgboost) for decision tree learning. This is a simple solution, but it is not easy to optimize.
+
+LightGBM uses histogram based algorithms[[3, 4, 5]](#references), which bucket continuous feature (attribute) values into discrete bins, to speed up the training procedure and reduce memory usage. The advantages of histogram based algorithms are as follows:
+
+- **Reduce calculation cost of split gain**
+  - Pre-sorted based algorithms need ``O(#data)`` calculations
+  - Histogram based algorithms only need ``O(#bins)`` calculations, and ``#bins`` is far smaller than ``#data``
+  - Constructing the histogram still costs ``O(#data)``, but this involves only sum-up operations
+- **Use histogram subtraction for further speed-up**
+  - To get one leaf's histograms in a binary tree, use the histogram subtraction of its parent and its neighbor
+  - So it only needs to construct histograms for one leaf (the one with smaller ``#data`` than its neighbor), and then gets the histograms of its neighbor by histogram subtraction at small cost (``O(#bins)``)
+- **Reduce memory usage**
+  - Continuous values can be replaced by discrete bins. If ``#bins`` is small, a small data type, e.g. uint8_t, can be used to store the training data
+  - No need to store additional information for pre-sorting feature values
+- **Reduce communication cost for parallel learning**
+
+## Sparse Optimization
+
+- Only ``O(2 * #non_zero_data)`` is needed to construct the histogram for sparse features
+
+## Optimization in Accuracy
+
+### Leaf-wise (Best-first) Tree Growth
+
+Most decision tree learning algorithms grow trees level (depth)-wise, like the following image:
+
+![level_wise](./_static/images/level-wise.png)
+
+LightGBM grows trees leaf-wise (best-first)[[6]](#references). It will choose the leaf with max delta loss to grow. When growing the same ``#leaf``, the leaf-wise algorithm can reduce more loss than the level-wise algorithm.
+
+Leaf-wise growth may cause over-fitting when ``#data`` is small, so LightGBM can use the additional parameter ``max_depth`` to limit the depth of the tree and avoid over-fitting (the tree still grows leaf-wise).
+
+![leaf_wise](./_static/images/leaf-wise.png)
+
+### Optimal Split for Categorical Features
+
+Categorical features are often converted into one-hot coding. However, this is not a good solution for tree learners: for high-cardinality categorical features, it produces a very unbalanced tree that needs to grow very deep to achieve good accuracy.
+
+Actually, the optimal solution is to partition the categories into 2 subsets, and there are ``2^(k-1) - 1`` possible partitions. Fortunately, there is an efficient solution for regression trees[[7]](#references). It needs about ``k * log(k)`` to find the optimal partition.
+
+The basic idea is to reorder the categories according to their relevance to the training target. More specifically, the histogram (of the categorical feature) is reordered according to its accumulated values (``sum_gradient / sum_hessian``), and then the best split is found on the sorted histogram.
+
+## Optimization in Network Communication
+
+Parallel learning in LightGBM only needs a few collective communication algorithms, like "All reduce", "All gather" and "Reduce scatter". LightGBM implements state-of-the-art algorithms[[8]](#references). These collective communication algorithms can provide much better performance than point-to-point communication.
+
+## Optimization in Parallel Learning
+
+LightGBM provides the following parallel learning algorithms.
+
+### Feature Parallel
+
+#### Traditional Algorithm
+
+Feature parallel aims to parallelize "Find Best Split" in the decision tree. The procedure of traditional feature parallel is:
+
+1. Partition data vertically (different machines have different feature sets)
+2. Workers find the local best split point {feature, threshold} on the local feature set
+3. Communicate local best splits with each other and get the best one
+4. The worker with the best split performs the split, then sends the split result to the other workers
+5. The other workers split the data according to the received result
+
+The shortcomings of traditional feature parallel:
+
+- Computation overhead, since it cannot speed up "split", whose time complexity is ``O(#data)``. Thus, feature parallel cannot speed up well when ``#data`` is large.
+- Communication of the split result is needed, which costs about ``O(#data / 8)`` (one bit per data point).
+
+#### Feature Parallel in LightGBM
+
+Since feature parallel cannot speed up well when ``#data`` is large, we make a small change here: instead of partitioning the data vertically, every worker holds the full data. Thus, LightGBM doesn't need to communicate the split result, since every worker knows how to split the data. And ``#data`` won't be larger, so it is reasonable to hold the full data on every machine.
+
+The procedure of feature parallel in LightGBM:
+
+1. Workers find the local best split point {feature, threshold} on the local feature set
+2. Communicate local best splits with each other and get the best one
+3. Perform the best split
+
+However, this feature parallel algorithm still suffers from computation overhead for "split" when ``#data`` is large. So it is better to use data parallel when ``#data`` is large.
+
+### Data Parallel
+
+#### Traditional Algorithm
+
+Data parallel aims to parallelize the whole decision tree learning. The procedure of data parallel is:
+
+1. Partition data horizontally
+2. Workers use local data to construct local histograms
+3. Merge global histograms from all local histograms
+4. Find the best split from the merged global histograms, then perform splits
+
+The shortcomings of traditional data parallel:
+
+- High communication cost. When using a point-to-point communication algorithm, the communication cost for one machine is about ``O(#machine * #feature * #bin)``. When using a collective communication algorithm (e.g. "All Reduce"), the communication cost is about ``O(2 * #feature * #bin)`` (check the cost of "All Reduce" in chapter 4.5 of [[8]](#references)).
+
+#### Data Parallel in LightGBM
+
+We reduce the communication cost of data parallel in LightGBM:
+
+1. Instead of "Merge global histograms from all local histograms", LightGBM uses "Reduce Scatter" to merge histograms of different (non-overlapping) features for different workers.
Then workers find local best split on local merged histograms and sync up global best split. +2. As aforementioned, LightGBM use histogram subtraction to speed up training. Based on this, we can communicate histograms only for one leaf, and get its neighbor's histograms by subtraction as well. + +Above all, we reduce communication cost to ``O(0.5 * #feature * #bin)`` for data parallel in LightGBM. + +### Voting Parallel + +Voting parallel further reduce the communication cost in [Data Parallel](#data-parallel) to constant cost. It uses two stage voting to reduce the communication cost of feature histograms[[9]](#references). + +## GPU Support + +Thanks [@huanzhang12](https://github.com/huanzhang12) for contributing this feature. Please read[[10]](#references) to get more details. + +- [GPU Installation](./Installation-Guide.rst) +- [GPU Tutorial](./GPU-Tutorial.md) + +## Applications and Metrics + +Support following application: + +- regression, the objective function is L2 loss +- binary classification, the objective function is logloss +- multi classification +- lambdarank, the objective function is lambdarank with NDCG + +Support following metrics: + +- L1 loss +- L2 loss +- Log loss +- Classification error rate +- AUC +- NDCG +- Multi class log loss +- Multi class error rate + +For more details, please refer to [Parameters](./Parameters.md). + +## Other Features + +- Limit ``max_depth`` of tree while grows tree leaf-wise +- [DART](https://arxiv.org/abs/1505.01866) +- L1/L2 regularization +- Bagging +- Column(feature) sub-sample +- Continued train with input GBDT model +- Continued train with the input score file +- Weighted training +- Validation metric output during training +- Multi validation data +- Multi metrics +- Early stopping (both training and prediction) +- Prediction for leaf index + +For more details, please refer to [Parameters](./Parameters.md). + +## References + +[1] Mehta, Manish, Rakesh Agrawal, and Jorma Rissanen. "SLIQ: A fast scalable classifier for data mining." International Conference on Extending Database Technology. Springer Berlin Heidelberg, 1996. + +[2] Shafer, John, Rakesh Agrawal, and Manish Mehta. "SPRINT: A scalable parallel classifier for data mining." Proc. 1996 Int. Conf. Very Large Data Bases. 1996. + +[3] Ranka, Sanjay, and V. Singh. "CLOUDS: A decision tree classifier for large datasets." Proceedings of the 4th Knowledge Discovery and Data Mining Conference. 1998. + +[4] Machado, F. P. "Communication and memory efficient parallel decision tree construction." (2003). + +[5] Li, Ping, Qiang Wu, and Christopher J. Burges. "Mcrank: Learning to rank using multiple classification and gradient boosting." Advances in neural information processing systems. 2007. + +[6] Shi, Haijian. "Best-first decision tree learning." Diss. The University of Waikato, 2007. + +[7] Walter D. Fisher. "[On Grouping for Maximum Homogeneity](http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1958.10501479)." Journal of the American Statistical Association. Vol. 53, No. 284 (Dec., 1958), pp. 789-798. + +[8] Thakur, Rajeev, Rolf Rabenseifner, and William Gropp. "[Optimization of collective communication operations in MPICH](http://wwwi10.lrr.in.tum.de/~gerndt/home/Teaching/HPCSeminar/mpich_multi_coll.pdf)." International Journal of High Performance Computing Applications 19.1 (2005): 49-66. + +[9] Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tieyan Liu. 
"[A Communication-Efficient Parallel Algorithm for Decision Tree](http://papers.nips.cc/paper/6381-a-communication-efficient-parallel-algorithm-for-decision-tree)." Advances in Neural Information Processing Systems 29 (NIPS 2016). + +[10] Huan Zhang, Si Si and Cho-Jui Hsieh. "[GPU Acceleration for Large-scale Tree Boosting](https://arxiv.org/abs/1706.08359)." arXiv:1706.08359, 2017. diff --git a/docs/GPU-Performance.rst b/docs/GPU-Performance.rst index 8187fc260c93..0fc252cc4e3a 100644 --- a/docs/GPU-Performance.rst +++ b/docs/GPU-Performance.rst @@ -1,7 +1,7 @@ GPU Tuning Guide and Performance Comparison =========================================== -How it works? +How It Works? ------------- In LightGBM, the main computation cost during training is building the feature histograms. We use an efficient algorithm on GPU to accelerate this process. @@ -206,6 +206,6 @@ Huan Zhang, Si Si and Cho-Jui Hsieh. `GPU Acceleration for Large-scale Tree Boos .. _0bb4a82: https://github.com/Microsoft/LightGBM/commit/0bb4a82 -.. |Performance Comparison| image:: http://www.huan-zhang.com/images/upload/lightgbm-gpu/compare_0bb4a825.png +.. |Performance Comparison| image:: ./_static/images/gpu-performance-comparison.png .. _GPU Acceleration for Large-scale Tree Boosting: https://arxiv.org/abs/1706.08359 diff --git a/docs/GPU-Targets.rst b/docs/GPU-Targets.rst index b78bd552de4f..4540dcd069aa 100644 --- a/docs/GPU-Targets.rst +++ b/docs/GPU-Targets.rst @@ -77,7 +77,7 @@ However, using a bad combination of ``gpu_platform_id`` and ``gpu_device_id`` will lead to a **crash** (you will lose your entire session content). Beware of it. -CPU only architectures +CPU Only Architectures ---------------------- When you have a single device (one CPU), OpenCL usage is @@ -293,4 +293,4 @@ card. .. _Intel SDK for OpenCL: https://software.intel.com/en-us/articles/opencl-drivers .. _AMD APP SDK: http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/ -.. _NVIDIA CUDA Toolkit: https://developer.nvidia.com/cuda-downloads \ No newline at end of file +.. _NVIDIA CUDA Toolkit: https://developer.nvidia.com/cuda-downloads diff --git a/docs/GPU-Tutorial.md b/docs/GPU-Tutorial.md index 62310608c1c7..72652c73eb0c 100644 --- a/docs/GPU-Tutorial.md +++ b/docs/GPU-Tutorial.md @@ -67,7 +67,7 @@ If you are building on OSX, you probably need to remove macro `BOOST_COMPUTE_USE Install Python Interface (optional) ----------------------------------- -If you want to use the Python interface of LightGBM, you can install it now (along with some necessary Python package dependencies): +If you want to use the Python interface of LightGBM, you can install it now (along with some necessary Python-package dependencies): ``` sudo apt-get -y install python-pip @@ -167,7 +167,7 @@ Further Reading [GPU Tuning Guide and Performance Comparison](./GPU-Performance.rst) -[GPU SDK Correspondence and Device Targeting Table](./GPU-Targets.rst). +[GPU SDK Correspondence and Device Targeting Table](./GPU-Targets.rst) [GPU Windows Tutorial](./GPU-Windows.md) diff --git a/docs/GPU-Windows.md b/docs/GPU-Windows.md index a17fba24200a..29db5ad3d12e 100644 --- a/docs/GPU-Windows.md +++ b/docs/GPU-Windows.md @@ -3,7 +3,7 @@ GPU Windows Compilation This guide is for the MinGW build. -For the MSVC (Visual Studio) build with GPU, please refer to [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#windows-2). (We recommend you to use this since it is much easier). 
+For the MSVC (Visual Studio) build with GPU, please refer to [Installation Guide](./Installation-Guide.rst). (We recommend you to use this since it is much easier). # Install LightGBM GPU version in Windows (CLI / R / Python), using MinGW/gcc @@ -33,15 +33,15 @@ At the end, you can restore your original PATH. To modify PATH, just follow the pictures after going to the `Control Panel`: -![System](https://cloud.githubusercontent.com/assets/9083669/24928495/e3293b12-1f02-11e7-861d-37ec2d086dba.png) +![System](./_static/images/screenshot-system.png) Then, go to `Advanced` > `Environment Variables...`: -![Advanced System Settings](https://cloud.githubusercontent.com/assets/9083669/24928515/00b252ae-1f03-11e7-8ff6-fbf78c503754.png) +![Advanced System Settings](./_static/images/screenshot-advanced-system-settings.png) Under `System variables`, the variable `Path`: -![Environment Variables](https://cloud.githubusercontent.com/assets/9083669/24928517/00fd8008-1f03-11e7-84e2-7dc8fd50d6ce.png) +![Environment Variables](./_static/images/screenshot-environment-variables.png) --- @@ -67,13 +67,13 @@ Warning: using Intel OpenCL is not recommended and may crash your machine due to --- -## MinGW correct compiler selection +## MinGW Correct Compiler Selection If you are expecting to use LightGBM without R, you need to install MinGW. Installing MinGW is straightforward, download [this](http://iweb.dl.sourceforge.net/project/mingw-w64/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/installer/mingw-w64-install.exe). Make sure you are using the x86_64 architecture, and do not modify anything else. You may choose a version other than the most recent one if you need a previous MinGW version. -![MinGW installation](https://cloud.githubusercontent.com/assets/9083669/25063112/a7374ee2-21db-11e7-89f4-ae6f413a16f1.png) +![MinGW installation](./_static/images/screenshot-mingw-installation.png) Then, add to your PATH the following (to adjust to your MinGW version): @@ -87,7 +87,7 @@ C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin You can check which MinGW version you are using by running the following in a command prompt: `gcc -v`: -![R MinGW used](https://cloud.githubusercontent.com/assets/9083669/24927803/80b83782-1f00-11e7-961a-068d58d82885.png) +![R MinGW used](./_static/images/screenshot-r-mingw-used.png) To check whether you need 32-bit or 64-bit MinGW for R, install LightGBM as usual and check for the following: @@ -162,7 +162,7 @@ Your folder should look like this at the end (not fully detailed): This is what you should (approximately) get at the end of Boost compilation: -![Boost compiled](https://cloud.githubusercontent.com/assets/9083669/24918623/5152a3c0-1ee1-11e7-9d59-d75fb1193241.png) +![Boost compiled](./_static/images/screenshot-boost-compiled.png) If you are getting an error: @@ -177,7 +177,7 @@ If you are getting an error: Installing Git for Windows is straightforward, use the following [link](https://git-for-windows.github.io/). -![git for Windows](https://cloud.githubusercontent.com/assets/9083669/24919716/e2612ea6-1ee4-11e7-9eca-d30997b911ff.png) +![git for Windows](./_static/images/screenshot-git-for-windows.png) Then, click on the big Download button, you can't miss it. @@ -202,7 +202,7 @@ Keep Git Bash open. 
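As a minimal sketch of the clone step that the rest of this guide assumes has already been done (the `C:/github_repos` location is only the example layout referenced later in this document), the Git Bash commands look like the following:

```
# Example layout only -- any writable directory works
mkdir -p /c/github_repos
cd /c/github_repos
# --recursive also fetches the submodules LightGBM needs (e.g. Boost.Compute)
git clone --recursive https://github.com/Microsoft/LightGBM
```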
Installing CMake requires one download first and then a lot of configuration for LightGBM: -![Downloading CMake](https://cloud.githubusercontent.com/assets/9083669/24919759/fe5f4d90-1ee4-11e7-992e-00f8d9bfe6dd.png) +![Downloading CMake](./_static/images/screenshot-downloading-cmake.png) * Download CMake 3.8.0 here: https://cmake.org/download/. * Install CMake. @@ -211,19 +211,19 @@ Installing CMake requires one download first and then a lot of configuration for * Copy the folder name, and add `/build` for "Where to build the binaries", default using our steps would be `C:/github_repos/LightGBM/build`. * Click `Configure`. -![Create directory](https://cloud.githubusercontent.com/assets/9083669/24921175/33feee92-1eea-11e7-8330-6d8e519a6177.png) +![Create directory](./_static/images/screenshot-create-directory.png) -![MinGW makefiles to use](https://cloud.githubusercontent.com/assets/9083669/24921193/404dd384-1eea-11e7-872e-6220e0f8b321.png) +![MinGW makefiles to use](./_static/images/screenshot-mingw-makefiles-to-use.png) * Lookup for `USE_GPU` and check the checkbox -![Use GPU](https://cloud.githubusercontent.com/assets/9083669/24921364/d7ccd426-1eea-11e7-8054-d4bd3a39af84.png) +![Use GPU](./_static/images/screenshot-use-gpu.png) * Click `Configure` You should get (approximately) the following after clicking Configure: -![Configured LightGBM](https://cloud.githubusercontent.com/assets/9083669/24919175/1301b42e-1ee3-11e7-9823-70a1d4c8c39e.png) +![Configured LightGBM](./_static/images/screenshot-configured-lightgbm.png) ``` Looking for CL_VERSION_2_0 @@ -262,7 +262,7 @@ You can do everything in the Git Bash console you left open: * Setup MinGW as make using `alias make='mingw32-make'` (otherwise, beware error and name clash!). * In Git Bash, run `make` and see LightGBM being installing! -![LightGBM with GPU support compiled](https://cloud.githubusercontent.com/assets/9083669/24923499/0cb90572-1ef2-11e7-8842-371d038fb5e9.png) +![LightGBM with GPU support compiled](./_static/images/screenshot-lightgbm-with-gpu-support-compiled.png) If everything was done correctly, you now compiled CLI LightGBM with GPU support! @@ -275,7 +275,7 @@ cd C:/github_repos/LightGBM/examples/binary_classification "../../lightgbm.exe" config=train.conf data=binary.train valid=binary.test objective=binary device=gpu ``` -![LightGBM in CLI with GPU](https://cloud.githubusercontent.com/assets/9083669/24958722/98021e72-1f90-11e7-80a9-204d56ace395.png) +![LightGBM in CLI with GPU](./_static/images/screenshot-lightgbm-in-cli-with-gpu.png) Congratulations for reaching this stage! @@ -283,12 +283,11 @@ To learn how to target a correct CPU or GPU for training, please see: [GPU SDK C --- - -## Debugging LightGBM crashes in CLI +## Debugging LightGBM Crashes in CLI Now that you compiled LightGBM, you try it... and you always see a segmentation fault or an undocumented crash with GPU support: -![Segmentation Fault](https://cloud.githubusercontent.com/assets/9083669/25015529/7326860a-207c-11e7-8fc3-320b2be619a6.png) +![Segmentation Fault](./_static/images/screenshot-segmentation-fault.png) Please check you are using the right device and whether it works with the default `gpu_device_id = 0` and `gpu_platform_id = 0`. If it still does not work with the default values, then you should follow all the steps below. @@ -297,15 +296,15 @@ You will have to redo the compilation steps for LightGBM to add debugging mode. 
* Deleting `C:/github_repos/LightGBM/build` folder * Deleting `lightgbm.exe`, `lib_lightgbm.dll`, and `lib_lightgbm.dll.a` files -![Files to remove](https://cloud.githubusercontent.com/assets/9083669/25051307/3b7dd084-214c-11e7-9758-c338c8cacb1e.png) +![Files to remove](./_static/images/screenshot-files-to-remove.png) Once you removed the file, go into CMake, and follow the usual steps. Before clicking "Generate", click on "Add Entry": -![Added manual entry in CMake](https://cloud.githubusercontent.com/assets/9083669/25051323/508969ca-214c-11e7-884a-20882cd3936a.png) +![Added manual entry in CMake](./_static/images/screenshot-added-manual-entry-in-cmake.png) In addition, click on Configure and Generate: -![Configured and Generated CMake](https://cloud.githubusercontent.com/assets/9083669/25051236/e71237ce-214b-11e7-8faa-d885d7826fe1.png) +![Configured and Generated CMake](./_static/images/screenshot-configured-and-generated-cmake.png) And then, follow the regular LightGBM CLI installation from there. @@ -315,7 +314,7 @@ Once you have installed LightGBM CLI, assuming your LightGBM is in `C:\github_re gdb --args "../../lightgbm.exe" config=train.conf data=binary.train valid=binary.test objective=binary device=gpu ``` -![Debug run](https://cloud.githubusercontent.com/assets/9083669/25041067/8fdbee66-210d-11e7-8adb-79b688c051d5.png) +![Debug run](./_static/images/screenshot-debug-run.png) Type `run` and Enter key. diff --git a/docs/Installation-Guide.md b/docs/Installation-Guide.md deleted file mode 100644 index 8c0a9a08e432..000000000000 --- a/docs/Installation-Guide.md +++ /dev/null @@ -1 +0,0 @@ -Refer to [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide). diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst new file mode 100644 index 000000000000..ab2def8dc5cf --- /dev/null +++ b/docs/Installation-Guide.rst @@ -0,0 +1,303 @@ +Installation Guide +================== + +Here is the guide for the build of CLI version. + +For the build of Python-package and R-package, please refer to `Python-package`_ and `R-package`_ folders respectively. + +Windows +~~~~~~~ + +LightGBM can use Visual Studio, MSBuild with CMake or MinGW to build in Windows. + +Visual Studio (or MSBuild) +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With GUI +******** + +1. Install `Visual Studio`_. + +2. Download `zip archive`_ and unzip it. + +3. Go to ``LightGBM-master/windows`` folder. + +4. Open ``LightGBM.sln`` file with Visual Studio, choose ``Release`` configuration and click ``BUILD-> Build Solution (Ctrl+Shift+B)``. + + If you have errors about **Platform Toolset**, go to ``PROJECT-> Properties-> Configuration Properties-> General`` and select the toolset installed on your machine. + +The exe file will be in ``LightGBM-master/windows/x64/Release`` folder. + +From Command Line +***************** + +1. Install `Git for Windows`_, `CMake`_ (3.8 or higher) and `MSBuild`_ (MSbuild is not needed if **Visual Studio** is installed). + +2. Run the following commands: + + .. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM + cd LightGBM + mkdir build + cd build + cmake -DCMAKE_GENERATOR_PLATFORM=x64 .. + cmake --build . --target ALL_BUILD --config Release + +The exe and dll files will be in ``LightGBM/Release`` folder. + +MinGW64 +^^^^^^^ + +1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. + +2. Run the following commands: + + .. 
code:: + + git clone --recursive https://github.com/Microsoft/LightGBM + cd LightGBM + mkdir build + cd build + cmake -G "MinGW Makefiles" .. + mingw32-make.exe -j4 + +The exe and dll files will be in ``LightGBM/`` folder. + +**Note**: you may need to run the ``cmake -G "MinGW Makefiles" ..`` one more time if met ``sh.exe was found in your PATH`` error. + +Linux +~~~~~ + +LightGBM uses ``CMake`` to build. Run the following commands: + +.. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM ; cd LightGBM + mkdir build ; cd build + cmake .. + make -j4 + +**Note**: glibc >= 2.14 is required. + +OSX +~~~ + +LightGBM depends on **OpenMP** for compiling, which isn't supported by Apple Clang. + +Please install **gcc/g++** by using the following commands: + +.. code:: + + brew install cmake + brew install gcc --without-multilib + +Then install LightGBM: + +.. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM ; cd LightGBM + export CXX=g++-7 CC=gcc-7 + mkdir build ; cd build + cmake .. + make -j4 + +Docker +~~~~~~ + +Refer to `Docker folder `__. + +Build MPI Version +~~~~~~~~~~~~~~~~~ + +The default build version of LightGBM is based on socket. LightGBM also supports `MPI`_. +MPI is a high performance communication approach with `RDMA`_ support. + +If you need to run a parallel learning application with high performance communication, you can build the LightGBM with MPI support. + +Windows +^^^^^^^ + +With GUI +******** + +1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``MSMpiSetup.exe`` are needed. + +2. Install `Visual Studio`_. + +3. Download `zip archive`_ and unzip it. + +4. Go to ``LightGBM-master/windows`` folder. + +4. Open ``LightGBM.sln`` file with Visual Studio, choose ``Release_mpi`` configuration and click ``BUILD-> Build Solution (Ctrl+Shift+B)``. + + If you have errors about **Platform Toolset**, go to ``PROJECT-> Properties-> Configuration Properties-> General`` and select the toolset installed on your machine. + +The exe file will be in ``LightGBM-master/windows/x64/Release_mpi`` folder. + +From Command Line +***************** + +1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``MSMpiSetup.exe`` are needed. + +2. Install `Git for Windows`_, `CMake`_ (3.8 or higher) and `MSBuild`_ (MSbuild is not needed if **Visual Studio** is installed). + +3. Run the following commands: + + .. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM + cd LightGBM + mkdir build + cd build + cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DUSE_MPI=ON .. + cmake --build . --target ALL_BUILD --config Release + +The exe and dll files will be in ``LightGBM/Release`` folder. + +**Note**: Build MPI version by **MinGW** is not supported due to the miss of MPI library in it. + +Linux +^^^^^ + +You need to install `Open MPI`_ first. + +Then run the following commands: + +.. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM ; cd LightGBM + mkdir build ; cd build + cmake -DUSE_MPI=ON .. + make -j4 + +**Note**: glibc >= 2.14 is required. + +OSX +^^^ + +Install **gcc** and **Open MPI** first: + +.. code:: + + brew install openmpi + brew install cmake + brew install gcc --without-multilib + +Then run the following commands: + +.. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM ; cd LightGBM + export CXX=g++-7 CC=gcc-7 + mkdir build ; cd build + cmake -DUSE_MPI=ON .. 
+ make -j4 + +Build GPU Version +~~~~~~~~~~~~~~~~~ + +Linux +^^^^^ + +The following dependencies should be installed before compilation: + +- OpenCL 1.2 headers and libraries, which is usually provided by GPU manufacture. + + The generic OpenCL ICD packages (for example, Debian package ``cl-icd-libopencl1`` and ``cl-icd-opencl-dev``) can also be used. + +- libboost 1.56 or later (1.61 or later recommended). + + We use Boost.Compute as the interface to GPU, which is part of the Boost library since version 1.61. However, since we include the source code of Boost.Compute as a submodule, we only require the host has Boost 1.56 or later installed. We also use Boost.Align for memory allocation. Boost.Compute requires Boost.System and Boost.Filesystem to store offline kernel cache. + + The following Debian packages should provide necessary Boost libraries: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``. + +- CMake 3.2 or later. + +To build LightGBM GPU version, run the following commands: + +.. code:: + + git clone --recursive https://github.com/Microsoft/LightGBM ; cd LightGBM + mkdir build ; cd build + cmake -DUSE_GPU=1 .. + make -j4 + +Windows +^^^^^^^ + +If you use **MinGW**, the build procedure are similar to the build in Linux. Refer to `GPU Windows Compilation <./GPU-Windows.md>`__ to get more details. + +Following procedure is for the MSVC(Microsoft Visual C++) build. + +1. Install `Git for Windows`_, `CMake`_ (3.8 or higher) and `MSBuild`_ (MSbuild is not needed if **Visual Studio** is installed). + +2. Install **OpenCL** for Windows. The installation depends on the brand (NVIDIA, AMD, Intel) of your GPU card. + + - For running on Intel, get `Intel SDK for OpenCL`_. + + - For running on AMD, get `AMD APP SDK`_. + + - For running on NVIDIA, get `CUDA Toolkit`_. + +3. Install `Boost Binary`_. + + **Note**: match your Visual C++ version: + + Visual Studio 2013 -> ``msvc-12.0-64.exe``, + + Visual Studio 2015 -> ``msvc-14.0-64.exe``, + + Visual Studio 2017 -> ``msvc-14.1-64.exe``. + +4. Run the following commands: + + .. code:: + + Set BOOST_ROOT=C:\local\boost_1_64_0\ + Set BOOST_LIBRARYDIR=C:\local\boost_1_64_0\lib64-msvc-14.0 + git clone --recursive https://github.com/Microsoft/LightGBM + cd LightGBM + mkdir build + cd build + cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DUSE_GPU=1 .. + cmake --build . --target ALL_BUILD --config Release + + **Note**: ``C:\local\boost_1_64_0\`` and ``C:\local\boost_1_64_0\lib64-msvc-14.0`` are locations of your Boost binaries. You also can set them to the environment variable to avoid ``Set ...`` commands when build. + +Docker +^^^^^^ + +Refer to `GPU Docker folder `__. + +.. _Python-package: https://github.com/Microsoft/LightGBM/tree/master/python-package + +.. _R-package: https://github.com/Microsoft/LightGBM/tree/master/R-package + +.. _zip archive: https://github.com/Microsoft/LightGBM/archive/master.zip + +.. _Visual Studio: https://www.visualstudio.com/downloads/ + +.. _Git for Windows: https://git-scm.com/download/win + +.. _CMake: https://cmake.org/ + +.. _MSBuild: https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2017 + +.. _MinGW-w64: https://mingw-w64.org/doku.php/download + +.. _MPI: https://en.wikipedia.org/wiki/Message_Passing_Interface + +.. _RDMA: https://en.wikipedia.org/wiki/Remote_direct_memory_access + +.. _MS MPI: https://www.microsoft.com/en-us/download/details.aspx?id=49926 + +.. _Open MPI: https://www.open-mpi.org/ + +.. 
_Intel SDK for OpenCL: https://software.intel.com/en-us/articles/opencl-drivers + +.. _AMD APP SDK: http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/ + +.. _CUDA Toolkit: https://developer.nvidia.com/cuda-downloads + +.. _Boost Binary: https://sourceforge.net/projects/boost/files/boost-binaries/1.64.0/ diff --git a/docs/Key-Events.md b/docs/Key-Events.md index f1279f783748..2a1f10a8d747 100644 --- a/docs/Key-Events.md +++ b/docs/Key-Events.md @@ -1,4 +1,4 @@ -# Table/List of key modifications of LightGBM +# Table/List of Key Modifications of LightGBM The list includes the commits where the major feature added is considered working with the least amount of flaws. This is useful if you are trying to get a specific commit, such as the first properly working commit for categorical support. diff --git a/docs/Parallel-Learning-Guide.md b/docs/Parallel-Learning-Guide.md deleted file mode 100644 index b74ac7dc4a76..000000000000 --- a/docs/Parallel-Learning-Guide.md +++ /dev/null @@ -1 +0,0 @@ -Refer to [Parallel Learning Guide](https://github.com/Microsoft/LightGBM/wiki/Parallel-Learning-Guide). diff --git a/docs/Parallel-Learning-Guide.rst b/docs/Parallel-Learning-Guide.rst new file mode 100644 index 000000000000..3d88b038a466 --- /dev/null +++ b/docs/Parallel-Learning-Guide.rst @@ -0,0 +1,123 @@ +Parallel Learning Guide +======================= + +This is a guide for parallel learning of LightGBM. + +Follow the `Quick Start`_ to know how to use LightGBM first. + +Choose Appropriate Parallel Algorithm +------------------------------------- + +LightGBM provides 2 parallel learning algorithms now. + ++--------------------------+---------------------------+ +| **Parallel Algorithm** | **How to Use** | ++==========================+===========================+ +| Data parallel | ``tree_learner=data`` | ++--------------------------+---------------------------+ +| Feature parallel | ``tree_learner=feature`` | ++--------------------------+---------------------------+ +| Voting parallel | ``tree_learner=voting`` | ++--------------------------+---------------------------+ + +These algorithms are suited for different scenarios, which is listed in the following table: + ++-------------------------+----------------------+----------------------+ +| | **#data is small** | **#data is large** | ++=========================+======================+======================+ +| **#feature is small** | Feature Parallel | Data Parallel | ++-------------------------+----------------------+----------------------+ +| **#feature is large** | Feature Parallel | Voting Parallel | ++-------------------------+----------------------+----------------------+ + +More details about these parallel algorithms can be found in `optimization in parallel learning`_. + +Build Parallel Version +---------------------- + +Default build version support parallel learning based on the socket. + +If you need to build parallel version with MPI support, please refer to `Installation Guide`_. + +Preparation +----------- + +Socket Version +^^^^^^^^^^^^^^ + +It needs to collect IP of all machines that want to run parallel learning in and allocate one TCP port (assume 12345 here) for all machines, +and change firewall rules to allow income of this port (12345). Then write these IP and ports in one file (assume ``mlist.txt``), like following: + +.. code:: + + machine1_ip 12345 + machine2_ip 12345 + +MPI Version +^^^^^^^^^^^ + +It needs to collect IP (or hostname) of all machines that want to run parallel learning in. 
+Then write these IP in one file (assume ``mlist.txt``) like following: + +.. code:: + + machine1_ip + machine2_ip + +Note: For Windows users, need to start "smpd" to start MPI service. More details can be found `here`_. + +Run Parallel Learning +--------------------- + +Socket Version +^^^^^^^^^^^^^^ + +1. Edit following parameters in config file: + +``tree_learner=your_parallel_algorithm``, edit ``your_parallel_algorithm`` (e.g. feature/data) here. + +``num_machines=your_num_machines``, edit ``your_num_machines`` (e.g. 4) here. + +``machine_list_file=mlist.txt``, ``mlist.txt`` is created in `Preparation section <#preparation>`__. + +``local_listen_port=12345``, ``12345`` is allocated in `Preparation section <#preparation>`__. + +2. Copy data file, executable file, config file and ``mlist.txt`` to all machines. + +3. Run following command on all machines, you need to change ``your_config_file`` to real config file. + +For Windows: ``lightgbm.exe config=your_config_file`` + +For Linux: ``./lightgbm config=your_config_file`` + +MPI Version +^^^^^^^^^^^ + +1. Edit following parameters in config file: + +``tree_learner=your_parallel_algorithm``, edit ``your_parallel_algorithm`` (e.g. feature/data) here. + +``num_machines=your_num_machines``, edit ``your_num_machines`` (e.g. 4) here. + +2. Copy data file, executable file, config file and ``mlist.txt`` to all machines. Note: MPI needs to be run in the **same path on all machines**. + +3. Run following command on one machine (not need to run on all machines), need to change ``your_config_file`` to real config file. + +For Windows: ``mpiexec.exe /machinefile mlist.txt lightgbm.exe config=your_config_file`` + +For Linux: ``mpiexec --machinefile mlist.txt ./lightgbm config=your_config_file`` + +Example +^^^^^^^ + +- `A simple parallel example`_. + +.. _Quick Start: ./Quick-Start.md + +.. _optimization in parallel learning: ./Features.md + +.. _Installation Guide: ./Installation-Guide.rst + +.. _here: https://blogs.technet.microsoft.com/windowshpc/2015/02/02/how-to-compile-and-run-a-simple-ms-mpi-program/ + +.. _A simple parallel example: https://github.com/Microsoft/lightgbm/tree/master/examples/parallel_learning diff --git a/docs/Parameters-tuning.md b/docs/Parameters-tuning.md index 30e6c67c7e0f..ab08f26bdf81 100644 --- a/docs/Parameters-tuning.md +++ b/docs/Parameters-tuning.md @@ -4,11 +4,11 @@ This is a page contains all parameters in LightGBM. ***List of other Helpful Links*** * [Parameters](./Parameters.md) -* [Python API Reference](./Python-API.md) +* [Python API](./Python-API.rst) -## Tune parameters for the leaf-wise(best-first) tree +## Tune Parameters for the Leaf-wise (Best-first) Tree -LightGBM uses the [leaf-wise](https://github.com/Microsoft/LightGBM/wiki/Features#optimization-in-accuracy) tree growth algorithm, while many other popular tools use depth-wise tree growth. Compared with depth-wise growth, the leaf-wise algorithm can convenge much faster. However, the leaf-wise growth may be over-fitting if not used with the appropriate parameters. +LightGBM uses the [leaf-wise](./Features.md) tree growth algorithm, while many other popular tools use depth-wise tree growth. Compared with depth-wise growth, the leaf-wise algorithm can convenge much faster. However, the leaf-wise growth may be over-fitting if not used with the appropriate parameters. 
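As a concrete illustration before the individual parameters are discussed (a minimal sketch -- the values below are arbitrary placeholders, not tuned recommendations), constraining leaf-wise growth from the Python-package could look like this:

```python
import numpy as np
import lightgbm as lgb

# Toy data only to make the sketch self-contained.
data = np.random.rand(500, 10)
label = np.random.randint(2, size=500)
train_data = lgb.Dataset(data, label=label)

# Placeholder values -- tune them on your own data.
params = {
    'objective': 'binary',
    'num_leaves': 31,        # main parameter controlling leaf-wise tree complexity
    'max_depth': 7,          # optional explicit cap on tree depth
    'min_data_in_leaf': 20,  # keep leaves from fitting only a handful of samples
    'learning_rate': 0.1,
}
bst = lgb.train(params, train_data, num_boost_round=100)
```

Smaller `num_leaves` together with a positive `max_depth` restricts exactly the kind of deep, narrow growth that makes the leaf-wise algorithm prone to over-fitting.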
To get good results using a leaf-wise tree, these are some important parameters: @@ -19,15 +19,15 @@ To get good results using a leaf-wise tree, these are some important parameters: 3. ```max_depth```. You also can use ```max_depth``` to limit the tree depth explicitly. -## For faster speed +## For Faster Speed * Use bagging by setting ```bagging_fraction``` and ```bagging_freq``` * Use feature sub-sampling by setting ```feature_fraction``` * Use small ```max_bin``` * Use ```save_binary``` to speed up data loading in future learning -* Use parallel learning, refer to [parallel learning guide](./Parallel-Learning-Guide.md). +* Use parallel learning, refer to [Parallel Learning Guide](./Parallel-Learning-Guide.rst). -## For better accuracy +## For Better Accuracy * Use large ```max_bin``` (may be slower) * Use small ```learning_rate``` with large ```num_iterations``` @@ -35,7 +35,7 @@ To get good results using a leaf-wise tree, these are some important parameters: * Use bigger training data * Try ```dart``` -## Deal with over-fitting +## Deal with Over-fitting * Use small ```max_bin``` * Use small ```num_leaves``` diff --git a/docs/Parameters.md b/docs/Parameters.md index b9aa39f6feab..00f3ccb81fbd 100644 --- a/docs/Parameters.md +++ b/docs/Parameters.md @@ -3,7 +3,7 @@ This is a page contains all parameters in LightGBM. ***List of other Helpful Links*** -* [Python API Reference](./Python-API.md) +* [Python API](./Python-API.rst) * [Parameters Tuning](./Parameters-tuning.md) ***External Links*** @@ -18,7 +18,7 @@ Default values for the following parameters have changed: * num_leaves = 127 => 31 * num_iterations = 10 => 100 -## Parameter format +## Parameter Format The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be set both in config file and command line. By using command line, parameters should not have spaces before and after `=`. By using config files, one line can only contain one parameter. you can use `#` to comment. If one parameter appears in both command line and config file, LightGBM will use the parameter in command line. @@ -65,7 +65,7 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * `serial`, single machine tree learner * `feature`, feature parallel tree learner * `data`, data parallel tree learner - * Refer to [Parallel Learning Guide](./Parallel-Learning-Guide.md) to get more details. + * Refer to [Parallel Learning Guide](./Parallel-Learning-Guide.rst) to get more details. * `num_threads`, default=OpenMP_default, type=int, alias=`num_thread`,`nthread` * Number of threads for LightGBM. * For the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPU using [hyper-threading](https://en.wikipedia.org/wiki/Hyper-threading) to generate 2 threads per CPU core). @@ -74,10 +74,11 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * For parallel learning, should not use full CPU cores since this will cause poor performance for the network. * `device`, default=`cpu`, options=`cpu`,`gpu` * Choose device for the tree learning, can use gpu to achieve the faster learning. - * Note: 1. Recommend use the smaller `max_bin`(e.g `63`) to get the better speed up. 2. For the faster speed, GPU use 32-bit float point to sum up by default, may affect the accuracy for some tasks. You can set `gpu_use_dp=true` to enable 64-bit float point, but it will slow down the training. 3. 
Refer to [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#with-gpu-support) to build with GPU . + * Note: 1. Recommend use the smaller `max_bin`(e.g `63`) to get the better speed up. 2. For the faster speed, GPU use 32-bit float point to sum up by default, may affect the accuracy for some tasks. You can set `gpu_use_dp=true` to enable 64-bit float point, but it will slow down the training. 3. Refer to [Installation Guide](./Installation-Guide.rst) to build with GPU . -## Learning control parameters +## Learning Control Parameters + * `max_depth`, default=`-1`, type=int * Limit the max depth for tree model. This is used to deal with overfit when #data is small. Tree still grow by leaf-wise. * `< 0` means no limit @@ -142,7 +143,7 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * The smooth numerator is `b = a * sum_gradient / sum_hessian`. -## IO parameters +## IO Parameters * `max_bin`, default=`255`, type=int * max number of bin that feature values will bucket in. Small bin may reduce training accuracy but may increase general power (deal with over-fit). @@ -231,7 +232,8 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * Path of validation initial score file, `""` will use `valid_data_file+".init"` (if exists). * separate by `,` for multi-validation data -## Objective parameters + +## Objective Parameters * `sigmoid`, default=`1.0`, type=double * parameter for sigmoid function. Will be used in binary classification and lambdarank. @@ -257,7 +259,8 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * `num_class`, default=`1`, type=int, alias=`num_classes` * only used in multi-class classification -## Metric parameters + +## Metric Parameters * `metric`, default={`l2` for regression}, {`binary_logloss` for binary classification},{`ndcg` for lambdarank}, type=multi-enum, options=`l1`,`l2`,`ndcg`,`auc`,`binary_logloss`,`binary_error`... * `l1`, absolute loss, alias=`mean_absolute_error`, `mae` @@ -281,7 +284,8 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s * `ndcg_at`, default=`1,2,3,4,5`, type=multi-int, alias=`ndcg_eval_at`,`eval_at` * NDCG evaluation position, separate by `,` -## Network parameters + +## Network Parameters Following parameters are used for parallel learning, and only used for base(socket) version. @@ -297,7 +301,8 @@ Following parameters are used for parallel learning, and only used for base(sock * File that list machines for this parallel learning application * Each line contains one IP and one port for one machine. The format is `ip port`, separate by space. -## GPU parameters + +## GPU Parameters * `gpu_platform_id`, default=`-1`, type=int * OpenCL platform ID. Usually each GPU vendor exposes one OpenCL platform. @@ -308,7 +313,8 @@ Following parameters are used for parallel learning, and only used for base(sock * `gpu_use_dp`, default=`false`, type=bool * Set to true to use double precision math on GPU (default using single precision). -## Convert model parameters + +## Convert Model Parameters This feature is only supported in command line version yet. @@ -321,7 +327,8 @@ This feature is only supported in command line version yet. ## Others -### Continued training with input score +### Continued Training with Input Score + LightGBM support continued train with initial score. 
It uses an additional file to store these initial score, like the following: ``` @@ -334,7 +341,8 @@ LightGBM support continued train with initial score. It uses an additional file It means the initial score of first data is `0.5`, second is `-0.1`, and so on. The initial score file corresponds with data file line by line, and has per score per line. And if the name of data file is "train.txt", the initial score file should be named as "train.txt.init" and in the same folder as the data file. And LightGBM will auto load initial score file if it exists. -### Weight data +### Weight Data + LightGBM support weighted training. It uses an additional file to store weight data, like the following: ``` @@ -349,7 +357,8 @@ It means the weight of first data is `1.0`, second is `0.5`, and so on. The weig update: You can specific weight column in data file now. Please refer to parameter `weight` in above. -### Query data + +### Query Data For LambdaRank learning, it needs query information for training data. LightGBM use an additional file to store query data. Following is an example: diff --git a/docs/Python-API.md b/docs/Python-API.md deleted file mode 100644 index e0bd94127fa1..000000000000 --- a/docs/Python-API.md +++ /dev/null @@ -1,3 +0,0 @@ -# Python API Reference - -Move to [Read The Docs](http://lightgbm.readthedocs.io/en/latest/python/lightgbm.html#lightgbm-package). \ No newline at end of file diff --git a/docs/python/lightgbm.rst b/docs/Python-API.rst similarity index 96% rename from docs/python/lightgbm.rst rename to docs/Python-API.rst index 7aeda73ed497..e304f763bed1 100644 --- a/docs/python/lightgbm.rst +++ b/docs/Python-API.rst @@ -1,6 +1,5 @@ -lightgbm package -================ - +Python API +========== Data Structure API ------------------ @@ -64,4 +63,3 @@ Plotting .. autofunction:: lightgbm.plot_tree .. autofunction:: lightgbm.create_tree_digraph - diff --git a/docs/Python-intro.md b/docs/Python-intro.md index 9ec63a97b614..6e3671fa9f89 100644 --- a/docs/Python-intro.md +++ b/docs/Python-intro.md @@ -1,34 +1,35 @@ Python Package Introduction =========================== -This document gives a basic walkthrough of LightGBM python package. +This document gives a basic walkthrough of LightGBM Python-package. ***List of other Helpful Links*** * [Python Examples](https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide) -* [Python API Reference](./Python-API.md) +* [Python API](./Python-API.rst) * [Parameters Tuning](./Parameters-tuning.md) Install ------- -* Install the library first, follow the wiki [here](./Installation-Guide.md). -* Install python-package dependencies, `setuptools`, `numpy` and `scipy` is required, `scikit-learn` is required for sklearn interface and recommended. Run: -``` -pip install setuptools numpy scipy scikit-learn -U -``` -* In the `python-package` directory, run +Install Python-package dependencies, `setuptools`, `wheel`, `numpy` and `scipy` are required, `scikit-learn` is required for sklearn interface and recommended: + ``` -python setup.py install +pip install setuptools wheel numpy scipy scikit-learn -U ``` -* To verify your installation, try to `import lightgbm` in Python. +Refer to [Python-package](https://github.com/Microsoft/LightGBM/tree/master/python-package) folder for the installation guide. 
+ +To verify your installation, try to `import lightgbm` in Python: + ``` import lightgbm as lgb ``` Data Interface -------------- -The LightGBM python module is able to load data from: + +The LightGBM Python module is able to load data from: + - libsvm/tsv/csv txt format file - Numpy 2D array, pandas object - LightGBM binary file @@ -36,27 +37,35 @@ The LightGBM python module is able to load data from: The data is stored in a ```Dataset``` object. #### To load a libsvm text file or a LightGBM binary file into ```Dataset```: + ```python train_data = lgb.Dataset('train.svm.bin') ``` #### To load a numpy array into ```Dataset```: + ```python data = np.random.rand(500, 10) # 500 entities, each contains 10 features label = np.random.randint(2, size=500) # binary target train_data = lgb.Dataset(data, label=label) ``` + #### To load a scpiy.sparse.csr_matrix array into ```Dataset```: + ```python csr = scipy.sparse.csr_matrix((dat, (row, col))) train_data = lgb.Dataset(csr) ``` + #### Saving ```Dataset``` into a LightGBM binary file will make loading faster: + ```python train_data = lgb.Dataset('train.svm.txt') train_data.save_binary('train.bin') ``` -#### Create validation data + +#### Create validation data: + ```python test_data = train_data.create_valid('test.svm') ``` @@ -69,20 +78,25 @@ test_data = lgb.Dataset('test.svm', reference=train_data) In LightGBM, the validation data should be aligned with training data. -#### Specific feature names and categorical features +#### Specific feature names and categorical features: ```python train_data = lgb.Dataset(data, label=label, feature_name=['c1', 'c2', 'c3'], categorical_feature=['c3']) ``` -LightGBM can use categorical features as input directly. It doesn't need to covert to one-hot coding, and is much faster than one-hot coding (about 8x speed-up). -**Note:You should convert your categorical features to int type before you construct `Dataset`.** + +LightGBM can use categorical features as input directly. It doesn't need to covert to one-hot coding, and is much faster than one-hot coding (about 8x speed-up). + +**Note**:You should convert your categorical features to int type before you construct `Dataset`. #### Weights can be set when needed: + ```python w = np.random.rand(500, ) train_data = lgb.Dataset(data, label=label, weight=w) ``` + or + ```python train_data = lgb.Dataset(data, label=label) w = np.random.rand(500, ) @@ -102,43 +116,56 @@ However, Numpy/Array/Pandas object is memory cost. If you concern about your mem Setting Parameters ------------------ -LightGBM can use either a list of pairs or a dictionary to set [parameters](./Parameters.md). For instance: -* Booster parameters + +LightGBM can use either a list of pairs or a dictionary to set [Parameters](./Parameters.md). For instance: + +* Booster parameters: + ```python param = {'num_leaves':31, 'num_trees':100, 'objective':'binary'} param['metric'] = 'auc' ``` + * You can also specify multiple eval metrics: + ```python param['metric'] = ['auc', 'binary_logloss'] - ``` Training -------- Training a model requires a parameter list and data set. + ```python num_round = 10 bst = lgb.train(param, train_data, num_round, valid_sets=[test_data]) ``` + After training, the model can be saved. + ```python bst.save_model('model.txt') ``` -The trained model can also be dumped to JSON format + +The trained model can also be dumped to JSON format. + ```python # dump model json_model = bst.dump_model() ``` -A saved model can be loaded as follows: + +A saved model can be loaded. 
+ ```python bst = lgb.Booster(model_file='model.txt') #init model ``` CV -- + Training with 5-fold CV: + ```python num_round = 10 lgb.cv(param, train_data, num_round, nfold=5) @@ -146,6 +173,7 @@ lgb.cv(param, train_data, num_round, nfold=5) Early Stopping -------------- + If you have a validation set, you can use early stopping to find the optimal number of boosting rounds. Early stopping requires at least one set in `valid_sets`. If there's more than one, it will use all of them. @@ -162,7 +190,9 @@ This works with both metrics to minimize (L2, log loss, etc.) and to maximize (N Prediction ---------- + A model that has been trained or loaded can perform predictions on data sets. + ```python # 7 entities, each contains 10 features data = np.random.rand(7, 10) @@ -170,6 +200,7 @@ ypred = bst.predict(data) ``` If early stopping is enabled during training, you can get predictions from the best iteration with `bst.best_iteration`: + ```python ypred = bst.predict(data, num_iteration=bst.best_iteration) ``` diff --git a/docs/Quick-Start.md b/docs/Quick-Start.md index 0594de9db37f..2fed71e66168 100644 --- a/docs/Quick-Start.md +++ b/docs/Quick-Start.md @@ -2,21 +2,21 @@ This is a quick start guide for LightGBM of cli version. -Follow the [Installation Guide](./Installation-Guide.md) to install LightGBM first. +Follow the [Installation Guide](./Installation-Guide.rst) to install LightGBM first. ***List of other Helpful Links*** * [Parameters](./Parameters.md) * [Parameters Tuning](./Parameters-tuning.md) -* [Python Package quick start guide](./Python-intro.md) -* [Python API Reference](./Python-API.md) +* [Python-package Quick Start](./Python-intro.md) +* [Python API](./Python-API.rst) -## Training data format +## Training Data Format LightGBM supports input data file with [CSV](https://en.wikipedia.org/wiki/Comma-separated_values), [TSV](https://en.wikipedia.org/wiki/Tab-separated_values) and [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) formats. Label is the data of first column, and there is no header in the file. -### Categorical feature support +### Categorical Feature Support update 12/5/2016: @@ -24,7 +24,8 @@ LightGBM can use categorical feature directly (without one-hot coding). The expe For the setting details, please refer to [Parameters](./Parameters.md). -### Weight and query/group data +### Weight and Query/Group Data + LightGBM also support weighted training, it needs an additional [weight data](./Parameters.md). And it needs an additional [query data](./Parameters.md) for ranking task. update 11/3/2016: @@ -33,9 +34,7 @@ update 11/3/2016: 2. can specific label column, weight column and query/group id column. Both index and column are supported 3. can specific a list of ignored columns -For the detailed usage, please refer to [Configuration](./Parameters.md). - -## Parameter quick look +## Parameter Quick Look The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can be in both config file and command line. @@ -78,7 +77,7 @@ Some important parameters: * ```serial```, single machine tree learner * ```feature```, feature parallel tree learner * ```data```, data parallel tree learner - * Refer to [Parallel Learning Guide](./Parallel-Learning-Guide.md) to get more details. + * Refer to [Parallel Learning Guide](./Parallel-Learning-Guide.rst) to get more details. * ```num_threads```, default=OpenMP_default, type=int, alias=```num_thread```,```nthread``` * Number of threads for LightGBM. 
* For the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPU using [hyper-threading](https://en.wikipedia.org/wiki/Hyper-threading) to generate 2 threads per CPU core). @@ -93,7 +92,6 @@ Some important parameters: For all parameters, please refer to [Parameters](./Parameters.md). - ## Run LightGBM For Windows: @@ -101,7 +99,7 @@ For Windows: lightgbm.exe config=your_config_file other_args ... ``` -For unix: +For Unix: ``` ./lightgbm config=your_config_file other_args ... ``` diff --git a/docs/README.md b/docs/README.md index 5bb11e05fab0..f5857f0cbd31 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,17 +2,13 @@ Documentation for LightGBM is generated using [Sphinx](http://www.sphinx-doc.org/) and [recommonmark](https://recommonmark.readthedocs.io/). -After each commit on `master`, documentation is updated and published to https://lightgbm.readthedocs.io/ +After each commit on `master`, documentation is updated and published to [https://lightgbm.readthedocs.io/](https://lightgbm.readthedocs.io/). ## Build -You can build the documentation locally. Just run: +You can build the documentation locally. Just run in `docs` folder: ```sh pip install -r requirements.txt make html ``` - -## Wiki - -In addition to our documentation hosted on Read the Docs, some additional topics are explained at https://github.com/Microsoft/LightGBM/wiki. diff --git a/docs/_static/images/gpu-performance-comparison.png b/docs/_static/images/gpu-performance-comparison.png new file mode 100644 index 000000000000..f26d46bea29c Binary files /dev/null and b/docs/_static/images/gpu-performance-comparison.png differ diff --git a/docs/_static/images/leaf-wise.png b/docs/_static/images/leaf-wise.png new file mode 100644 index 000000000000..aa0fd3259c7d Binary files /dev/null and b/docs/_static/images/leaf-wise.png differ diff --git a/docs/_static/images/level-wise.png b/docs/_static/images/level-wise.png new file mode 100644 index 000000000000..b014ccb7ecfe Binary files /dev/null and b/docs/_static/images/level-wise.png differ diff --git a/docs/_static/images/screenshot-added-manual-entry-in-cmake.png b/docs/_static/images/screenshot-added-manual-entry-in-cmake.png new file mode 100644 index 000000000000..8812944846a3 Binary files /dev/null and b/docs/_static/images/screenshot-added-manual-entry-in-cmake.png differ diff --git a/docs/_static/images/screenshot-advanced-system-settings.png b/docs/_static/images/screenshot-advanced-system-settings.png new file mode 100644 index 000000000000..a625cbd0aafc Binary files /dev/null and b/docs/_static/images/screenshot-advanced-system-settings.png differ diff --git a/docs/_static/images/screenshot-boost-compiled.png b/docs/_static/images/screenshot-boost-compiled.png new file mode 100644 index 000000000000..f2b7f866cbcf Binary files /dev/null and b/docs/_static/images/screenshot-boost-compiled.png differ diff --git a/docs/_static/images/screenshot-configured-and-generated-cmake.png b/docs/_static/images/screenshot-configured-and-generated-cmake.png new file mode 100644 index 000000000000..afa5d159205b Binary files /dev/null and b/docs/_static/images/screenshot-configured-and-generated-cmake.png differ diff --git a/docs/_static/images/screenshot-configured-lightgbm.png b/docs/_static/images/screenshot-configured-lightgbm.png new file mode 100644 index 000000000000..606137e3ee99 Binary files /dev/null and b/docs/_static/images/screenshot-configured-lightgbm.png differ diff --git a/docs/_static/images/screenshot-create-directory.png 
b/docs/_static/images/screenshot-create-directory.png new file mode 100644 index 000000000000..48a0077ba53b Binary files /dev/null and b/docs/_static/images/screenshot-create-directory.png differ diff --git a/docs/_static/images/screenshot-debug-run.png b/docs/_static/images/screenshot-debug-run.png new file mode 100644 index 000000000000..ce45d15a12d9 Binary files /dev/null and b/docs/_static/images/screenshot-debug-run.png differ diff --git a/docs/_static/images/screenshot-downloading-cmake.png b/docs/_static/images/screenshot-downloading-cmake.png new file mode 100644 index 000000000000..ae668f8d56db Binary files /dev/null and b/docs/_static/images/screenshot-downloading-cmake.png differ diff --git a/docs/_static/images/screenshot-environment-variables.png b/docs/_static/images/screenshot-environment-variables.png new file mode 100644 index 000000000000..c1825b2513fc Binary files /dev/null and b/docs/_static/images/screenshot-environment-variables.png differ diff --git a/docs/_static/images/screenshot-files-to-remove.png b/docs/_static/images/screenshot-files-to-remove.png new file mode 100644 index 000000000000..2dcfe481b273 Binary files /dev/null and b/docs/_static/images/screenshot-files-to-remove.png differ diff --git a/docs/_static/images/screenshot-git-for-windows.png b/docs/_static/images/screenshot-git-for-windows.png new file mode 100644 index 000000000000..00567daf8afd Binary files /dev/null and b/docs/_static/images/screenshot-git-for-windows.png differ diff --git a/docs/_static/images/screenshot-lightgbm-in-cli-with-gpu.png b/docs/_static/images/screenshot-lightgbm-in-cli-with-gpu.png new file mode 100644 index 000000000000..81ac68731dc5 Binary files /dev/null and b/docs/_static/images/screenshot-lightgbm-in-cli-with-gpu.png differ diff --git a/docs/_static/images/screenshot-lightgbm-with-gpu-support-compiled.png b/docs/_static/images/screenshot-lightgbm-with-gpu-support-compiled.png new file mode 100644 index 000000000000..23a040e38139 Binary files /dev/null and b/docs/_static/images/screenshot-lightgbm-with-gpu-support-compiled.png differ diff --git a/docs/_static/images/screenshot-mingw-installation.png b/docs/_static/images/screenshot-mingw-installation.png new file mode 100644 index 000000000000..1a39c1d57845 Binary files /dev/null and b/docs/_static/images/screenshot-mingw-installation.png differ diff --git a/docs/_static/images/screenshot-mingw-makefiles-to-use.png b/docs/_static/images/screenshot-mingw-makefiles-to-use.png new file mode 100644 index 000000000000..f85e686f722d Binary files /dev/null and b/docs/_static/images/screenshot-mingw-makefiles-to-use.png differ diff --git a/docs/_static/images/screenshot-r-mingw-used.png b/docs/_static/images/screenshot-r-mingw-used.png new file mode 100644 index 000000000000..b371950c3e15 Binary files /dev/null and b/docs/_static/images/screenshot-r-mingw-used.png differ diff --git a/docs/_static/images/screenshot-segmentation-fault.png b/docs/_static/images/screenshot-segmentation-fault.png new file mode 100644 index 000000000000..f645cfee8e58 Binary files /dev/null and b/docs/_static/images/screenshot-segmentation-fault.png differ diff --git a/docs/_static/images/screenshot-system.png b/docs/_static/images/screenshot-system.png new file mode 100644 index 000000000000..520388939e73 Binary files /dev/null and b/docs/_static/images/screenshot-system.png differ diff --git a/docs/_static/images/screenshot-use-gpu.png b/docs/_static/images/screenshot-use-gpu.png new file mode 100644 index 000000000000..2d338d63ba0c Binary files 
/dev/null and b/docs/_static/images/screenshot-use-gpu.png differ diff --git a/docs/_static/js/rst_links_fix.js b/docs/_static/js/rst_links_fix.js new file mode 100644 index 000000000000..26bcc2d2a63c --- /dev/null +++ b/docs/_static/js/rst_links_fix.js @@ -0,0 +1,4 @@ +window.onload = function() { + $('a[href^="./"][href$=".md"]').attr('href', (i, val) => { return val.replace('.md', '.html'); }); /* Replace '.md' with '.html' in all internal links like './[Something].md' */ + $('a[href^="./"][href$=".rst"]').attr('href', (i, val) => { return val.replace('.rst', '.html'); }); /* Replace '.rst' with '.html' in all internal links like './[Something].rst' */ +} diff --git a/docs/conf.py b/docs/conf.py index c6198ebfc003..6a96a3b7bf0e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -180,10 +180,11 @@ # https://recommonmark.readthedocs.io/en/latest/ -github_doc_root = 'https://github.com/Microsoft/LightGBM/tree/master/docs' +github_doc_root = 'https://github.com/Microsoft/LightGBM/tree/master/docs/' def setup(app): app.add_config_value('recommonmark_config', { 'url_resolver': lambda url: github_doc_root + url, 'auto_toc_tree_section': 'Contents', }, True) app.add_transform(AutoStructify) + app.add_javascript("js/rst_links_fix.js") diff --git a/docs/development.rst b/docs/development.rst index 632e8b83dbc0..880d037933c6 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -4,7 +4,7 @@ Development Guide Algorithms ---------- -Refer to `Features `__ to understand important algorithms used in LightGBM. +Refer to `Features <./Features.md>`__ to understand important algorithms used in LightGBM. Classes and Code Structure -------------------------- @@ -13,7 +13,7 @@ Important Classes ~~~~~~~~~~~~~~~~~ +-------------------------+--------------------------------------------------------------------------------------+ -| Class | description | +| Class | Description | +=========================+======================================================================================+ | ``Application`` | The entrance of application, including training and prediction logic | +-------------------------+--------------------------------------------------------------------------------------+ @@ -44,7 +44,7 @@ Code Structure ~~~~~~~~~~~~~~ +---------------------+------------------------------------------------------------------------------------------------------------------------------------+ -| Path | description | +| Path | Description | +=====================+====================================================================================================================================+ | ./include | Header files | +---------------------+------------------------------------------------------------------------------------------------------------------------------------+ @@ -65,11 +65,13 @@ Code Structure | ./src/treelearner | Implementations of tree learners | +---------------------+------------------------------------------------------------------------------------------------------------------------------------+ -API Documents +Documents API ~~~~~~~~~~~~~ LightGBM support use `doxygen `__ to generate documents for classes and functions. +Refer to `docs README <./README.md>`__. + C API ----- @@ -78,9 +80,11 @@ Refere to the comments in `c\_api.h `__ and `R-package `__. +See the implementations at `Python-package `__ and `R-package `__. + +Questions +--------- -Ask Questions -------------- +Refer to `FAQ <./FAQ.md>`__. -Feel free to open `issues `__ if you met problems. 
+Also feel free to open `issues `__ if you met problems. diff --git a/docs/index.rst b/docs/index.rst index 100a3fdd8d9f..adbd6e9d8729 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,22 +10,21 @@ Welcome to LightGBM's documentation! :maxdepth: 1 :caption: Contents: - Installation Guide + Installation Guide Quick Start Python Quick Start - Features - Experiments + Features + Experiments Parameters - Advanced Topics Parameters Tuning - Python API Reference - Parallel Learning Guide + Python API + Parallel Learning Guide GPU Tutorial + Advanced Topics FAQ Development Guide -Indices and tables +Indices and Tables ================== * :ref:`genindex` -* :ref:`search` diff --git a/examples/README.md b/examples/README.md index 7a76d7f28465..30f5d277d256 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,4 @@ Examples -===================== - -You can learn how to use LightGBM by these examples. If you have any questions, please refer to our [wiki](https://github.com/Microsoft/LightGBM/wiki). +======== +You can learn how to use LightGBM by these examples. diff --git a/examples/binary_classification/README.md b/examples/binary_classification/README.md index 9075f5e7bc5a..d7bda49dd204 100644 --- a/examples/binary_classification/README.md +++ b/examples/binary_classification/README.md @@ -5,7 +5,8 @@ Here is an example for LightGBM to run binary classification task. ***You should copy executable file to this folder first.*** -#### Training +Trainin +------- For Windows, by running following command in this folder: @@ -19,7 +20,8 @@ For Linux, by running following command in this folder: ./lightgbm config=train.conf ``` -#### Prediction +Prediction +---------- You should finish training first. diff --git a/examples/lambdarank/README.md b/examples/lambdarank/README.md index 4576ba3cce92..b095bedc37a9 100644 --- a/examples/lambdarank/README.md +++ b/examples/lambdarank/README.md @@ -5,7 +5,8 @@ Here is an example for LightGBM to run lambdarank task. ***You should copy executable file to this folder first.*** -#### Training +Training +-------- For Windows, by running following command in this folder: @@ -19,7 +20,8 @@ For Linux, by running following command in this folder: ./lightgbm config=train.conf ``` -#### Prediction +Prediction +---------- You should finish training first. diff --git a/examples/multiclass_classification/README.md b/examples/multiclass_classification/README.md index 5354cb5c0cfd..c9dbeeb8d3b4 100644 --- a/examples/multiclass_classification/README.md +++ b/examples/multiclass_classification/README.md @@ -5,7 +5,8 @@ Here is an example for LightGBM to run multiclass classification task. ***You should copy executable file to this folder first.*** -#### Training +Training +-------- For Windows, by running following command in this folder: @@ -19,7 +20,8 @@ For Linux, by running following command in this folder: ./lightgbm config=train.conf ``` -#### Prediction +Prediction +---------- You should finish training first. diff --git a/examples/parallel_learning/README.md b/examples/parallel_learning/README.md index 87b50e788661..d2d2adaf8305 100644 --- a/examples/parallel_learning/README.md +++ b/examples/parallel_learning/README.md @@ -1,5 +1,6 @@ Parallel Learning Example ========================= + Here is an example for LightGBM to perform parallel learning for 2 machines. 1. Edit mlist.txt, write the ip of these 2 machines that you want to run application on. @@ -19,4 +20,4 @@ Here is an example for LightGBM to perform parallel learning for 2 machines. 
This parallel learning example is based on socket. LightGBM also support parallel learning based on mpi. -For more details about the usage of parallel learning, please refer to [this](https://github.com/Microsoft/LightGBM/wiki/Parallel-Learning-Guide). +For more details about the usage of parallel learning, please refer to [this](https://github.com/Microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst). diff --git a/examples/python-guide/README.md b/examples/python-guide/README.md index 665c2dbe61eb..eefb9bfed105 100644 --- a/examples/python-guide/README.md +++ b/examples/python-guide/README.md @@ -1,11 +1,9 @@ Python Package Examples ======================= -Here is an example for LightGBM to use python package. +Here is an example for LightGBM to use Python-package. -***You should install lightgbm (both c++ and python verion) first.*** - -For the installation, check the wiki [here](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide). +You should install LightGBM [Python-package](https://github.com/Microsoft/LightGBM/tree/master/python-package) first. You also need scikit-learn, pandas and matplotlib (only for plot example) to run the examples, but they are not required for the package itself. You can install them with pip: diff --git a/examples/regression/README.md b/examples/regression/README.md index 3e077c6ff4c9..073c6faf78d2 100644 --- a/examples/regression/README.md +++ b/examples/regression/README.md @@ -1,10 +1,12 @@ Regression Example ================== + Here is an example for LightGBM to run regression task. ***You should copy executable file to this folder first.*** -#### Training +Training +-------- For Windows, by running following command in this folder: @@ -18,7 +20,8 @@ For Linux, by running following command in this folder: ./lightgbm config=train.conf ``` -#### Prediction +Prediction +---------- You should finish training first. diff --git a/pmml/README.md b/pmml/README.md index eb83f14d8c92..d996d736d365 100644 --- a/pmml/README.md +++ b/pmml/README.md @@ -1,5 +1,6 @@ PMML Generator ============== + The script pmml.py can be used to translate the LightGBM models, found in LightGBM_model.txt, to predictive model markup language (PMML). These models can then be imported by other analytics applications. The models that the language can describe includes decision trees. The specification of PMML can be found here at the Data Mining Group's [website](http://dmg.org/pmml/v4-3/GeneralStructure.html). In order to generate pmml files do the following steps. @@ -7,19 +8,27 @@ In order to generate pmml files do the following steps. lightgbm config=train.conf python pmml.py LightGBM_model.txt ``` + The python script will create a file called **LightGBM_pmml.xml**. Inside the file you will find a `MiningModel` tag. In there you will find `TreeModel` tags. Each `TreeModel` tag contains the pmml translation of a decision tree inside the LightGBM_model.txt file. The model described by the **LightGBM_pmml.xml** file can be transferred to other analytics applications. For instance you can use the pmml file as an input to the jpmml-evaluator API. Follow the steps below to run a model described by **LightGBM_pmml.xml**. -##### Steps to run jpmml-evaluator -1, First clone the repository +##### Steps to Run jpmml-evaluator + +1. Clone the repository + ``` git clone https://github.com/jpmml/jpmml-evaluator.git ``` -2, Build using maven + +2. 
Build using maven + ``` mvn clean install ``` -3, Run the EvaluationExample class on the model file using the following command + +3. Run the EvaluationExample class on the model file using the following command + ``` java -cp example-1.3-SNAPSHOT.jar org.jpmml.evaluator.EvaluationExample --model LightGBM_pmml.xml --input input.csv --output output.csv ``` + Note, in order to run the model on the input.csv file, the input.csv file must have the same number of columns as specified by the `DataDictionary` field in the pmml file. Also, the column headers inside the input.csv file must be the same as the column names specified by the `MiningSchema` field. Inside output.csv you will find all the columns inside the input.csv file plus a new column. In the new column you will find the scores calculated by processing each rows data on the model. More information about jpmml-evaluator can be found at its [github repository](https://github.com/jpmml/jpmml-evaluator). \ No newline at end of file diff --git a/python-package/README.rst b/python-package/README.rst index d289ed6eafcd..ddae4e81e18b 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -11,11 +11,11 @@ Preparation `setuptools `_ is needed. -For Mac OS X users, gcc with OpenMP support must be installed first. Refer to `wiki `_ for installing gcc with OpenMP support. +For Mac OS X users, gcc with OpenMP support must be installed first. Refer to `Installation Guide `_ for installing gcc with OpenMP support. -Note: 32-bit python is not supported. Please install 64-bit version. +Note: 32-bit Python is not supported. Please install 64-bit version. -Install from `PyPI `_ using ``pip`` +Install from `PyPI `_ Using ``pip`` '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' For Windows users, `VC runtime `_ is needed if Visual Studio (2013, 2015 or 2017) is not installed. @@ -25,7 +25,7 @@ Install `wheel `_ via ``pip install wheel`` first. Afte pip install lightgbm -Build from sources +Build from Sources ****************** .. code:: sh @@ -38,7 +38,7 @@ For Mac OS X users, you need to specify compilers by runnig ``export CXX=g++-7 C For Windows users, Visual Studio (or `MS Build `_) is needed. If you get any errors during installation, you may need to install `CMake `_ (version 3.8 or higher). -Build GPU version +Build GPU Version ~~~~~~~~~~~~~~~~~ .. code:: sh @@ -47,7 +47,7 @@ Build GPU version For Windows users, `CMake `_ (version 3.8 or higher) is strongly required in this case. -Note: Boost and OpenCL are needed: details for installation can be found in `gpu-support `_. You need to add ``OpenCL_INCLUDE_DIR`` to the environmental variable **'PATH'** and export ``BOOST_ROOT`` before installation. +Note: Boost and OpenCL are needed: details for installation can be found in `Installation Guide `_. You need to add ``OpenCL_INCLUDE_DIR`` to the environmental variable **'PATH'** and export ``BOOST_ROOT`` before installation. Build with MinGW-w64 on Windows ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,14 +76,14 @@ Note: ``sudo`` (or administrator rights in Windows) may be needed to perform the Run ``python setup.py install --mingw`` if you want to use MinGW-w64 on Windows instead of Visual Studio. `CMake `_ and `MinGW-w64 `_ should be installed first. -Run ``python setup.py install --gpu`` to enable GPU support. For Windows users, `CMake `_ (version 3.8 or higher) is strongly required in this case. Boost and OpenCL are needed: details for installation can be found in `gpu-support `_. 
+Run ``python setup.py install --gpu`` to enable GPU support. For Windows users, `CMake `_ (version 3.8 or higher) is strongly required in this case. Boost and OpenCL are needed: details for installation can be found in `Installation Guide `_. -If you get any errors during installation or due to any other reason, you may want to build dynamic library from sources by any method you prefer (see `Installation-Guide `_) and then run ``python setup.py install --precompile``. +If you get any errors during installation or due to any other reason, you may want to build dynamic library from sources by any method you prefer (see `Installation Guide `_) and then run ``python setup.py install --precompile``. Examples -------- -Refer to the walk through examples in `python-guide folder `_. +Refer to the walk through examples in `Python guide folder `_. Troubleshooting --------------- @@ -93,7 +93,7 @@ Refer to `FAQ `_. Developments ------------ -The code style of python package follows `pep8 `_. If you would like to make a contribution and not familiar with pep-8, please check the pep8 style guide first. Otherwise, the check won't pass. You should be careful about: +The code style of Python-package follows `pep8 `_. If you would like to make a contribution and not familiar with pep-8, please check the pep8 style guide first. Otherwise, the check won't pass. You should be careful about: - E1 Indentation (check pep8 link above) - E202 whitespace before and after brackets diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 52c9be1e9075..c82c985f843a 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -85,7 +85,7 @@ def train(params, train_set, num_boost_round=100, You can still use _InnerPredictor as ``init_model`` for future continue training. callbacks : list of callables or None, optional (default=None) List of callback functions that are applied at each iteration. - See Callbacks in Python-API.md for more information. + See Callbacks in Python API for more information. Returns ------- @@ -369,7 +369,7 @@ def cv(params, train_set, num_boost_round=10, Seed used to generate the folds (passed to numpy.random.seed). callbacks : list of callables or None, optional (default=None) List of callback functions that are applied at each iteration. - See Callbacks in Python-API.md for more information. + See Callbacks in Python API for more information. Returns ------- diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 3b8c73fc9d54..147c9707c4c4 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -196,7 +196,7 @@ def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1, Note ---- - **kwargs is not supported in sklearn, it may cause unexpected issues. + \*\*kwargs is not supported in sklearn, it may cause unexpected issues. Attributes ---------- @@ -341,7 +341,7 @@ def fit(self, X, y, If 'auto' and data is pandas DataFrame, pandas categorical columns are used. callbacks : list of callback functions or None, optional (default=None) List of callback functions that are applied at each iteration. - See Callbacks in Python-API.md for more information. + See Callbacks in Python API for more information. Returns ------- @@ -370,6 +370,7 @@ def fit(self, X, y, The eval result. is_bigger_better: bool Is eval result bigger better, e.g. AUC is bigger_better. + For multi-class task, the y_pred is group by class_id first, then group by row_id. 
If you want to get the i-th row of y_pred for the j-th class, access it as y_pred[j * num_data + i]. """
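To make the layout described above concrete, here is a small standalone sketch (the array contents are arbitrary placeholders, not real model output) of how the flat `y_pred` maps to a per-row, per-class view:

```python
import numpy as np

# Hypothetical flat y_pred for a 3-class task with 4 rows, grouped by class_id first:
# [c0_r0, c0_r1, c0_r2, c0_r3, c1_r0, ..., c2_r3]
num_class, num_data = 3, 4
y_pred = np.arange(num_class * num_data, dtype=float)

i, j = 2, 1                                  # row 2, class 1
score = y_pred[j * num_data + i]             # access pattern from the docstring

# Equivalent view with one row per sample and one column per class:
y_pred_2d = y_pred.reshape(num_class, num_data).T
assert y_pred_2d[i, j] == score
```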