From 5d3408c0d8588146dd2ba37e2fe1ed63d21a08ef Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 15 Jul 2024 17:44:50 -0400 Subject: [PATCH 1/2] docs: Disallow improper capitalization disallow DeepMD (excluding PairDeepMD), DeepMd, Pytorch, Tensorflow, Numpy, Github, Lammps, I-Pi, I-PI, i-Pi Signed-off-by: Jinzhe Zeng --- .github/labeler.yml | 2 +- .pre-commit-config.yaml | 8 ++++++++ README.md | 2 +- codecov.yml | 2 +- deepmd/infer/model_devi.py | 2 +- deepmd/utils/argcheck.py | 2 +- doc/data/data-conv.md | 6 +++--- doc/development/create-a-model-pt.md | 4 ++-- doc/getting-started/quick_start.ipynb | 6 +++--- doc/install/easy-install.md | 2 +- doc/install/install-from-source.md | 6 +++--- doc/install/install-gromacs.md | 2 +- doc/third-party/gromacs.md | 6 +++--- doc/train/multi-task-training-pt.md | 2 +- doc/troubleshooting/installation.md | 2 +- source/cmake/Findtensorflow.cmake | 2 +- source/gmx/dp_gmx_patch | 4 ++-- source/gmx/patches/2020.2/CMakeLists.txt.patch.in | 6 +++--- .../patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch | 4 ++-- source/install/build_tf.py | 6 +++--- 20 files changed, 42 insertions(+), 34 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index b048574e77..0183a144ba 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -33,6 +33,6 @@ LAMMPS: Gromacs: - changed-files: - any-glob-to-any-file: source/gmx/**/* -i-Pi: +i-PI: - changed-files: - any-glob-to-any-file: source/ipi/**/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73cf12de90..3e5b73d1ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -136,5 +136,13 @@ repos: - --comment-style - - --no-extra-eol + - repo: local + hooks: + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: DeepMD|DeepMd|Pytorch|Tensorflow|Numpy|Github|Lammps|I-Pi|I-PI|i-Pi + # unclear why PairDeepMD is used instead of PairDeePMD + exclude: .pre-commit-config.yaml|source/lmp ci: autoupdate_branch: devel 
diff --git a/README.md b/README.md index 3838f2596a..e821a29768 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ The code is organized as follows: - `source/api_c`: source code of the C API. - `source/nodejs`: source code of the Node.js API. - `source/ipi`: source code of i-PI client. -- `source/lmp`: source code of Lammps module. +- `source/lmp`: source code of LAMMPS module. - `source/gmx`: source code of Gromacs plugin. # Contributing diff --git a/codecov.yml b/codecov.yml index 8f639ec037..16bde1deb9 100644 --- a/codecov.yml +++ b/codecov.yml @@ -41,6 +41,6 @@ component_management: paths: - source/lmp/** - component_id: module_ipi - name: i-Pi + name: i-PI paths: - source/ipi/** diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 61025bcb70..f5aa57ab9b 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -378,7 +378,7 @@ def make_model_devi( The output file for model deviation results frequency : int The number of steps that elapse between writing coordinates - in a trajectory by a MD engine (such as Gromacs / Lammps). + in a trajectory by a MD engine (such as Gromacs / LAMMPS). This paramter is used to determine the index in the output file. real_error : bool, default: False If True, calculate the RMS real error instead of model deviation. diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index acb243ea2f..8deef830f7 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -2204,7 +2204,7 @@ def training_data_args(): # ! 
added by Ziyao: new specification style for data - int: all {link_sys} use the same batch size.\n\n\ - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\ -- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor for Tensorflow backend.\n\n\ +- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor for TensorFlow backend.\n\n\ If MPI is used, the value should be considered as the batch size per task.' doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ diff --git a/doc/data/data-conv.md b/doc/data/data-conv.md index 7634daf5e6..56ce526480 100644 --- a/doc/data/data-conv.md +++ b/doc/data/data-conv.md @@ -4,7 +4,7 @@ Two binary formats, NumPy and HDF5, are supported for training. The raw format i ## NumPy format -In a system with the Numpy format, the system properties are stored as text files ending with `.raw`, such as `type.raw` and `type_map.raw`, under the system directory. If one needs to train a non-periodic system, an empty `nopbc` file should be put under the system directory. Both input and labeled frame properties are saved as the [NumPy binary data (NPY) files](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#npy-format) ending with `.npy` in each of the `set.*` directories. 
Take an example, a system may contain the following files: +In a system with the NumPy format, the system properties are stored as text files ending with `.raw`, such as `type.raw` and `type_map.raw`, under the system directory. If one needs to train a non-periodic system, an empty `nopbc` file should be put under the system directory. Both input and labeled frame properties are saved as the [NumPy binary data (NPY) files](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#npy-format) ending with `.npy` in each of the `set.*` directories. Take an example, a system may contain the following files: ``` type.raw @@ -38,7 +38,7 @@ For training models with descriptor `se_atten`, a [new system format](../model/t ## HDF5 format -A system with the HDF5 format has the same structure as the Numpy format, but in an HDF5 file, a system is organized as an [HDF5 group](https://docs.h5py.org/en/stable/high/group.html). The file name of a Numpy file is the key in an HDF5 file, and the data is the value of the key. One needs to use `#` in a DP path to divide the path to the HDF5 file and the HDF5 path: +A system with the HDF5 format has the same structure as the NumPy format, but in an HDF5 file, a system is organized as an [HDF5 group](https://docs.h5py.org/en/stable/high/group.html). The file name of a NumPy file is the key in an HDF5 file, and the data is the value of the key. One needs to use `#` in a DP path to divide the path to the HDF5 file and the HDF5 path: ``` /path/to/data.hdf5#/H2O @@ -79,4 +79,4 @@ $ ls box.raw coord.raw energy.raw force.raw set.000 set.001 set.002 type.raw virial.raw ``` -It generates three sets `set.000`, `set.001` and `set.002`, with each set containing 2000 frames in the Numpy format. +It generates three sets `set.000`, `set.001` and `set.002`, with each set containing 2000 frames in the NumPy format. 
diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md index 288d7172da..9df4fdc055 100644 --- a/doc/development/create-a-model-pt.md +++ b/doc/development/create-a-model-pt.md @@ -199,6 +199,6 @@ When implementing an existing model in a new backend, directly apply the existin ### Consistent tests -When transferring features from another backend to the PyTorch backend, it is essential to include a regression test in `/source/tests/consistent` to validate the consistency of the PyTorch backend with other backends. Presently, the regression tests cover self-consistency and cross-backend consistency between TensorFlow, PyTorch, and DP (Numpy) through the serialization/deserialization technique. +When transferring features from another backend to the PyTorch backend, it is essential to include a regression test in `/source/tests/consistent` to validate the consistency of the PyTorch backend with other backends. Presently, the regression tests cover self-consistency and cross-backend consistency between TensorFlow, PyTorch, and DP (NumPy) through the serialization/deserialization technique. -During the development of new components within the PyTorch backend, it is necessary to provide a DP (Numpy) implementation and incorporate corresponding regression tests. For PyTorch components, developers are also required to include a unit test using `torch.jit`. +During the development of new components within the PyTorch backend, it is necessary to provide a DP (NumPy) implementation and incorporate corresponding regression tests. For PyTorch components, developers are also required to include a unit test using `torch.jit`. 
diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb index 67462c91d4..d1c45ad0b8 100644 --- a/doc/getting-started/quick_start.ipynb +++ b/doc/getting-started/quick_start.ipynb @@ -58,7 +58,7 @@ "\n", "* Prepare the formataive dataset and running scripts for training with DeePMD-kit;\n", "* Train, freeze, and test DeePMD-kit models;\n", - "* Use DeePMD-kit in Lammps for calculations;\n", + "* Use DeePMD-kit in LAMMPS for calculations;\n", "\n", "Work through this tutorial. It will take you 20 minutes, max!" ] @@ -239,7 +239,7 @@ "\n", "Detailed information about ABACUS can be found in its [documentation](https://abacus.deepmodeling.com/en/latest/). \n", "\n", - "DeePMD-kit uses a compressed data format. All training data should first be converted into this format before they can be used in DeePMD-kit. This data format is explained in detail in the DeePMD-kit manual, which can be found on [DeePMD-kit's Github](http://www.github.com/deepmodeling/deepmd-kit).\n", + "DeePMD-kit uses a compressed data format. All training data should first be converted into this format before they can be used in DeePMD-kit. This data format is explained in detail in the DeePMD-kit manual, which can be found on [DeePMD-kit's GitHub](http://www.github.com/deepmodeling/deepmd-kit).\n", "\n", "We provide a convenient tool **dpdata**, which can convert data generated by VASP, CP2K, Gaussian, Quantum Espresso, ABACUS, and LAMMPS into DeePMD-kit's compressed format.\n", "\n", @@ -863,7 +863,7 @@ "DEEPMD INFO saved checkpoint model.ckpt\n", "```\n", "\n", - "They present the training and testing time counts. At the end of the 1000th batch, the model is saved in Tensorflow's checkpoint file `model.ckpt`. At the same time, the training and testing errors are presented in file `lcurve.out`. \n", + "They present the training and testing time counts. At the end of the 1000th batch, the model is saved in TensorFlow's checkpoint file `model.ckpt`. 
At the same time, the training and testing errors are presented in file `lcurve.out`. \n", "\n", "The file contains 8 columns, form left to right, are the training step, the validation loss, training loss, root mean square (RMS) validation error of energy, RMS training error of energy, RMS validation error of force, RMS training error of force and the learning rate. The RMS error (RMSE) of the energy is normalized by number of atoms in the system. \n", "```\n", diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 8d3ec16e36..a0c6270287 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -126,7 +126,7 @@ pip install torch --index-url https://download.pytorch.org/whl/cpu pip install deepmd-kit[cpu] ``` -[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras: +[The LAMMPS module](../third-party/lammps-command.md) and [the i-PI driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-PI, add `lmp` and/or `ipi` to extras: ```bash pip install deepmd-kit[gpu,cu12,torch,lmp,ipi] diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 9b2cf27be2..7323e82173 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -224,7 +224,7 @@ If you don't install Horovod, DeePMD-kit will fall back to serial mode. ## Install the C++ interface -If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section. +If one does not need to use DeePMD-kit with LAMMPS or i-PI, then the python interface installed in the previous section does everything and he/she can safely skip this section. 
### Install Backends' C++ interface (optional) @@ -234,9 +234,9 @@ If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python inte Since TensorFlow 2.12, TensorFlow C++ library (`libtensorflow_cc`) is packaged inside the Python library. Thus, you can skip building TensorFlow C++ library manually. If that does not work for you, you can still build it manually. -The C++ interface of DeePMD-kit was tested with compiler GCC >= 4.8. It is noticed that the I-Pi support is only compiled with GCC >= 4.8. Note that TensorFlow may have specific requirements for the compiler version. +The C++ interface of DeePMD-kit was tested with compiler GCC >= 4.8. The i-PI support is only compiled with GCC >= 4.8. Note that TensorFlow may have specific requirements for the compiler version. -First, the C++ interface of Tensorflow should be installed. It is noted that the version of Tensorflow should be consistent with the python interface. You may follow [the instruction](install-tf.2.12.md) or run the script `$deepmd_source_dir/source/install/build_tf.py` to install the corresponding C++ interface. +First, the C++ interface of TensorFlow should be installed. Note that the version of TensorFlow should be consistent with the Python interface. You may follow [the instruction](install-tf.2.12.md) or run the script `$deepmd_source_dir/source/install/build_tf.py` to install the corresponding C++ interface. ::: diff --git a/doc/install/install-gromacs.md b/doc/install/install-gromacs.md index 147822cf17..90ed73841c 100644 --- a/doc/install/install-gromacs.md +++ b/doc/install/install-gromacs.md @@ -1,4 +1,4 @@ -# Install GROMACS with DeepMD +# Install GROMACS with DeePMD-kit Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed. 
diff --git a/doc/third-party/gromacs.md b/doc/third-party/gromacs.md index 32531dcf7b..5c5132feab 100644 --- a/doc/third-party/gromacs.md +++ b/doc/third-party/gromacs.md @@ -83,7 +83,7 @@ For comparison, the original topology file generated by `acpype` will be: 4 1 5 1 1.0758e+02 3.2635e+02 ; H3 - C1 - H4 ``` -### DeepMD Settings +### DeePMD-kit Settings Before running simulations, we need to tell GROMACS to use DeepPotential by setting the environment variable `GMX_DEEPMD_INPUT_JSON`: @@ -119,7 +119,7 @@ Here is an explanation for these settings: ``` - `lambda`: Optional, default 1.0. Used in alchemical calculations. -- `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeepMD. +- `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeePMD-kit. ### Run Simulation @@ -136,7 +136,7 @@ HW 1 1.008 0.0000 A 0.00000e+00 0.00000e+00 OW 8 16.00 0.0000 A 0.00000e+00 0.00000e+00 ``` -As mentioned in the above section, `input.json` and relevant files (`index.raw`, `type.raw`) should also be created. Then, we can start the simulation under the NVT ensemble and plot the radial distribution function (RDF) by `gmx rdf` command. We can see that the RDF given by Gromacs+DP matches perfectly with Lammps+DP, which further provides an evidence on the validity of our simulation. +As mentioned in the above section, `input.json` and relevant files (`index.raw`, `type.raw`) should also be created. Then, we can start the simulation under the NVT ensemble and plot the radial distribution function (RDF) with the `gmx rdf` command. We can see that the RDF given by Gromacs+DP matches perfectly with LAMMPS+DP, which provides further evidence for the validity of our simulation. ![rdf](../../examples/water/gmx/rdf.png) However, we still recommend you run an all-atom DP simulation using LAMMPS since it is more stable and efficient. 
diff --git a/doc/train/multi-task-training-pt.md b/doc/train/multi-task-training-pt.md index e6fbe3cb10..bfbee4b281 100644 --- a/doc/train/multi-task-training-pt.md +++ b/doc/train/multi-task-training-pt.md @@ -16,7 +16,7 @@ For each dataset, a training task is defined as \min_{\boldsymbol \theta} L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol \theta^{(t)}, \tau), \quad t=1, \dots, n_t. ``` -In the Pytorch implementation, during the multi-task training process, all tasks can share any portion of the model parameters. +In the PyTorch implementation, during the multi-task training process, all tasks can share any portion of the model parameters. A typical scenario is that each task shares the same descriptor with trainable parameters $\boldsymbol{\theta}_ {d}$, while each has its own fitting network with trainable parameters $\boldsymbol{\theta}_ f^{(t)}$, thus $\boldsymbol{\theta}^{(t)} = \{ \boldsymbol{\theta}_ {d} , \boldsymbol{\theta}_ {f}^{(t)} \}$. At each training step, a task will be randomly selected from ${1, \dots, n_t}$ according to the user-specified probability, diff --git a/doc/troubleshooting/installation.md b/doc/troubleshooting/installation.md index 1d18cc648b..48b06dbe0b 100644 --- a/doc/troubleshooting/installation.md +++ b/doc/troubleshooting/installation.md @@ -2,7 +2,7 @@ ## Inadequate versions of gcc/g++ -Sometimes you may use a gcc/g++ of version < 4.8. In this way, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit, but i-Pi and GROMACS plugins will be disabled automatically. Or if you have a gcc/g++ of version > 4.8, say, 7.2.0, you may choose to use it by doing +Sometimes you may use a gcc/g++ of version < 4.8. In this way, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit, but i-PI and GROMACS plugins will be disabled automatically. 
Or if you have a gcc/g++ of version > 4.8, say, 7.2.0, you may choose to use it by doing ```bash export CC=/path/to/gcc-7.2.0/bin/gcc diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index 6f288f2d2b..6321d4872b 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ -3,7 +3,7 @@ # Output: TensorFlow_FOUND TensorFlow_INCLUDE_DIRS TensorFlow_LIBRARY # TensorFlow_LIBRARY_PATH TensorFlowFramework_LIBRARY # TensorFlowFramework_LIBRARY_PATH TENSORFLOW_LINK_LIBPYTHON : whether -# Tensorflow::tensorflow_cc links libpython +# TensorFlow::tensorflow_cc links libpython # # Target: TensorFlow::tensorflow_framework TensorFlow::tensorflow_cc diff --git a/source/gmx/dp_gmx_patch b/source/gmx/dp_gmx_patch index c9259b7ec7..4dacaea835 100644 --- a/source/gmx/dp_gmx_patch +++ b/source/gmx/dp_gmx_patch @@ -59,7 +59,7 @@ check_patched () { } dp_gmx_patch () { - echo "- Staring DeepMD patch program to GROMACS ${VERSION}" + echo "- Staring DeePMD-kit patch program to GROMACS ${VERSION}" echo "- Mode: patch" if [ ! -d $1 ]; then echo "- ERROR: invalid gromacs root: $1" @@ -86,7 +86,7 @@ dp_gmx_patch () { } dp_gmx_revert () { - echo "- Staring DeepMD patch program to GROMACS ${VERSION}" + echo "- Staring DeePMD-kit patch program to GROMACS ${VERSION}" echo "- Mode: revert" check_patched $1 if [ ! -d $1 ]; then diff --git a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in index fb1115fd36..9dcbc8831c 100644 --- a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in +++ b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in @@ -4,8 +4,8 @@ # (i.e., something that is exposed in installed headers). 
set(GMX_PUBLIC_LIBRARIES "") -+# DeepMD -+message(STATUS "Compling with DeepMD...") ++# DeePMD-kit ++message(STATUS "Compling with DeePMD-kit...") +add_definitions(-w) # close warning +# define deepmd and tensorflow root +if (NOT DEFINED GMX_DEEPMD_ROOT) @@ -22,7 +22,7 @@ + +# add link libraries +list (APPEND GMX_PUBLIC_LIBRARIES deepmd_gromacs) -+# DeepMD ++# DeePMD-kit + ######################################################################## # Check and warn if cache generated on a different host is being reused diff --git a/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch b/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch index bb8468b8de..f87c421169 100644 --- a/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch +++ b/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch @@ -22,14 +22,14 @@ simulationWork.useGpuPmePpCommunication, false, wcycle); } -+ /* DeepMD */ ++ /* DeePMD-kit */ + double dener; + std::vector dforce; + if (useDeepmd) + { + if (DIM != 3) + { -+ gmx_fatal(FARGS, "DeepMD does not support DIM < 3."); ++ gmx_fatal(FARGS, "DeePMD-kit does not support DIM < 3."); + } + else + { diff --git a/source/install/build_tf.py b/source/install/build_tf.py index 5e988dd721..808a19dfae 100755 --- a/source/install/build_tf.py +++ b/source/install/build_tf.py @@ -444,7 +444,7 @@ def built(self): return (PREFIX / "bin" / "bazelisk").exists() -class BuildNumpy(Build): +class BuildNumPy(Build): """Build NumPy.""" @property @@ -614,7 +614,7 @@ def dependencies(self) -> Dict[str, Build]: optional_dep["rocm"] = BuildROCM() return { "bazelisk": BuildBazelisk(), - "numpy": BuildNumpy(), + "numpy": BuildNumPy(), **optional_dep, } @@ -865,7 +865,7 @@ def parse_args(args: Optional[List[str]] = None): takes arguments from sys.argv """ parser = argparse.ArgumentParser( - description="Installer of Tensorflow C++ Library.\n\n" + pretty_print_env(), + description="Installer of TensorFlow C++ Library.\n\n" + pretty_print_env(), 
formatter_class=RawTextArgumentDefaultsHelpFormatter, ) parser.add_argument( From e2fa8afbde17423f3c790bf7ce86c05b59b357ef Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 15 Jul 2024 18:52:59 -0400 Subject: [PATCH 2/2] Update source/gmx/patches/2020.2/CMakeLists.txt.patch.in Signed-off-by: Jinzhe Zeng --- source/gmx/patches/2020.2/CMakeLists.txt.patch.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in index 9dcbc8831c..2386745b95 100644 --- a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in +++ b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in @@ -5,7 +5,7 @@ set(GMX_PUBLIC_LIBRARIES "") +# DeePMD-kit -+message(STATUS "Compling with DeePMD-kit...") ++message(STATUS "Compiling with DeePMD-kit...") +add_definitions(-w) # close warning +# define deepmd and tensorflow root +if (NOT DEFINED GMX_DEEPMD_ROOT)