From d2054b641187583a21c05b5622c9058de5b53e18 Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Mon, 30 Sep 2024 15:50:02 +0800
Subject: [PATCH 01/13] Support PIMD for LAMMPS

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 dpgen2/constants.py                           |  2 +
 dpgen2/exploration/task/lmp/lmp_input.py      | 14 +++--
 .../task/lmp_template_task_group.py           | 19 +++++--
 .../task/make_task_group_from_config.py       | 16 ++++++
 dpgen2/exploration/task/npt_task_group.py     |  3 ++
 dpgen2/op/run_lmp.py                          | 17 ++++++
 tests/op/test_run_lmp.py                      | 52 +++++++++++++++++++
 7 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/dpgen2/constants.py b/dpgen2/constants.py
index fe1bffbf..6d5d0197 100644
--- a/dpgen2/constants.py
+++ b/dpgen2/constants.py
@@ -12,8 +12,10 @@
 plm_input_name = "input.plumed"
 plm_output_name = "output.plumed"
 lmp_traj_name = "traj.dump"
+lmp_pimd_traj_name = "traj.%s.dump"
 lmp_log_name = "log.lammps"
 lmp_model_devi_name = "model_devi.out"
+lmp_pimd_model_devi_name = "model_devi.%s.out"
 fp_index_pattern = "%06d"
 fp_task_pattern = "task." + fp_index_pattern
 fp_default_log_name = "fp.log"
diff --git a/dpgen2/exploration/task/lmp/lmp_input.py b/dpgen2/exploration/task/lmp/lmp_input.py
index 5898000f..e9626154 100644
--- a/dpgen2/exploration/task/lmp/lmp_input.py
+++ b/dpgen2/exploration/task/lmp/lmp_input.py
@@ -12,6 +12,9 @@
 )
 
 from dpgen2.constants import (
+    lmp_model_devi_name,
+    lmp_pimd_model_devi_name,
+    lmp_pimd_traj_name,
     lmp_traj_name,
 )
 
@@ -48,6 +51,7 @@ def make_lmp_input(
     max_seed: int = 1000000,
     deepmd_version="2.0",
     trj_seperate_files=True,
+    pimd_bead: Optional[str] = None,
 ):
     if (ele_temp_f is not None or ele_temp_a is not None) and Version(
         deepmd_version
@@ -97,9 +101,10 @@ def make_lmp_input(
     graph_list = ""
     for ii in graphs:
         graph_list += ii + " "
+    model_devi_file_name = lmp_pimd_model_devi_name % pimd_bead if pimd_bead is not None else lmp_model_devi_name
     if Version(deepmd_version) < Version("1"):
         # 0.x
-        ret += "pair_style      deepmd %s ${THERMO_FREQ} model_devi.out\n" % graph_list
+        ret += "pair_style      deepmd %s ${THERMO_FREQ} %s\n" % (graph_list, model_devi_file_name)
     else:
         # 1.x
         keywords = ""
@@ -114,8 +119,8 @@ def make_lmp_input(
         if ele_temp_a is not None:
             keywords += "aparam ${ELE_TEMP}"
         ret += (
-            "pair_style      deepmd %s out_freq ${THERMO_FREQ} out_file model_devi.out %s\n"
-            % (graph_list, keywords)
+            "pair_style      deepmd %s out_freq ${THERMO_FREQ} out_file %s %s\n"
+            % (graph_list, model_devi_file_name, keywords)
         )
     ret += "pair_coeff      * *\n"
     ret += "\n"
@@ -124,9 +129,10 @@ def make_lmp_input(
     if trj_seperate_files:
         ret += "dump            1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n"
     else:
+        lmp_traj_file_name = lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
         ret += (
             "dump            1 all custom ${DUMP_FREQ} %s id type x y z fx fy fz\n"
-            % lmp_traj_name
+            % lmp_traj_file_name
         )
     ret += "restart         10000 dpgen.restart\n"
     ret += "\n"
diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index b82e1695..26ad3b4a 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -11,6 +11,9 @@
 from dpgen2.constants import (
     lmp_conf_name,
     lmp_input_name,
+    lmp_model_devi_name,
+    lmp_pimd_model_devi_name,
+    lmp_pimd_traj_name,
     lmp_traj_name,
     model_name_pattern,
     plm_input_name,
@@ -44,11 +47,13 @@ def set_lmp(
         revisions: dict = {},
         traj_freq: int = 10,
         extra_pair_style_args: str = "",
+        pimd_bead: Optional[str] = None,
     ) -> None:
         self.lmp_template = Path(lmp_template_fname).read_text().split("\n")
         self.revisions = revisions
         self.traj_freq = traj_freq
         self.extra_pair_style_args = extra_pair_style_args
+        self.pimd_bead = pimd_bead
         self.lmp_set = True
         self.model_list = sorted([model_name_pattern % ii for ii in range(numb_models)])
         self.lmp_template = revise_lmp_input_model(
@@ -56,8 +61,9 @@ def set_lmp(
             self.model_list,
             self.traj_freq,
             self.extra_pair_style_args,
+            self.pimd_bead,
         )
-        self.lmp_template = revise_lmp_input_dump(self.lmp_template, self.traj_freq)
+        self.lmp_template = revise_lmp_input_dump(self.lmp_template, self.traj_freq, self.pimd_bead)
         if plm_template_fname is not None:
             self.plm_template = Path(plm_template_fname).read_text().split("\n")
             self.plm_set = True
@@ -144,27 +150,30 @@ def find_only_one_key(lmp_lines, key):
 
 
 def revise_lmp_input_model(
-    lmp_lines, task_model_list, trj_freq, extra_pair_style_args="", deepmd_version="1"
+    lmp_lines, task_model_list, trj_freq, extra_pair_style_args="", pimd_bead=None, deepmd_version="1"
 ):
     idx = find_only_one_key(lmp_lines, ["pair_style", "deepmd"])
     if extra_pair_style_args:
         extra_pair_style_args = " " + extra_pair_style_args
     graph_list = " ".join(task_model_list)
+    model_devi_file_name = lmp_pimd_model_devi_name % pimd_bead if pimd_bead is not None else lmp_model_devi_name
     lmp_lines[idx] = (
-        "pair_style      deepmd %s out_freq %d out_file model_devi.out%s"
+        "pair_style      deepmd %s out_freq %d out_file %s%s"
         % (
             graph_list,
             trj_freq,
+            model_devi_file_name,
             extra_pair_style_args,
         )
     )
     return lmp_lines
 
 
-def revise_lmp_input_dump(lmp_lines, trj_freq):
+def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
     idx = find_only_one_key(lmp_lines, ["dump", "dpgen_dump"])
+    lmp_traj_file_name = lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
     lmp_lines[idx] = (
-        f"dump            dpgen_dump all custom %d {lmp_traj_name} id type x y z"
+        f"dump            dpgen_dump all custom %d {lmp_traj_file_name} id type x y z"
         % trj_freq
     )
     return lmp_lines
diff --git a/dpgen2/exploration/task/make_task_group_from_config.py b/dpgen2/exploration/task/make_task_group_from_config.py
index c467fd8e..3b793c58 100644
--- a/dpgen2/exploration/task/make_task_group_from_config.py
+++ b/dpgen2/exploration/task/make_task_group_from_config.py
@@ -47,6 +47,7 @@ def npt_task_group_args():
     doc_relative_v_epsilon = "Calculate relative virial model deviation"
     doc_ele_temp_f = "The electron temperature set by frame style"
     doc_ele_temp_a = "The electron temperature set by atomistic style"
+    doc_pimd_bead = "Bead index for PIMD, None for non-PIMD"
 
     return [
         Argument("conf_idx", list, optional=False, doc=doc_conf_idx, alias=["sys_idx"]),
@@ -108,6 +109,13 @@ def npt_task_group_args():
             default=None,
             doc=doc_ele_temp_a,
         ),
+        Argument(
+            "pimd_bead",
+            str,
+            optional=True,
+            default=None,
+            doc=doc_pimd_bead,
+        ),
     ]
 
 
@@ -117,6 +125,7 @@ def lmp_template_task_group_args():
     doc_revisions = "The revisions. Should be a dict providing the key - list of desired values pair. Key is the word to be replaced in the templates, and it may appear in both the lammps and plumed input templates. All values in the value list will be enmerated."
     doc_traj_freq = "The frequency of dumping configurations and thermodynamic states"
     doc_extra_pair_style_args = "The extra arguments for pair_style"
+    doc_pimd_bead = "Bead index for PIMD, None for non-PIMD"
 
     return [
         Argument("conf_idx", list, optional=False, doc=doc_conf_idx, alias=["sys_idx"]),
@@ -158,6 +167,13 @@ def lmp_template_task_group_args():
             default="",
             doc=doc_extra_pair_style_args,
         ),
+        Argument(
+            "pimd_bead",
+            str,
+            optional=True,
+            default=None,
+            doc=doc_pimd_bead,
+        ),
     ]
 
 
diff --git a/dpgen2/exploration/task/npt_task_group.py b/dpgen2/exploration/task/npt_task_group.py
index 4c999638..27c1e001 100644
--- a/dpgen2/exploration/task/npt_task_group.py
+++ b/dpgen2/exploration/task/npt_task_group.py
@@ -49,6 +49,7 @@ def set_md(
         relative_v_epsilon: Optional[float] = None,
         ele_temp_f: Optional[float] = None,
         ele_temp_a: Optional[float] = None,
+        pimd_bead: Optional[str] = None,
     ):
         """
         Set MD parameters
@@ -72,6 +73,7 @@ def set_md(
         self.ele_temp_f = ele_temp_f
         self.ele_temp_a = ele_temp_a
         self.md_set = True
+        self.pimd_bead = pimd_bead
 
     def make_task(
         self,
@@ -131,6 +133,7 @@ def _make_lmp_task(
                 self.ele_temp_a,
                 self.no_pbc,
                 trj_seperate_files=False,
+                pimd_bead=self.pimd_bead,
             ),
         )
         return task
diff --git a/dpgen2/op/run_lmp.py b/dpgen2/op/run_lmp.py
index 2822a325..3ff366c9 100644
--- a/dpgen2/op/run_lmp.py
+++ b/dpgen2/op/run_lmp.py
@@ -1,3 +1,4 @@
+import glob
 import json
 import logging
 import os
@@ -194,6 +195,7 @@ def execute(
                     with open("job.json", "w") as f:
                         json.dump(data, f, indent=4)
 
+        merge_pimd_files()
         ret_dict = {
             "log": work_dir / lmp_log_name,
             "traj": work_dir / lmp_traj_name,
@@ -356,3 +358,18 @@ def freeze_model(input_model, frozen_model, head=None):
             )
         )
         raise TransientError("freeze failed")
+
+
+def merge_pimd_files():
+    traj_files = glob.glob("traj.*.dump")
+    if len(traj_files) > 0:
+        with open(lmp_traj_name, "w") as f:
+            for traj_file in sorted(traj_files):
+                with open(traj_file, "r") as f2:
+                    f.write(f2.read())
+    model_devi_files = glob.glob("model_devi.*.out")
+    if len(model_devi_files) > 0:
+        with open(lmp_model_devi_name, "w") as f:
+            for model_devi_file in sorted(model_devi_files):
+                with open(model_devi_file, "r") as f2:
+                    f.write(f2.read())
diff --git a/tests/op/test_run_lmp.py b/tests/op/test_run_lmp.py
index b727fb76..1b1ac6cc 100644
--- a/tests/op/test_run_lmp.py
+++ b/tests/op/test_run_lmp.py
@@ -6,6 +6,7 @@
     Path,
 )
 
+import dpdata
 import numpy as np
 from dflow.python import (
     OP,
@@ -35,6 +36,7 @@
 from dpgen2.op.run_lmp import (
     RunLmp,
     get_ele_temp,
+    merge_pimd_files,
     set_models,
 )
 from dpgen2.utils import (
@@ -286,3 +288,53 @@ def test_get_ele_temp(self):
     def tearDown(self):
         if os.path.exists("log"):
             os.remove("log")
+
+
+class TestMergePIMDFiles(unittest.TestCase):
+    def test_merge_pimd_files(self):
+        for i in range(1, 3):
+            with open("traj.%s.dump" % i, "w") as f:
+                f.write("""ITEM: TIMESTEP
+0
+ITEM: NUMBER OF ATOMS
+3
+ITEM: BOX BOUNDS xy xz yz pp pp pp
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+ITEM: ATOMS id type x y z
+1 8 7.23489 0.826309 4.61669
+2 1 8.04419 0.520382 5.14395
+3 1 6.48126 0.446895 4.99766
+ITEM: TIMESTEP
+10
+ITEM: NUMBER OF ATOMS
+3
+ITEM: BOX BOUNDS xy xz yz pp pp pp
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+0.0000000000000000e+00 1.2444661140399999e+01 0.0000000000000000e+00
+ITEM: ATOMS id type x y z
+1 8 7.23103 0.814939 4.59892
+2 1 7.96453 0.61699 5.19158
+3 1 6.43661 0.370311 5.09854
+""")
+        for i in range(1, 3):
+            with open("model_devi.%s.out" % i, "w") as f:
+                f.write("""#       step         max_devi_v         min_devi_v         avg_devi_v         max_devi_f         min_devi_f         avg_devi_f
+           0       9.023897e-17       3.548771e-17       5.237314e-17       8.196123e-16       1.225653e-16       3.941002e-16
+          10       1.081667e-16       4.141596e-17       7.534462e-17       9.070597e-16       1.067947e-16       4.153524e-16
+""")
+
+        merge_pimd_files()
+        self.assertTrue(os.path.exists(lmp_traj_name))
+        self.assertTrue(os.path.exists(lmp_model_devi_name))
+        s = dpdata.System(lmp_traj_name, fmt="lammps/dump")
+        assert len(s) == 4
+        model_devi = np.loadtxt(lmp_model_devi_name)
+        assert model_devi.shape[0] == 4
+
+    def tearDown(self):
+        for f in [lmp_traj_name, "traj.1.dump", "traj.2.dump", lmp_model_devi_name, "model_devi.1.out", "model_devi.2.out"]:
+            if os.path.exists(f):
+                os.remove(f)

From db0e57752d0e546e90d44a0e2d2e5c7a50190275 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Sep 2024 07:50:54 +0000
Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpgen2/exploration/task/lmp/lmp_input.py      | 22 ++++++++----
 .../task/lmp_template_task_group.py           | 34 ++++++++++++-------
 tests/op/test_run_lmp.py                      | 21 +++++++++---
 3 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/dpgen2/exploration/task/lmp/lmp_input.py b/dpgen2/exploration/task/lmp/lmp_input.py
index e9626154..c2a22b60 100644
--- a/dpgen2/exploration/task/lmp/lmp_input.py
+++ b/dpgen2/exploration/task/lmp/lmp_input.py
@@ -101,10 +101,17 @@ def make_lmp_input(
     graph_list = ""
     for ii in graphs:
         graph_list += ii + " "
-    model_devi_file_name = lmp_pimd_model_devi_name % pimd_bead if pimd_bead is not None else lmp_model_devi_name
+    model_devi_file_name = (
+        lmp_pimd_model_devi_name % pimd_bead
+        if pimd_bead is not None
+        else lmp_model_devi_name
+    )
     if Version(deepmd_version) < Version("1"):
         # 0.x
-        ret += "pair_style      deepmd %s ${THERMO_FREQ} %s\n" % (graph_list, model_devi_file_name)
+        ret += "pair_style      deepmd %s ${THERMO_FREQ} %s\n" % (
+            graph_list,
+            model_devi_file_name,
+        )
     else:
         # 1.x
         keywords = ""
@@ -118,9 +125,10 @@ def make_lmp_input(
             keywords += "fparam ${ELE_TEMP}"
         if ele_temp_a is not None:
             keywords += "aparam ${ELE_TEMP}"
-        ret += (
-            "pair_style      deepmd %s out_freq ${THERMO_FREQ} out_file %s %s\n"
-            % (graph_list, model_devi_file_name, keywords)
+        ret += "pair_style      deepmd %s out_freq ${THERMO_FREQ} out_file %s %s\n" % (
+            graph_list,
+            model_devi_file_name,
+            keywords,
         )
     ret += "pair_coeff      * *\n"
     ret += "\n"
@@ -129,7 +137,9 @@ def make_lmp_input(
     if trj_seperate_files:
         ret += "dump            1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n"
     else:
-        lmp_traj_file_name = lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
+        lmp_traj_file_name = (
+            lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
+        )
         ret += (
             "dump            1 all custom ${DUMP_FREQ} %s id type x y z fx fy fz\n"
             % lmp_traj_file_name
diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index 26ad3b4a..d0b362f5 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -63,7 +63,9 @@ def set_lmp(
             self.extra_pair_style_args,
             self.pimd_bead,
         )
-        self.lmp_template = revise_lmp_input_dump(self.lmp_template, self.traj_freq, self.pimd_bead)
+        self.lmp_template = revise_lmp_input_dump(
+            self.lmp_template, self.traj_freq, self.pimd_bead
+        )
         if plm_template_fname is not None:
             self.plm_template = Path(plm_template_fname).read_text().split("\n")
             self.plm_set = True
@@ -150,28 +152,36 @@ def find_only_one_key(lmp_lines, key):
 
 
 def revise_lmp_input_model(
-    lmp_lines, task_model_list, trj_freq, extra_pair_style_args="", pimd_bead=None, deepmd_version="1"
+    lmp_lines,
+    task_model_list,
+    trj_freq,
+    extra_pair_style_args="",
+    pimd_bead=None,
+    deepmd_version="1",
 ):
     idx = find_only_one_key(lmp_lines, ["pair_style", "deepmd"])
     if extra_pair_style_args:
         extra_pair_style_args = " " + extra_pair_style_args
     graph_list = " ".join(task_model_list)
-    model_devi_file_name = lmp_pimd_model_devi_name % pimd_bead if pimd_bead is not None else lmp_model_devi_name
-    lmp_lines[idx] = (
-        "pair_style      deepmd %s out_freq %d out_file %s%s"
-        % (
-            graph_list,
-            trj_freq,
-            model_devi_file_name,
-            extra_pair_style_args,
-        )
+    model_devi_file_name = (
+        lmp_pimd_model_devi_name % pimd_bead
+        if pimd_bead is not None
+        else lmp_model_devi_name
+    )
+    lmp_lines[idx] = "pair_style      deepmd %s out_freq %d out_file %s%s" % (
+        graph_list,
+        trj_freq,
+        model_devi_file_name,
+        extra_pair_style_args,
     )
     return lmp_lines
 
 
 def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
     idx = find_only_one_key(lmp_lines, ["dump", "dpgen_dump"])
-    lmp_traj_file_name = lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
+    lmp_traj_file_name = (
+        lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
+    )
     lmp_lines[idx] = (
         f"dump            dpgen_dump all custom %d {lmp_traj_file_name} id type x y z"
         % trj_freq
diff --git a/tests/op/test_run_lmp.py b/tests/op/test_run_lmp.py
index 1b1ac6cc..5b7f4542 100644
--- a/tests/op/test_run_lmp.py
+++ b/tests/op/test_run_lmp.py
@@ -294,7 +294,8 @@ class TestMergePIMDFiles(unittest.TestCase):
     def test_merge_pimd_files(self):
         for i in range(1, 3):
             with open("traj.%s.dump" % i, "w") as f:
-                f.write("""ITEM: TIMESTEP
+                f.write(
+                    """ITEM: TIMESTEP
 0
 ITEM: NUMBER OF ATOMS
 3
@@ -318,13 +319,16 @@ def test_merge_pimd_files(self):
 1 8 7.23103 0.814939 4.59892
 2 1 7.96453 0.61699 5.19158
 3 1 6.43661 0.370311 5.09854
-""")
+"""
+                )
         for i in range(1, 3):
             with open("model_devi.%s.out" % i, "w") as f:
-                f.write("""#       step         max_devi_v         min_devi_v         avg_devi_v         max_devi_f         min_devi_f         avg_devi_f
+                f.write(
+                    """#       step         max_devi_v         min_devi_v         avg_devi_v         max_devi_f         min_devi_f         avg_devi_f
            0       9.023897e-17       3.548771e-17       5.237314e-17       8.196123e-16       1.225653e-16       3.941002e-16
           10       1.081667e-16       4.141596e-17       7.534462e-17       9.070597e-16       1.067947e-16       4.153524e-16
-""")
+"""
+                )
 
         merge_pimd_files()
         self.assertTrue(os.path.exists(lmp_traj_name))
@@ -335,6 +339,13 @@ def test_merge_pimd_files(self):
         assert model_devi.shape[0] == 4
 
     def tearDown(self):
-        for f in [lmp_traj_name, "traj.1.dump", "traj.2.dump", lmp_model_devi_name, "model_devi.1.out", "model_devi.2.out"]:
+        for f in [
+            lmp_traj_name,
+            "traj.1.dump",
+            "traj.2.dump",
+            lmp_model_devi_name,
+            "model_devi.1.out",
+            "model_devi.2.out",
+        ]:
             if os.path.exists(f):
                 os.remove(f)

From 02b6f97acd64c7d89c9a5d0c1a0fdead58e46e4a Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Wed, 2 Oct 2024 15:19:14 +0800
Subject: [PATCH 03/13] Update
 dpgen2/exploration/task/lmp_template_task_group.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 dpgen2/exploration/task/lmp_template_task_group.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index d0b362f5..c1a4af4a 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -183,7 +183,9 @@ def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
         lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
     )
     lmp_lines[idx] = (
-        f"dump            dpgen_dump all custom %d {lmp_traj_file_name} id type x y z"
+        lmp_lines[idx] = (
+            f"dump            dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
+        )
         % trj_freq
     )
     return lmp_lines

From 750918c03d05ccea76a83b432e3e6484d812e7e3 Mon Sep 17 00:00:00 2001
From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
Date: Wed, 2 Oct 2024 15:21:03 +0800
Subject: [PATCH 04/13] Update lmp_template_task_group.py

---
 dpgen2/exploration/task/lmp_template_task_group.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index c1a4af4a..37b1e475 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -183,10 +183,7 @@ def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
         lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
     )
     lmp_lines[idx] = (
-        lmp_lines[idx] = (
-            f"dump            dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
-        )
-        % trj_freq
+        f"dump            dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
     )
     return lmp_lines
 

From 679035f8333ff9c053f26dfd13eb3743493fc2b5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 2 Oct 2024 07:21:11 +0000
Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpgen2/exploration/task/lmp_template_task_group.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index 37b1e475..1a44cb8e 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -182,9 +182,9 @@ def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
     lmp_traj_file_name = (
         lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
     )
-    lmp_lines[idx] = (
-        f"dump            dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
-    )
+    lmp_lines[
+        idx
+    ] = f"dump            dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
     return lmp_lines
 
 

From 1ba5fff2b943ad5894b2f8161dcbe285eac9d9cd Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Thu, 3 Oct 2024 10:59:56 +0800
Subject: [PATCH 06/13] add UT for LmpTemplateTaskGroup with pimd_bead

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 .../exploration/test_lmp_templ_task_group.py  | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/tests/exploration/test_lmp_templ_task_group.py b/tests/exploration/test_lmp_templ_task_group.py
index 5ef67f72..6211137d 100644
--- a/tests/exploration/test_lmp_templ_task_group.py
+++ b/tests/exploration/test_lmp_templ_task_group.py
@@ -197,6 +197,84 @@
 )
 
 
+in_lmp_pimd_template = textwrap.dedent(
+    """variable        NSTEPS          equal V_NSTEPS
+variable        THERMO_FREQ     equal 10
+variable        DUMP_FREQ       equal 10
+variable        TEMP            equal V_TEMP
+variable        PRES            equal 0.0
+variable        TAU_T           equal 0.100000
+variable        TAU_P           equal 0.500000
+variable        ibead           uloop 4 pad
+
+units           metal
+boundary        p p p
+atom_style      atomic
+
+neighbor        1.0 bin
+
+box             tilt large
+read_data       conf.lmp
+change_box      all triclinic
+mass            1 27.000000
+mass            2 24.000000
+
+pair_style      deepmd
+pair_coeff      * *
+
+thermo_style    custom step temp pe ke etotal press vol lx ly lz xy xz yz
+thermo          ${THERMO_FREQ}
+
+dump            dpgen_dump
+
+velocity        all create ${TEMP} 826513
+fix             1 all pimd/langevin ensemble npt integrator baoab temp ${TEMP} thermostat PILE_L 1234 tau ${TAU_T} iso ${PRES} barostat BZP taup ${TAU_P}
+
+timestep        0.001
+run             ${NSTEPS}
+"""
+)
+
+
+expected_lmp_pimd_template = textwrap.dedent(
+    """variable        NSTEPS          equal 1000
+variable        THERMO_FREQ     equal 10
+variable        DUMP_FREQ       equal 10
+variable        TEMP            equal 300
+variable        PRES            equal 0.0
+variable        TAU_T           equal 0.100000
+variable        TAU_P           equal 0.500000
+variable        ibead           uloop 4 pad
+
+units           metal
+boundary        p p p
+atom_style      atomic
+
+neighbor        1.0 bin
+
+box             tilt large
+read_data       conf.lmp
+change_box      all triclinic
+mass            1 27.000000
+mass            2 24.000000
+
+pair_style      deepmd model.000.pb model.001.pb model.002.pb model.003.pb out_freq 20 out_file model_devi.${ibead}.out
+pair_coeff      * *
+
+thermo_style    custom step temp pe ke etotal press vol lx ly lz xy xz yz
+thermo          ${THERMO_FREQ}
+
+dump            dpgen_dump all custom 20 traj.${ibead}.dump id type x y z
+
+velocity        all create ${TEMP} 826513
+fix             1 all pimd/langevin ensemble npt integrator baoab temp ${TEMP} thermostat PILE_L 1234 tau ${TAU_T} iso ${PRES} barostat BZP taup ${TAU_P}
+
+timestep        0.001
+run             ${NSTEPS}
+"""
+)
+
+
 class TestLmpTemplateTaskGroup(unittest.TestCase):
     def setUp(self):
         self.lmp_template_fname = Path("lmp.template")
@@ -215,11 +293,14 @@ def setUp(self):
         }
         self.rev_empty = {}
         self.traj_freq = 20
+        self.lmp_pimd_template_fname = Path("lmp.pimd.template")
+        self.lmp_pimd_template_fname.write_text(in_lmp_pimd_template)
 
     def tearDown(self):
         os.remove(self.lmp_template_fname)
         os.remove(self.lmp_plm_template_fname)
         os.remove(self.plm_template_fname)
+        os.remove(self.lmp_pimd_template_fname)
 
     def test_lmp(self):
         task_group = LmpTemplateTaskGroup()
@@ -333,3 +414,25 @@ def test_lmp_empty(self):
                 ee,
             )
             idx += 1
+
+    def test_lmp_pimd(self):
+        task_group = LmpTemplateTaskGroup()
+        task_group.set_conf(["foo"])
+        task_group.set_lmp(
+            self.numb_models,
+            self.lmp_pimd_template_fname,
+            revisions={"V_NSTEPS": [1000], "V_TEMP": [300]},
+            traj_freq=self.traj_freq,
+            pimd_bead="${ibead}",
+        )
+        task_group.make_task()
+        ngroup = len(task_group)
+        self.assertEqual(
+            ngroup,
+            1,
+        )
+        ee = expected_lmp_pimd_template.split("\n")
+        self.assertEqual(
+            task_group[0].files()[lmp_input_name].split("\n"),
+            ee,
+        )

From 697cb4f0462d32c07c7b225cd60c31e59eee28c0 Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Tue, 8 Oct 2024 11:40:57 +0800
Subject: [PATCH 07/13] Fix PIMD files being merged outside of workdir

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 dpgen2/op/run_lmp.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dpgen2/op/run_lmp.py b/dpgen2/op/run_lmp.py
index 3ff366c9..cca2acfd 100644
--- a/dpgen2/op/run_lmp.py
+++ b/dpgen2/op/run_lmp.py
@@ -195,7 +195,8 @@ def execute(
                     with open("job.json", "w") as f:
                         json.dump(data, f, indent=4)
 
-        merge_pimd_files()
+            merge_pimd_files()
+
         ret_dict = {
             "log": work_dir / lmp_log_name,
             "traj": work_dir / lmp_traj_name,

From 981d9afe5f603a766e41469e34531e019c68f2fd Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Tue, 8 Oct 2024 14:30:16 +0800
Subject: [PATCH 08/13] Fix handling of optional_outputs given as a list of None

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 dpgen2/op/select_confs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpgen2/op/select_confs.py b/dpgen2/op/select_confs.py
index cddcca55..e8ba891d 100644
--- a/dpgen2/op/select_confs.py
+++ b/dpgen2/op/select_confs.py
@@ -126,7 +126,7 @@ def validate_trajs(
             if tt is not None and mm is not None:
                 rett.append(tt)
                 retm.append(mm)
-                if optional_outputs:
+                if optional_outputs and any(optional_outputs):
                     oo = optional_outputs[i]
                     if oo is not None:
                         reto.append(oo)

From 5f263d312255382de3b29320ea807c53a9eb6c20 Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Sun, 13 Oct 2024 13:23:20 +0800
Subject: [PATCH 09/13] Fix handling of None entries when setting up electronic temperature

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 dpgen2/op/collect_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dpgen2/op/collect_data.py b/dpgen2/op/collect_data.py
index b1e057e5..68f397ff 100644
--- a/dpgen2/op/collect_data.py
+++ b/dpgen2/op/collect_data.py
@@ -95,9 +95,9 @@ def execute(
 
         ms = dpdata.MultiSystems(type_map=type_map)
         for ii in labeled_data:
-            if len(list(ii.rglob("fparam.npy"))) > 0:
+            if ii and len(list(ii.rglob("fparam.npy"))) > 0:
                 setup_ele_temp(False)
-            if len(list(ii.rglob("aparam.npy"))) > 0:
+            if ii and len(list(ii.rglob("aparam.npy"))) > 0:
                 setup_ele_temp(True)
             ss = dpdata.LabeledSystem(ii, fmt="deepmd/npy")
             ms.append(ss)

From 7c36020d403284ea9ff98e4936ed8d86e401e2b2 Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Mon, 14 Oct 2024 15:41:06 +0800
Subject: [PATCH 10/13] add split_last_iter_valid_ratio

Signed-off-by: zjgemi <liuxin_zijian@163.com>
---
 dpgen2/op/run_dp_train.py | 85 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index dccbc518..2f066ad3 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -1,7 +1,9 @@
 import glob
 import json
 import logging
+import math
 import os
+import random
 import shutil
 from pathlib import (
     Path,
@@ -197,6 +199,10 @@ def execute(
         valid_data = ip["valid_data"]
         iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1])
         iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:])
+        if config["split_last_iter_valid_ratio"] is not None:
+            train_systems, valid_systems = split_valid(iter_data_new_exp, config["split_last_iter_valid_ratio"])
+            iter_data_new_exp = train_systems
+            valid_data = append_valid_data(config, valid_data, valid_systems)
         iter_data_exp = iter_data_old_exp + iter_data_new_exp
         work_dir = Path(task_name)
         init_model_with_finetune = config["init_model_with_finetune"]
@@ -517,6 +523,7 @@ def training_args():
         doc_head = "Head to use in the multitask training"
         doc_init_model_with_finetune = "Use finetune for init model"
         doc_train_args = "Extra arguments for dp train"
+        doc_split_last_iter_valid_ratio = "Ratio of valid data if split data of last iter"
         return [
             Argument(
                 "command",
@@ -618,6 +625,13 @@ def training_args():
                 default="",
                 doc=doc_train_args,
             ),
+            Argument(
+                "split_last_iter_valid_ratio",
+                float,
+                optional=True,
+                default=None,
+                doc=doc_split_last_iter_valid_ratio,
+            ),
         ]
 
     @staticmethod
@@ -672,4 +686,75 @@ def _expand_all_multi_sys_to_sys(list_multi_sys):
     return all_sys_dirs
 
 
+def split_valid(systems: List[str], valid_ratio: float):
+    train_systems = []
+    valid_systems = []
+    for system in systems:
+        d = dpdata.MultiSystems()
+        mixed_type = len(glob.glob("%s/*/real_atom_types.npy" % system)) > 0
+        if mixed_type:
+            d.load_systems_from_file(system, fmt="deepmd/npy/mixed")
+        else:
+            k = dpdata.LabeledSystem(system, fmt="deepmd/npy")
+            d.append(k)
+
+        train_multi_systems = dpdata.MultiSystems()
+        valid_multi_systems = dpdata.MultiSystems()
+        for s in d:
+            nvalid = math.floor(len(s)*valid_ratio)
+            if random.random() < len(s)*valid_ratio - nvalid:
+                nvalid += 1
+            valid_indices = random.sample(range(len(s)), nvalid)
+            train_indices = list(set(range(len(s))).difference(valid_indices))
+            if len(valid_indices) > 0:
+                valid_multi_systems.append(s.sub_system(valid_indices))
+            if len(train_indices) > 0:
+                train_multi_systems.append(s.sub_system(train_indices))
+
+        if len(train_multi_systems) > 0:
+            target = "train_data/" + system
+            if mixed_type:
+                # The multisystem is loaded from one dir, thus we can safely keep one dir
+                train_multi_systems.to_deepmd_npy_mixed("%s.tmp" % target)
+                fs = os.listdir("%s.tmp" % target)
+                assert len(fs) == 1
+                os.rename(os.path.join("%s.tmp" % target, fs[0]), target)
+                os.rmdir("%s.tmp" % target)
+            else:
+                train_multi_systems[0].to_deepmd_npy(target)
+            train_systems.append(target)
+
+        if len(valid_multi_systems) > 0:
+            target = "valid_data/" + system
+            if mixed_type:
+                # The multisystem is loaded from one dir, thus we can safely keep one dir
+                valid_multi_systems.to_deepmd_npy_mixed("%s.tmp" % target)
+                fs = os.listdir("%s.tmp" % target)
+                assert len(fs) == 1
+                os.rename(os.path.join("%s.tmp" % target, fs[0]), target)
+                os.rmdir("%s.tmp" % target)
+            else:
+                valid_multi_systems[0].to_deepmd_npy(target)
+            valid_systems.append(target)
+
+    return train_systems, valid_systems
+
+
+def append_valid_data(config, valid_data, valid_systems):
+    if not valid_systems:
+        return valid_data
+    if config["multitask"]:
+        head = config["head"]
+        if not valid_data:
+            valid_data = {}
+        if head not in valid_data:
+            valid_data[head] = []
+        valid_data[head] += valid_systems
+    else:
+        if not valid_data:
+            valid_data = []
+        valid_data += valid_systems
+    return valid_data
+
+
 config_args = RunDPTrain.training_args

From e3d03ce37ac08492db5c21f868179a3e4ef743e5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 14 Oct 2024 07:41:25 +0000
Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpgen2/op/run_dp_train.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index 2f066ad3..29468e53 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -200,7 +200,9 @@ def execute(
         iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1])
         iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:])
         if config["split_last_iter_valid_ratio"] is not None:
-            train_systems, valid_systems = split_valid(iter_data_new_exp, config["split_last_iter_valid_ratio"])
+            train_systems, valid_systems = split_valid(
+                iter_data_new_exp, config["split_last_iter_valid_ratio"]
+            )
             iter_data_new_exp = train_systems
             valid_data = append_valid_data(config, valid_data, valid_systems)
         iter_data_exp = iter_data_old_exp + iter_data_new_exp
@@ -523,7 +525,9 @@ def training_args():
         doc_head = "Head to use in the multitask training"
         doc_init_model_with_finetune = "Use finetune for init model"
         doc_train_args = "Extra arguments for dp train"
-        doc_split_last_iter_valid_ratio = "Ratio of valid data if split data of last iter"
+        doc_split_last_iter_valid_ratio = (
+            "Ratio of valid data if split data of last iter"
+        )
         return [
             Argument(
                 "command",
@@ -701,8 +705,8 @@ def split_valid(systems: List[str], valid_ratio: float):
         train_multi_systems = dpdata.MultiSystems()
         valid_multi_systems = dpdata.MultiSystems()
         for s in d:
-            nvalid = math.floor(len(s)*valid_ratio)
-            if random.random() < len(s)*valid_ratio - nvalid:
+            nvalid = math.floor(len(s) * valid_ratio)
+            if random.random() < len(s) * valid_ratio - nvalid:
                 nvalid += 1
             valid_indices = random.sample(range(len(s)), nvalid)
             train_indices = list(set(range(len(s))).difference(valid_indices))

From ac0b179fe03917692cba58c363fbb13854fe61dd Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Mon, 14 Oct 2024 15:43:24 +0800
Subject: [PATCH 12/13] Revert "[pre-commit.ci] auto fixes from pre-commit.com
 hooks"

This reverts commit e3d03ce37ac08492db5c21f868179a3e4ef743e5.
---
 dpgen2/op/run_dp_train.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index 29468e53..2f066ad3 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -200,9 +200,7 @@ def execute(
         iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1])
         iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:])
         if config["split_last_iter_valid_ratio"] is not None:
-            train_systems, valid_systems = split_valid(
-                iter_data_new_exp, config["split_last_iter_valid_ratio"]
-            )
+            train_systems, valid_systems = split_valid(iter_data_new_exp, config["split_last_iter_valid_ratio"])
             iter_data_new_exp = train_systems
             valid_data = append_valid_data(config, valid_data, valid_systems)
         iter_data_exp = iter_data_old_exp + iter_data_new_exp
@@ -525,9 +523,7 @@ def training_args():
         doc_head = "Head to use in the multitask training"
         doc_init_model_with_finetune = "Use finetune for init model"
         doc_train_args = "Extra arguments for dp train"
-        doc_split_last_iter_valid_ratio = (
-            "Ratio of valid data if split data of last iter"
-        )
+        doc_split_last_iter_valid_ratio = "Ratio of valid data if split data of last iter"
         return [
             Argument(
                 "command",
@@ -705,8 +701,8 @@ def split_valid(systems: List[str], valid_ratio: float):
         train_multi_systems = dpdata.MultiSystems()
         valid_multi_systems = dpdata.MultiSystems()
         for s in d:
-            nvalid = math.floor(len(s) * valid_ratio)
-            if random.random() < len(s) * valid_ratio - nvalid:
+            nvalid = math.floor(len(s)*valid_ratio)
+            if random.random() < len(s)*valid_ratio - nvalid:
                 nvalid += 1
             valid_indices = random.sample(range(len(s)), nvalid)
             train_indices = list(set(range(len(s))).difference(valid_indices))

From 041f9e16c8e2576c105e3713aabba734c7b036c0 Mon Sep 17 00:00:00 2001
From: zjgemi <liuxin_zijian@163.com>
Date: Mon, 14 Oct 2024 15:43:33 +0800
Subject: [PATCH 13/13] Revert "add split_last_iter_valid_ratio"

This reverts commit 7c36020d403284ea9ff98e4936ed8d86e401e2b2.
---
 dpgen2/op/run_dp_train.py | 85 ---------------------------------------
 1 file changed, 85 deletions(-)

diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index 2f066ad3..dccbc518 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -1,9 +1,7 @@
 import glob
 import json
 import logging
-import math
 import os
-import random
 import shutil
 from pathlib import (
     Path,
@@ -199,10 +197,6 @@ def execute(
         valid_data = ip["valid_data"]
         iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1])
         iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:])
-        if config["split_last_iter_valid_ratio"] is not None:
-            train_systems, valid_systems = split_valid(iter_data_new_exp, config["split_last_iter_valid_ratio"])
-            iter_data_new_exp = train_systems
-            valid_data = append_valid_data(config, valid_data, valid_systems)
         iter_data_exp = iter_data_old_exp + iter_data_new_exp
         work_dir = Path(task_name)
         init_model_with_finetune = config["init_model_with_finetune"]
@@ -523,7 +517,6 @@ def training_args():
         doc_head = "Head to use in the multitask training"
         doc_init_model_with_finetune = "Use finetune for init model"
         doc_train_args = "Extra arguments for dp train"
-        doc_split_last_iter_valid_ratio = "Ratio of valid data if split data of last iter"
         return [
             Argument(
                 "command",
@@ -625,13 +618,6 @@ def training_args():
                 default="",
                 doc=doc_train_args,
             ),
-            Argument(
-                "split_last_iter_valid_ratio",
-                float,
-                optional=True,
-                default=None,
-                doc=doc_split_last_iter_valid_ratio,
-            ),
         ]
 
     @staticmethod
@@ -686,75 +672,4 @@ def _expand_all_multi_sys_to_sys(list_multi_sys):
     return all_sys_dirs
 
 
-def split_valid(systems: List[str], valid_ratio: float):
-    train_systems = []
-    valid_systems = []
-    for system in systems:
-        d = dpdata.MultiSystems()
-        mixed_type = len(glob.glob("%s/*/real_atom_types.npy" % system)) > 0
-        if mixed_type:
-            d.load_systems_from_file(system, fmt="deepmd/npy/mixed")
-        else:
-            k = dpdata.LabeledSystem(system, fmt="deepmd/npy")
-            d.append(k)
-
-        train_multi_systems = dpdata.MultiSystems()
-        valid_multi_systems = dpdata.MultiSystems()
-        for s in d:
-            nvalid = math.floor(len(s)*valid_ratio)
-            if random.random() < len(s)*valid_ratio - nvalid:
-                nvalid += 1
-            valid_indices = random.sample(range(len(s)), nvalid)
-            train_indices = list(set(range(len(s))).difference(valid_indices))
-            if len(valid_indices) > 0:
-                valid_multi_systems.append(s.sub_system(valid_indices))
-            if len(train_indices) > 0:
-                train_multi_systems.append(s.sub_system(train_indices))
-
-        if len(train_multi_systems) > 0:
-            target = "train_data/" + system
-            if mixed_type:
-                # The multisystem is loaded from one dir, thus we can safely keep one dir
-                train_multi_systems.to_deepmd_npy_mixed("%s.tmp" % target)
-                fs = os.listdir("%s.tmp" % target)
-                assert len(fs) == 1
-                os.rename(os.path.join("%s.tmp" % target, fs[0]), target)
-                os.rmdir("%s.tmp" % target)
-            else:
-                train_multi_systems[0].to_deepmd_npy(target)
-            train_systems.append(target)
-
-        if len(valid_multi_systems) > 0:
-            target = "valid_data/" + system
-            if mixed_type:
-                # The multisystem is loaded from one dir, thus we can safely keep one dir
-                valid_multi_systems.to_deepmd_npy_mixed("%s.tmp" % target)
-                fs = os.listdir("%s.tmp" % target)
-                assert len(fs) == 1
-                os.rename(os.path.join("%s.tmp" % target, fs[0]), target)
-                os.rmdir("%s.tmp" % target)
-            else:
-                valid_multi_systems[0].to_deepmd_npy(target)
-            valid_systems.append(target)
-
-    return train_systems, valid_systems
-
-
-def append_valid_data(config, valid_data, valid_systems):
-    if not valid_systems:
-        return valid_data
-    if config["multitask"]:
-        head = config["head"]
-        if not valid_data:
-            valid_data = {}
-        if head not in valid_data:
-            valid_data[head] = []
-        valid_data[head] += valid_systems
-    else:
-        if not valid_data:
-            valid_data = []
-        valid_data += valid_systems
-    return valid_data
-
-
 config_args = RunDPTrain.training_args