From 2f01b0514d98eda2211bfc483861ba07da43a471 Mon Sep 17 00:00:00 2001
From: Izabel Wang <37297881+IzabelWang@users.noreply.github.com>
Date: Tue, 23 Apr 2019 11:24:09 +0800
Subject: [PATCH] v2.1/tools: update TiDB-Binlog (#1059)

* v2.1/tools: update TiDB-Binlog

* Update tidb-binlog-cluster.md

Address a comment.
---
 v2.1/tools/tidb-binlog-cluster.md | 101 ++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 26 deletions(-)

diff --git a/v2.1/tools/tidb-binlog-cluster.md b/v2.1/tools/tidb-binlog-cluster.md
index cad2afcefc437..89037dbbda12e 100755
--- a/v2.1/tools/tidb-binlog-cluster.md
+++ b/v2.1/tools/tidb-binlog-cluster.md
@@ -142,9 +142,39 @@ It is recommended to deploy TiDB-Binlog using TiDB-Ansible. If you just want to
     pump3 ansible_host=172.16.10.74 deploy_dir=/data1/pump
     ```
 
-2. Deploy and start the TiDB cluster.
+2. Deploy and start the TiDB cluster with Pump components.
 
-    For how to use Ansible to deploy the TiDB cluster, see [Deploy TiDB Using Ansible](../op-guide/ansible-deployment.md). When Binlog is enabled, Pump is deployed and started by default.
+    After configuring the `inventory.ini` file as described above, choose one of the following two methods for deployment.
+
+    **Method 1**: To add the Pump component to an existing TiDB cluster, perform the following steps in order.
+
+    1. Deploy pump_servers and node_exporters.
+
+        ```
+        ansible-playbook deploy.yml -l ${pump1_ip},${pump2_ip},[${alias1_name},${alias2_name}]
+        ```
+
+    2. Start pump_servers.
+
+        ```
+        ansible-playbook start.yml --tags=pump
+        ```
+
+    3. Update and restart tidb_servers.
+
+        ```
+        ansible-playbook rolling_update.yml --tags=tidb
+        ```
+
+    4. Update the monitoring information.
+
+        ```
+        ansible-playbook rolling_update_monitor.yml --tags=prometheus
+        ```
+
+    **Method 2**: Deploy a TiDB cluster with Pump components from scratch.
+
+    For how to use Ansible to deploy the TiDB cluster, see [Deploy TiDB Using Ansible](../op-guide/ansible-deployment.md).
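+
+    With either method, Pump is deployed only when binlog support is switched on in `inventory.ini`. Below is a minimal sketch of the relevant settings (the Pump hosts repeat the ones configured in Step 1; `enable_binlog` is the TiDB-Ansible variable that controls whether TiDB writes binlog):
+
+    ```ini
+    [all:vars]
+    # Pump is deployed and TiDB writes binlog only when this is set to "True"
+    enable_binlog = True
+
+    [pump_servers]
+    pump1 ansible_host=172.16.10.72 deploy_dir=/data1/pump
+    pump2 ansible_host=172.16.10.73 deploy_dir=/data1/pump
+    pump3 ansible_host=172.16.10.74 deploy_dir=/data1/pump
+    ```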
 
 3. Check the Pump status.
 
@@ -153,9 +183,10 @@ It is recommended to deploy TiDB-Binlog using TiDB-Ansible. If you just want to
     ```bash
     $ cd /home/tidb/tidb-ansible
     $ resources/bin/binlogctl -pd-urls=http://172.16.10.72:2379 -cmd pumps
-    2018/09/21 16:45:54 nodes.go:46: [info] pump: &{NodeID:ip-172-16-10-72:8250 Addr:172.16.10.72:8250 State:online IsAlive:false Score:0 Label: MaxCommitTS:0 UpdateTS:403051525690884099}
-    2018/09/21 16:45:54 nodes.go:46: [info] pump: &{NodeID:ip-172-16-10-73:8250 Addr:172.16.10.73:8250 State:online IsAlive:false Score:0 Label: MaxCommitTS:0 UpdateTS:403051525703991299}
-    2018/09/21 16:45:54 nodes.go:46: [info] pump: &{NodeID:ip-172-16-10-74:8250 Addr:172.16.10.74:8250 State:online IsAlive:false Score:0 Label: MaxCommitTS:0 UpdateTS:403051525717360643}
+
+    INFO[0000] pump: {NodeID: ip-172-16-10-72:8250, Addr: 172.16.10.72:8250, State: online, MaxCommitTS: 403051525690884099, UpdateTime: 2018-12-25 14:23:37 +0800 CST}
+    INFO[0000] pump: {NodeID: ip-172-16-10-73:8250, Addr: 172.16.10.73:8250, State: online, MaxCommitTS: 403051525703991299, UpdateTime: 2018-12-25 14:23:36 +0800 CST}
+    INFO[0000] pump: {NodeID: ip-172-16-10-74:8250, Addr: 172.16.10.74:8250, State: online, MaxCommitTS: 403051525717360643, UpdateTime: 2018-12-25 14:23:35 +0800 CST}
     ```
 
 #### Step 3: Deploy Drainer
@@ -173,12 +204,10 @@ It is recommended to deploy TiDB-Binlog using TiDB-Ansible. If you just want to
     2018/06/21 11:24:47 meta.go:117: [info] meta: &{CommitTS:400962745252184065}
     ```
 
-    After this command is executed, a file named `{data-dir}/savepoint` is generated. This file contains `tso`, whose value is used as the value of the `initial-commit-ts` parameter needed for the initial start of Drainer.
+    After this command is executed, `meta: &{CommitTS:400962745252184065}` is output. The value of `CommitTS` is used as the value of the `initial-commit-ts` parameter needed for the initial start of Drainer.
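+
+    For example, with the `CommitTS` value shown above, the value is passed to Drainer on its first start. This is a sketch that assumes the Drainer binary and the `drainer.toml` configuration file laid out in the usage examples later in this document:
+
+    ```bash
+    bin/drainer -config drainer.toml -initial-commit-ts 400962745252184065
+    ```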
 
 2. Back up and restore all the data.
 
-    If the downstream is MySQL/TiDB, to guarantee the data integrity, you need to make a full backup and restore of the data before Drainer starts (about 10 minutes after Pump starts to run).
-
     It is recommended to use [mydumper](../tools/mydumper.md) to make a full backup of TiDB and then use [Loader](../tools/loader.md) to export the data to the downstream. For more details, see [Backup and Restore](../op-guide/backup-restore.md).
 
 3. Modify the `tidb-ansible/inventory.ini` file.
@@ -250,7 +279,7 @@ It is recommended to deploy TiDB-Binlog using TiDB-Ansible. If you just want to
     [syncer.to]
     compression = ""
     # default data directory: "{{ deploy_dir }}/data.drainer"
-    # dir = "data.drainer"
+    dir = "data.drainer"
     ```
 
 5. Deploy Drainer.
@@ -363,6 +392,10 @@ The following part shows how to use Pump and Drainer based on the nodes above.
     # the address of the PD cluster nodes
     pd-urls = "http://192.168.0.16:2379,http://192.168.0.15:2379,http://192.168.0.14:2379"
+
+    # [storage]
+    # Set to true (the default value) to ensure reliability by making sure that the binlog data is flushed to the disk
+    # sync-log = true
     ```
 
 - The example of starting Pump:
@@ -445,19 +478,20 @@ The following part shows how to use Pump and Drainer based on the nodes above.
     # the directory of the log file
     log-file = "drainer.log"
 
+    # the compression algorithm that Drainer uses when the binlog is obtained from Pump. The value can be "gzip". If this item is not configured, the data is not compressed.
+    # compressor = "gzip"
+
     # Syncer Configuration
     [syncer]
+
+    # If this item is set, the specified sql-mode is used to parse DDL statements.
+    # sql-mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"
 
-    # the db filter list ("INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql,test" by default)
-    # Does not support the Rename DDL operation on tables of `ignore schemas`.
-    ignore-schemas = "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql"
-
-    # the number of SQL statements of a transaction which are output to the downstream database (1 by default)
-    txn-batch = 1
+    # the number of SQL statements of a transaction which are output to the downstream database (20 by default)
+    txn-batch = 20
 
     # the number of the concurrency of the downstream for synchronization. The bigger the value,
-    # the better throughput performance of the concurrency (1 by default)
-    worker-count = 1
+    # the better the throughput performance of the concurrency (16 by default)
+    worker-count = 16
 
     # whether to disable the SQL feature of splitting a single binlog file. If it is set to "true",
     # each binlog file is restored to a single transaction for synchronization based on the order of binlogs.
@@ -468,6 +502,10 @@ The following part shows how to use Pump and Drainer based on the nodes above.
     # Valid value: "mysql", "kafka", "pb", "flash"
     db-type = "mysql"
 
+    # the db filter list ("INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql,test" by default)
+    # The Rename DDL operation on tables in `ignore-schemas` is not supported.
+    ignore-schemas = "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql"
+
     # `replicate-do-db` has priority over `replicate-do-table`. When they have the same `db` name,
     # regular expressions are supported for configuration.
     # The regular expression should start with "~".
     # replicate-do-db = ["~^b.*","s1"]
 
     # [[syncer.replicate-do-table]]
     # db-name ="test"
     # tbl-name = "log"
 
     # [[syncer.replicate-do-table]]
     # db-name ="test"
     # tbl-name = "~^a.*"
@@ -482,6 +520,11 @@ The following part shows how to use Pump and Drainer based on the nodes above.
     # db-name ="test"
     # tbl-name = "~^a.*"
 
+    # Ignore the replication of some tables
+    # [[syncer.ignore-table]]
+    # db-name = "test"
+    # tbl-name = "log"
+
     # the server parameters of the downstream database when `db-type` is set to "mysql"
     [syncer.to]
     host = "192.168.0.13"
@@ -498,6 +541,10 @@ The following part shows how to use Pump and Drainer based on the nodes above.
     # zookeeper-addrs = "127.0.0.1:2181"
     # kafka-addrs = "127.0.0.1:9092"
     # kafka-version = "0.8.2.0"
+
+    # the topic name of the Kafka cluster that saves the binlog data. The default value is "_obinlog".
+    # To run multiple Drainers to replicate data to the same Kafka cluster, set a different `topic-name` for each Drainer.
+    # topic-name = ""
     ```
 
 - The example of starting Drainer:
@@ -590,29 +637,31 @@ Usage of binlogctl:
 
 Command example:
 
 - Check the state of all the Pumps or Drainers:
 
+    Set `cmd` to `pumps` or `drainers` to check the state of all the Pumps or Drainers. For example:
+
     ```bash
-    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd pumps/drainers
+    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd pumps
 
-    2018/12/18 03:17:09 nodes.go:46: [info] pump: &{NodeID:1.1.1.1:8250 Addr:pump:8250 State:online IsAlive:false Score:0 Label: MaxCommitTS:405039487358599169 UpdateTS:405027205608112129}
-    ```
+    INFO[0000] pump: {NodeID: ip-172-16-30-67:8250, Addr: 172.16.30.192:8250, State: online, MaxCommitTS: 405197570529820673, UpdateTime: 2018-12-25 14:23:37 +0800 CST}
+    ```
 
-    > **Note:** Currently, the `IsAlive`, `Score` and `Label` fields are not used, so you do not need to pay attention to them.
-
 - Modify the Pump/Drainer state:
 
-    ```bash
-    The Pump/Drainer states include `online`, `pausing`, `paused`, `closing` and `offline`.
+    Set `cmd` to `update-pump` or `update-drainer` to update the state of Pump or Drainer.
+
+    The Pump/Drainer states include `online`, `pausing`, `paused`, `closing` and `offline`. For example:
 
-    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd update-pump/update-drainer -node-id ip-127-0-0-1:8250/{nodeID} -state {state}
+    ```bash
+    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd update-pump -node-id ip-127-0-0-1:8250 -state paused
     ```
 
     This command modifies the Pump/Drainer state saved in PD.
 
 - Pause or close Pump/Drainer:
 
+    Set `cmd` to `pause-pump`, `pause-drainer`, `offline-pump`, or `offline-drainer` to pause Pump, pause Drainer, take Pump offline, or take Drainer offline. For example:
+
     ```bash
-    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd pause-pump/pause-drainer/offline-pump/offline-drainer -node-id ip-127-0-0-1:8250/{nodeID}
+    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd pause-pump -node-id ip-127-0-0-1:8250
     ```
 
     `binlogctl` sends the HTTP request to Pump/Drainer, and Pump/Drainer exits from the process after receiving the command and sets its state to `paused`/`offline`.
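+
+    For example, a command that takes a Drainer node offline; the node ID below is illustrative, and 8249 is the default Drainer port:
+
+    ```bash
+    bin/binlogctl -pd-urls=http://127.0.0.1:2379 -cmd offline-drainer -node-id ip-127-0-0-1:8249
+    ```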