CLI and Configuration Parameters of TiCDC Changefeeds

Changefeed CLI Parameters

This section introduces the command-line parameters of TiCDC changefeeds by showing how to create a replication (changefeed) task.

cdc cli changefeed create --server=http://10.0.10.25:8300 --sink-uri="mysql://root:123456@127.0.0.1:3306/" --changefeed-id="simple-replication-task"
Create changefeed successfully! ID: simple-replication-task Info: {"upstream_id":7178706266519722477,"namespace":"default","id":"simple-replication-task","sink_uri":"mysql://root:xxxxx@127.0.0.1:4000/?time-zone=","create_time":"2023-12-21T15:05:46.679218+08:00","start_ts":438156275634929669,"engine":"unified","config":{"case_sensitive":false,"enable_old_value":true,"force_replicate":false,"ignore_ineligible_table":false,"check_gc_safe_point":true,"enable_sync_point":true,"bdr_mode":false,"sync_point_interval":30000000000,"sync_point_retention":3600000000000,"filter":{"rules":["test.*"],"event_filters":null},"mounter":{"worker_num":16},"sink":{"protocol":"","schema_registry":"","csv":{"delimiter":",","quote":"\"","null":"\\N","include_commit_ts":false},"column_selectors":null,"transaction_atomicity":"none","encoder_concurrency":16,"terminator":"\r\n","date_separator":"none","enable_partition_separator":false},"consistent":{"level":"none","max_log_size":64,"flush_interval":2000,"storage":""}},"state":"normal","creator_version":"v7.1.3"}
  • --changefeed-id: The ID of the replication task. The format must match the ^[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*$ regular expression. If this ID is not specified, TiCDC automatically generates a UUID (the version 4 format) as the ID.

  • --sink-uri: The downstream address of the replication task. Configure --sink-uri in the following format. Currently, the scheme supports mysql, tidb, and kafka.

    [scheme]://[userinfo@][host]:[port][/path]?[query_parameters]

    If the sink URI contains special characters such as ! * ' ( ) ; : @ & = + $ , / ? % # [ ], you need to escape these special characters, for example, in URI Encoder; an escaping example is shown after this list.

  • --start-ts: Specifies the starting TSO of the changefeed. From this TSO, the TiCDC cluster starts pulling data. The default value is the current time.

  • --target-ts: Specifies the ending TSO of the changefeed. Up to this TSO, the TiCDC cluster pulls data and then stops. The default value is empty, which means that TiCDC does not automatically stop pulling data.

  • --config: Specifies the configuration file of the changefeed.
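For instance, suppose the downstream password were pa$$word! (a hypothetical value, not taken from this page): the $ and ! characters would need to be percent-encoded in the sink URI before the changefeed is created. The following is a minimal sketch, reusing the server address from the example above and an illustrative configuration file name changefeed.toml:

# Hypothetical example: the raw password "pa$$word!" is percent-encoded as "pa%24%24word%21" in the sink URI.
cdc cli changefeed create --server=http://10.0.10.25:8300 --sink-uri="mysql://root:pa%24%24word%21@127.0.0.1:3306/" --changefeed-id="escaped-password-task" --config=changefeed.toml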

Changefeed Configuration Parameters

This section describes the configuration of a replication task.

# Specifies the memory quota (in bytes) that can be used in the capture server by the sink manager.
# If the value is exceeded, the overused part will be recycled by the go runtime.
# The default value is `1073741824` (1 GB).
# memory-quota = 1073741824

# Specifies whether the database names and tables in the configuration file are case-sensitive.
# Starting from v6.5.6 and v7.1.3, the default value changes from true to false.
# This configuration item affects configurations related to filter and sink.
case-sensitive = false

# Specifies whether to output the old value. New in v4.0.5. Since v5.0, the default value is `true`.
enable-old-value = true

# Specifies whether to enable the Syncpoint feature, which is supported since v6.3.0 and is disabled by default.
# Since v6.4.0, only the changefeed with the SYSTEM_VARIABLES_ADMIN or SUPER privilege can use the TiCDC Syncpoint feature.
# Note: This configuration item only takes effect if the downstream is TiDB.
# enable-sync-point = false

# Specifies the interval at which Syncpoint aligns the upstream and downstream snapshots.
# The format is in h m s. For example, "1h30m30s".
# The default value is "10m" and the minimum value is "30s".
# Note: This configuration item only takes effect if the downstream is TiDB.
# sync-point-interval = "5m"

# Specifies how long the data is retained by Syncpoint in the downstream table. When this duration is exceeded, the data is cleaned up.
# The format is in h m s. For example, "24h30m30s".
# The default value is "24h".
# Note: This configuration item only takes effect if the downstream is TiDB.
# sync-point-retention = "1h"

# Starting from v6.5.6 and v7.1.3, this configuration item specifies the SQL mode used when parsing DDL statements. Multiple modes are separated by commas.
# The default value is the same as the default SQL mode of TiDB.
# sql-mode = "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION"

[mounter]
# The number of threads with which the mounter decodes KV data. The default value is 16.
# worker-num = 16

[filter]
# Ignores the transaction of specified start_ts.
# ignore-txn-start-ts = [1, 2]

# Filter rules.
# Filter syntax: <https://docs.pingcap.com/tidb/stable/table-filter#syntax>.
rules = ['*.*', '!test.*']

# Event filter rules.
# The detailed syntax is described in <https://docs.pingcap.com/tidb/stable/ticdc-filter>.
# The first event filter rule.
# [[filter.event-filters]]
# matcher = ["test.worker"] # matcher is an allow list, which means this rule only applies to the worker table in the test database.
# ignore-event = ["insert"] # Ignore insert events.
# ignore-sql = ["^drop", "add column"] # Ignore DDLs that start with "drop" or contain "add column".
# ignore-delete-value-expr = "name = 'john'" # Ignore delete DMLs that contain the condition "name = 'john'".
# ignore-insert-value-expr = "id >= 100" # Ignore insert DMLs that contain the condition "id >= 100".
# ignore-update-old-value-expr = "age < 18" # Ignore update DMLs whose old value contains "age < 18".
# ignore-update-new-value-expr = "gender = 'male'" # Ignore update DMLs whose new value contains "gender = 'male'".

# The second event filter rule.
# [[filter.event-filters]]
# matcher = ["test.fruit"] # matcher is an allow list, which means this rule only applies to the fruit table in the test database.
# ignore-event = ["drop table", "delete"] # Ignore the `drop table` DDL events and the `delete` DML events.
# ignore-sql = ["^drop table", "alter table"] # Ignore DDL statements that start with `drop table` or contain `alter table`.
# ignore-insert-value-expr = "price > 1000 and origin = 'no where'" # Ignore insert DMLs that contain the conditions "price > 1000" and "origin = 'no where'".

[scheduler]
# Allocate tables to multiple TiCDC nodes for replication on a per-Region basis.
# Note: This configuration item only takes effect on Kafka changefeeds and is not supported on MySQL changefeeds.
# The value is "false" by default. Set it to "true" to enable this feature.
enable-table-across-nodes = false
# When `enable-table-across-nodes` is enabled, there are two allocation modes:
# 1. Allocate tables based on the number of Regions, so that each TiCDC node handles roughly the same number of Regions. If the number of Regions for a table exceeds the value of `region-threshold`, the table will be allocated to multiple nodes for replication. The default value of `region-threshold` is 10000.
# region-threshold = 10000
# 2. Allocate tables based on the write traffic, so that each TiCDC node handles roughly the same number of modified rows. Only when the number of modified rows per minute in a table exceeds the value of `write-key-threshold` will this allocation take effect.
# write-key-threshold = 30000
# Note:
# * The default value of `write-key-threshold` is 0, which means that the traffic allocation mode is not used by default.
# * You only need to configure one of the two modes. If both `region-threshold` and `write-key-threshold` are configured, TiCDC prioritizes the traffic allocation mode, namely `write-key-threshold`.

[sink]
# For the sink of MQ type, you can use dispatchers to configure the event dispatcher.
# Since v6.1.0, TiDB supports two types of event dispatchers: partition and topic. For more information, see <partition and topic link>.
# The matching syntax of matcher is the same as the filter rule syntax. For details about the matcher rules, see <>.
# Note: This configuration item only takes effect if the downstream is MQ.
# dispatchers = [
#     {matcher = ['test1.*', 'test2.*'], topic = "Topic expression 1", partition = "ts" },
#     {matcher = ['test3.*', 'test4.*'], topic = "Topic expression 2", partition = "index-value" },
#     {matcher = ['test1.*', 'test5.*'], topic = "Topic expression 3", partition = "table"},
#     {matcher = ['test6.*'], partition = "ts"}
# ]

# The protocol configuration item specifies the protocol format of the messages sent to the downstream.
# When the downstream is Kafka, the protocol can only be canal-json or avro.
# When the downstream is a storage service, the protocol can only be canal-json or csv.
# Note: This configuration item only takes effect if the downstream is Kafka or a storage service.
# protocol = "canal-json"

# The following three configuration items are only used when you replicate data to storage sinks and can be ignored when replicating data to MQ or MySQL sinks.
# Row terminator, used for separating two data change events. The default value is an empty string, which means "\r\n" is used.
# terminator = ''

# Date separator type used in the file directory. Value options are `none`, `year`, `month`, and `day`. `day` is the default value and means separating files by day. For more information, see <https://docs.pingcap.com/tidb/v7.1/ticdc-sink-to-cloud-storage#data-change-records>.
# Note: This configuration item only takes effect if the downstream is a storage service.
date-separator = 'day'

# Whether to use partitions as the separation string.
# The default value is true, which means that partitions in a table are stored in separate directories. It is recommended that you keep the value as `true` to avoid potential data loss in downstream partitioned tables <https://github.com/pingcap/tiflow/issues/8724>. For usage examples, see <https://docs.pingcap.com/tidb/v7.1/ticdc-sink-to-cloud-storage#data-change-records>.
# Note: This configuration item only takes effect if the downstream is a storage service.
enable-partition-separator = true

# Schema registry URL.
# Note: This configuration item only takes effect if the downstream is MQ.
# schema-registry = "http://localhost:80801/subjects/{subject-name}/versions/{version-number}/schema"

# Specifies the number of encoder threads used when encoding data.
# Note: This configuration item only takes effect if the downstream is MQ.
# The default value is 16.
# encoder-concurrency = 16

# Specifies whether to enable kafka-sink-v2 that uses the kafka-go sink library.
# Note: This configuration item only takes effect if the downstream is MQ.
# The default value is false.
# enable-kafka-sink-v2 = false

# Starting from v7.1.0, this configuration item specifies whether to only output the updated columns.
# Note: This configuration item only applies to the MQ downstream using the open-protocol and canal-json.
# The default value is false.
# only-output-updated-columns = false

# Since v6.5.0, TiCDC supports saving data changes to storage services in CSV format. Ignore the following configurations if you replicate data to MQ or MySQL sinks.
# [sink.csv]
# The character used to separate fields in the CSV file. The value must be an ASCII character and defaults to `,`.
# delimiter = ','
# The quotation character used to surround fields in the CSV file. The default value is `"`. If the value is empty, no quotation is used.
# quote = '"'
# The character displayed when a CSV column is null. The default value is `\N`.
# null = '\N'
# Whether to include commit-ts in CSV rows. The default value is false.
# include-commit-ts = false
# The encoding method of binary data, which can be 'base64' or 'hex'. New in v7.1.2. The default value is 'base64'.
# binary-encoding-method = 'base64'

# Specifies the replication consistency configurations for a changefeed when using the redo log. For more information, see <https://docs.pingcap.com/tidb/stable/ticdc-sink-to-mysql#eventually-consistent-replication-in-disaster-scenarios>.
# Note: The consistency-related configuration items only take effect when the downstream is a database and the redo log feature is enabled.
[consistent]
# The data consistency level. Available options are "none" and "eventual". "none" means that the redo log is disabled.
# The default value is "none".
level = "none"
# The max redo log size in MB.
# The default value is 64.
max-log-size = 64
# The flush interval for redo log. The default value is 2000 milliseconds.
flush-interval = 2000
# The storage URI of the redo log.
# The default value is empty.
storage = ""
# Specifies whether to store the redo log in a local file.
# The default value is false.
use-file-backend = false
# The number of encoding and decoding workers in the redo module.
# The default value is 16.
encoding-worker-num = 16
# The number of flushing workers in the redo module.
# The default value is 8.
flush-worker-num = 8
# The behavior to compress redo log files.
# Available options are "" and "lz4". The default value is "", which means no compression.
compression = ""
# The concurrency for uploading a single redo file.
# The default value is 1, which means concurrency is disabled.
flush-concurrency = 1

[integrity]
# Whether to enable the checksum validation for single-row data. The default value is "none", which means to disable the feature. Value options are "none" and "correctness".
integrity-check-level = "none"
# Specifies the log level of the Changefeed when the checksum validation for single-row data fails. The default value is "warn". Value options are "warn" and "error".
corruption-handle-level = "warn"

# The following configuration items only take effect when the downstream is Kafka. Supported starting from v7.1.1.
[sink.kafka-config]
# The mechanism of Kafka SASL authentication. The default value is empty, indicating that SASL authentication is not used.
sasl-mechanism = "OAUTHBEARER"
# The client-id in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used.
sasl-oauth-client-id = "producer-kafka"
# The client-secret in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used.
sasl-oauth-client-secret = "cHJvZHVjZXIta2Fma2E="
# The token-url in the Kafka SASL OAUTHBEARER authentication to obtain the token. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used.
sasl-oauth-token-url = "http://127.0.0.1:4444/oauth2/token"
# The scopes in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is optional when the OAUTHBEARER authentication is used.
sasl-oauth-scopes = ["producer.kafka", "consumer.kafka"]
# The grant-type in the Kafka SASL OAUTHBEARER authentication. The default value is "client_credentials". This parameter is optional when the OAUTHBEARER authentication is used.
sasl-oauth-grant-type = "client_credentials"
# The audience in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is optional when the OAUTHBEARER authentication is used.
sasl-oauth-audience = "kafka"

[sink.cloud-storage-config]
# The concurrency for saving data changes to the downstream cloud storage.
# The default value is 16.
worker-count = 16
# The interval for saving data changes to the downstream cloud storage.
# The default value is "2s".
flush-interval = "2s"
# A data change file is saved to the cloud storage when the number of bytes in this file exceeds `file-size`.
# The default value is 67108864 (that is, 64 MiB).
file-size = 67108864
# The duration to retain files, which takes effect only when `date-separator` is configured as `day`. Assume that `file-expiration-days = 1` and `file-cleanup-cron-spec = "0 0 0 * * *"`, then TiCDC performs daily cleanup at 00:00:00 for files saved beyond 24 hours. For example, at 00:00:00 on 2023/12/02, TiCDC cleans up files generated before 2023/12/01, while files generated on 2023/12/01 remain unaffected.
# The default value is 0, which means file cleanup is disabled.
file-expiration-days = 0
# The running cycle of the scheduled cleanup task, compatible with the crontab configuration, with a format of `<Second> <Minute> <Hour> <Day of the month> <Month> <Day of the week (Optional)>`.
# The default value is "0 0 2 * * *", which means that the cleanup task is executed every day at 2 AM.
file-cleanup-cron-spec = "0 0 2 * * *"
# The concurrency for uploading a single file.
# The default value is 1, which means concurrency is disabled.
flush-concurrency = 1
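To apply a configuration file such as the one above, save it to a file and pass it through the --config parameter described earlier when creating the changefeed. A minimal sketch that reuses the create command from the beginning of this page and assumes an illustrative file name changefeed.toml:

# Hypothetical usage: the configuration above is assumed to be saved as changefeed.toml.
cdc cli changefeed create --server=http://10.0.10.25:8300 --sink-uri="mysql://root:123456@127.0.0.1:3306/" --changefeed-id="simple-replication-task" --config=changefeed.toml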
