数据异构 之 DataX

Mongo

Src

启动源端 MongoDB 容器（映射到宿主机端口 27217），连接后在 report 库的 locationRecord 集合中插入一条测试记录：
docker run --name datax-src -p 27217:27017 -d mongo:4.0.4

/usr/local/mongodb/bin/mongo --port 27217

use report

db.getCollection("locationRecord").insert( {
terminalId: NumberInt("1900180"),
machineId: NumberInt("12206"),
presentTs: NumberLong("4294967295"),
longitude: 34.9016151428223,
latitude: 8.49752821035347e-14,
altitude: -1,
address: "Doui Thabet, Saida, Algeria",
acceptTime: ISODate("2020-05-19T23:43:17.837Z")
} );

Dest

启动目标端 MongoDB 容器（映射到宿主机端口 27218）：
docker run --name datax-dest -p 27218:27017 -d mongo:4.0.4

DataX

下载并解压 DataX，然后编辑作业配置文件 job/job.json：
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

tar xf datax.tar.gz && cd datax

vim job/job.json
job/job.json 内容如下（mongodbreader 读取源端 127.0.0.1:27217，mongodbwriter 写入目标端 127.0.0.1:27218）：
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "mongodbreader",
"parameter": {
"address": [
"127.0.0.1:27217"
],
"dbName": "report",
"collectionName": "locationRecord",
"column": [
{
"name": "terminalId",
"type": "int"
},
{
"name": "machineId",
"type": "int"
},
{
"name": "presentTs",
"type": "long"
},
{
"name": "longitude",
"type": "double"
},
{
"name": "latitude",
"type": "double"
},
{
"name": "altitude",
"type": "double"
},
{
"name": "address",
"type": "string"
},
{
"name": "acceptTime",
"type": "date"
}
]
}
},
"writer": {
"name": "mongodbwriter",
"parameter": {
"address": [
"127.0.0.1:27218"
],
"dbName": "report",
"collectionName": "locationRecord",
"column": [
{
"name": "terminalId",
"type": "int"
},
{
"name": "machineId",
"type": "int"
},
{
"name": "presentTs",
"type": "long"
},
{
"name": "longitude",
"type": "double"
},
{
"name": "latitude",
"type": "double"
},
{
"name": "altitude",
"type": "double"
},
{
"name": "address",
"type": "string"
},
{
"name": "acceptTime",
"type": "date"
}
]
}
}
}
]
}
}
使用 conda 创建并激活 Python 2.7 环境，然后执行作业：
conda create -n py2 python=2.7

conda activate py2

python bin/datax.py job/job.json
执行结果：
任务启动时刻 : 2020-09-21 18:19:52
任务结束时刻 : 2020-09-21 18:20:03
任务总计耗时 : 10s
任务平均流量 : 10B/s
记录写入速度 : 0rec/s
读出记录总数 : 1
读写失败总数 : 0

Tablestore

修改作业配置，reader 保持不变，将 writer 改为 otswriter：
vim job/job.json
job/job.json 内容如下（terminalId、machineId、presentTs 作为主键列，其余字段作为属性列；accessId 与 accessKey 需替换为实际的访问凭证）：
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "mongodbreader",
"parameter": {
"address": [
"127.0.0.1:27217"
],
"dbName": "report",
"collectionName": "locationRecord",
"column": [
{
"name": "terminalId",
"type": "int"
},
{
"name": "machineId",
"type": "int"
},
{
"name": "presentTs",
"type": "long"
},
{
"name": "longitude",
"type": "double"
},
{
"name": "latitude",
"type": "double"
},
{
"name": "altitude",
"type": "double"
},
{
"name": "address",
"type": "string"
},
{
"name": "acceptTime",
"type": "date"
}
]
}
},
"writer": {
"name": "otswriter",
"parameter": {
"endpoint": "https://saas-report.cn-hangzhou.ots.aliyuncs.com",
"accessId": "accessId",
"accessKey": "accessKey",
"instanceName": "saas-report",
"table": "locationRecord",
"primaryKey": [
{
"name": "terminalId",
"type": "int"
},
{
"name": "machineId",
"type": "int"
},
{
"name": "presentTs",
"type": "int"
}
],
"column": [
{
"name": "longitude",
"type": "double"
},
{
"name": "latitude",
"type": "double"
},
{
"name": "altitude",
"type": "double"
},
{
"name": "address",
"type": "string"
},
{
"name": "acceptTime",
"type": "string"
}
],
"writeMode": "UpdateRow"
}
}
}
]
}
}
再次执行作业：
python bin/datax.py job/job.json
执行结果：
任务启动时刻 : 2020-09-22 08:49:35
任务结束时刻 : 2020-09-22 08:49:46
任务总计耗时 : 10s
任务平均流量 : 10B/s
记录写入速度 : 0rec/s
读出记录总数 : 1
读写失败总数 : 0

表格存储 全量同步