转:http://www.cnblogs.com/scotoma/archive/2013/05/16/3081236.html
http://blog.csdn.net/nnddanchaofan/article/details/19621427
最近项目中需要使用thrift和php来读写HBase中的相关数据,所以就整理了下相关的类,做了下测试.
现在自己用到的操作HBase的方式主要有以下几种:
1.HBase Shell, 主要是配置后执行 shell 通过命令查看 HBase 中的数据,比如 count 'xxxx', scan 'xxxx' 等.
2.通过Native Java Api , 自己封装了一个 RESTfull的Api , 通过提供的Api(http)方式来操作HBase
3.使用Thrift 的序列化技术,Thrift支持C++,PHP,Python等语言,适合其他的异构系统操作HBase,这块刚刚尝试
4.使用HBasExplorer,之前写的一个图形化的客户端来操作HBase, http://www.cnblogs.com/scotoma/archive/2012/12/18/2824311.html
5. Hive/Pig , 这个现在还没真正的用过.
当前主要讲第三种方式 Thrift, 这个是Facebook开源出来的, 官方网站是 http://thrift.apache.org/ .
下载安装和启动,请看参考文章中的内容
查看是否跑成功...
使用php 类文件操作Hbase, 生成类文件的方式,请看参考文章中的生产的方法,不过我自己测试的生成方法有Bug,生成的 类文件中 namespace 是空的, 但是从官方源码库中生成的是 namespace Hbase, 所以这里需要注意一下.
我调试了一个驱动类文件,放到了github上了,大家需要的可以下载使用.
https://github.com/xinqiyang/buddy/tree/master/Vender/thrift
接下来进行测试操作,参考http://blog.csdn.net/hguisu/article/details/7298456 这里的测试类,写了个测试,并调试了下
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
|
<?php
/***
Thrift Test Class by xinqiyang
*/
ini_set('display_error', E_ALL);
$GLOBALS['THRIFT_ROOT'] = './lib';
/* Dependencies. In the proper order. */
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TTransport.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TSocket.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Protocol/TProtocol.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Protocol/TBinaryProtocol.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TBufferedTransport.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Type/TMessageType.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Factory/TStringFuncFactory.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/StringFunc/TStringFunc.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/StringFunc/Core.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Type/TType.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Exception/TException.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Exception/TTransportException.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Thrift/Exception/TProtocolException.php';
/* Remember these two files? */
require_once $GLOBALS['THRIFT_ROOT'].'/Types.php';
require_once $GLOBALS['THRIFT_ROOT'].'/Hbase.php';
use Thrift\Protocol\TBinaryProtocol;
use Thrift\Transport\TSocket;
use Thrift\Transport\TSocketPool;
use Thrift\Transport\TFramedTransport;
use Thrift\Transport\TBufferedTransport;
use Hbase\HbaseClient;
//define host and port
$host = '192.168.56.56';
$port = 9090;
$socket = new Thrift\Transport\TSocket($host, $port);
$transport = new TBufferedTransport($socket);
$protocol = new TBinaryProtocol($transport);
// Create a calculator client
$client = new HbaseClient($protocol);
$transport->open();
//echo "Time: " . $client -> time();
$tables = $client->getTableNames();
sort($tables);
foreach ($tables as $name) {
echo $name."\r\n";
}
//create a fc and then create a table
$columns = array(
new \Hbase\ColumnDescriptor(array(
'name' => 'id:',
'maxVersions' => 10
)),
new \Hbase\ColumnDescriptor(array(
'name' => 'name:'
)),
new \Hbase\ColumnDescriptor(array(
'name' => 'score:'
)),
);
$tableName = "student";
/*
try {
$client->createTable($tableName, $columns);
} catch (AlreadyExists $ae) {
var_dump( "WARN: {$ae->message}\n" );
}
*/
// get table descriptors
$descriptors = $client->getColumnDescriptors($tableName);
asort($descriptors);
foreach ($descriptors as $col) {
var_dump( " column: {$col->name}, maxVer: {$col->maxVersions}\n" );
}
//set clomn
//add update column data
$time = time();
var_dump($time);
$row = '2';
$valid = "foobar-".$time;
$mutations = array(
new \Hbase\Mutation(array(
'column' => 'score',
'value' => $valid
)),
);
$mutations1 = array(
new \Hbase\Mutation(array(
'column' => 'score:a',
'value' => $time,
)),
);
$attributes = array (
);
//add row, write a row
$row1 = $time;
$client->mutateRow($tableName, $row1, $mutations1, $attributes);
echo "-------write row $row1 ---\r\n";
//update row
$client->mutateRow($tableName, $row, $mutations, $attributes);
//get column data
$row_name = $time;
$fam_col_name = 'score:a';
$arr = $client->get($tableName, $row_name, $fam_col_name, $attributes);
// $arr = array
foreach ($arr as $k => $v) {
// $k = TCell
echo " ------ get one : value = {$v->value} , <br> ";
echo " ------ get one : timestamp = {$v->timestamp} <br>";
}
echo "----------\r\n";
$arr = $client->getRow($tableName, $row_name, $attributes);
// $client->getRow return a array
foreach ($arr as $k => $TRowResult) {
// $k = 0 ; non-use
// $TRowResult = TRowResult
var_dump($TRowResult);
}
echo "----------\r\n";
/******
//no test
public function scannerOpenWithScan($tableName, \Hbase\TScan $scan, $attributes);
public function scannerOpen($tableName, $startRow, $columns, $attributes);
public function scannerOpenWithStop($tableName, $startRow, $stopRow, $columns, $attributes);
public function scannerOpenWithPrefix($tableName, $startAndPrefix, $columns, $attributes);
public function scannerOpenTs($tableName, $startRow, $columns, $timestamp, $attributes);
public function scannerOpenWithStopTs($tableName, $startRow, $stopRow, $columns, $timestamp, $attributes);
public function scannerGet($id);
public function scannerGetList($id, $nbRows);
public function scannerClose($id);
*/
echo "----scanner get ------\r\n";
$startRow = '1';
$columns = array ('column' => 'score', );
//
$scan = $client->scannerOpen($tableName, $startRow, $columns, $attributes);
//$startAndPrefix = '13686667';
//$scan = $client->scannerOpenWithPrefix($tableName,$startAndPrefix,$columns,$attributes);
//$startRow = '1';
//$stopRow = '2';
//$scan = $client->scannerOpenWithStop($tableName, $startRow, $stopRow, $columns, $attributes);
//$arr = $client->scannerGet($scan);
$nbRows = 1000;
$arr = $client->scannerGetList($scan, $nbRows);
var_dump('count of result :'.count($arr));
foreach ($arr as $k => $TRowResult) {
// code...
//var_dump($TRowResult);
}
$client->scannerClose($scan);
//close transport
$transport->close();
|
这里操作了 createTable , Insert Row , Get Table , Update Row,Scan Table 这些常用的,先熟悉下.
实际操作的时候,需要注意:
1.php的版本,需要支持命名空间,所以需要5.3以上的支持
2.安装thrift的php扩展,貌似这个没有实际用到,还是得使用相关的php文件,谁能写个扩展就好了.不知道性能是否能够提升.
3.对于scan的相关操作,测试了 start/stop, prefix的Scan,感觉还是可以的.
4.感觉php的命名空间很挫,怎么办..\分割感觉就是那么的不地道......
接下来,有时间的话,会做下其他的几个操作,并进行压力测试,并将这个部署到集群中去.
大家有用Thrift的欢迎交流,感谢hguisu写的这个文章(参考文章),让大家能够尽快的入门.
更新内容:
20130517 在集群上启动了Thrift发现写入操作的时候,还是不稳定,有比较严重的超时现象,对于这块的操作,需要进行 php 操作类的优化. 其实感觉操作类还是写的太复杂的了.
参考文章: