Hbase - 如何获取 table 中的列名?
Hbase - How to get column names in a table?
我有一些 HBase table 有数百万行但只有几列。我想提取每个 table 的列名并将其存储在一个单独的文件中。做这个的最好方式是什么?
谢谢
这应该将列名保存在本地(而不是 hdfs)的 Hbase_table_columns.txt
文件中:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}' > Hbase_table_columns.txt
这应该在控制台上打印列名:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}'
这应该将列名保存在 Hbase_table_columns.txt
文件中并在控制台上打印:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}' |tee Hbase_table_columns.txt
这应该保存/打印 column family:column name(列族:列名):
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}'|tee Hbase_table_columns.txt
我建议使用 Java HBase 客户端 API,它由 HBaseAdmin class 公开,如下所示...
客户端代码如下:
package mytest;
import com.usertest.*;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Command-line driver that lists the columns of every HBase table whose
 * name matches a pattern, one {@code table,column} pair per output line.
 */
public class ListHbaseTablesAndColumns {
    /**
     * Looks up the tables matching {@code .*yourtables.*} and prints each
     * discovered column as {@code <table>,<family:qualifier>} on standard
     * output. An {@link IOException} raised while talking to HBase is
     * reported via its stack trace.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            final HbaseMetaData metaData = new HbaseMetaData();
            final List<String> matchingTables = metaData.getTableNames(".*yourtables.*");
            for (String table : matchingTables) {
                // Sample at most 10000 rows (per the page filter) of each table.
                final Set<String> qualifiedColumns = metaData.getColumns(table, 10000);
                for (String qualifiedColumn : qualifiedColumns) {
                    System.out.println(table + "," + qualifiedColumn);
                }
            }
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }
}
使用下面的 class 获取 HbaseMetaData..
package com.usertest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.PageFilter;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Small helper around the HBase client API for discovering table names and
 * the {@code family:qualifier} column names that actually occur in a table.
 *
 * <p>HBase has no fixed column schema beyond column families, so columns are
 * discovered by scanning rows and collecting the qualifiers that appear.
 *
 * <p>Instances hold an {@link HBaseAdmin}; call {@link #close()} when done.
 */
public class HbaseMetaData implements java.io.Closeable {
    /** Row limit used by {@link #getColumns(String)} when the caller gives none. */
    private static final int DEFAULT_SCAN_LIMIT = 10000;

    /**
     * Charset used to decode family/qualifier bytes. HBase's own string
     * helpers encode as UTF-8, so decode the same way instead of relying on
     * the platform default charset.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    private final HBaseAdmin hBaseAdmin;
    private final Configuration hBaseConfiguration;

    /**
     * Creates the helper using the default HBase configuration.
     *
     * @throws IOException if the admin connection cannot be established
     */
    public HbaseMetaData() throws IOException {
        this.hBaseConfiguration = HBaseConfiguration.create();
        this.hBaseAdmin = new HBaseAdmin(hBaseConfiguration);
    }

    /**
     * Returns the names of all tables whose name contains a match for the
     * given regular expression.
     *
     * @param regex regular expression; a table is included when the pattern
     *              is found anywhere in its name ({@code find()}, not a full match)
     * @return matching table names, in the order reported by the admin API
     * @throws IOException if the table list cannot be retrieved
     */
    public List<String> getTableNames(String regex) throws IOException {
        Pattern pattern = Pattern.compile(regex);
        List<String> tableList = new ArrayList<String>();
        for (TableName tableName : hBaseAdmin.listTableNames()) {
            String name = tableName.toString();
            if (pattern.matcher(name).find()) {
                tableList.add(name);
            }
        }
        return tableList;
    }

    /**
     * Returns the columns found in the table using the default row limit.
     *
     * @param hbaseTable name of the table to inspect
     * @return sorted set of {@code family:qualifier} names
     * @throws IOException if the scan fails
     */
    public Set<String> getColumns(String hbaseTable) throws IOException {
        return getColumns(hbaseTable, DEFAULT_SCAN_LIMIT);
    }

    /**
     * Scans the table and collects every distinct {@code family:qualifier}
     * seen in the returned rows.
     *
     * <p>Only the rows that are scanned are inspected, so a column that
     * appears solely in rows beyond the limit is not reported.
     *
     * @param hbaseTable name of the table to inspect
     * @param limitScan  page size handed to {@link PageFilter}; NOTE(review):
     *                   the filter is evaluated per region server, so the
     *                   total number of rows seen may exceed this value
     * @return sorted set of {@code family:qualifier} names
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public Set<String> getColumns(String hbaseTable, int limitScan) throws IOException {
        Set<String> columnList = new TreeSet<String>();
        HTable hTable = new HTable(hBaseConfiguration, hbaseTable);
        try {
            Scan scan = new Scan();
            scan.setFilter(new PageFilter(limitScan));
            ResultScanner results = hTable.getScanner(scan);
            try {
                for (Result result : results) {
                    List<KeyValue> keyValues = result.list();
                    if (keyValues == null) {
                        // An empty Result may expose no list at all; skip it.
                        continue;
                    }
                    for (KeyValue keyValue : keyValues) {
                        columnList.add(
                                new String(keyValue.getFamily(), UTF_8) + ":"
                                        + new String(keyValue.getQualifier(), UTF_8));
                    }
                }
            } finally {
                // Release the server-side scanner even if iteration fails.
                results.close();
            }
        } finally {
            // Release the table's resources even if the scan fails.
            hTable.close();
        }
        return columnList;
    }

    /**
     * Releases the underlying {@link HBaseAdmin}.
     *
     * @throws IOException if closing the admin fails
     */
    public void close() throws IOException {
        hBaseAdmin.close();
    }
}
下面将有助于获取与特定键相关的列
scan 'namespace:tablename',{FILTER=>'KeyOnlyFilter()'}
我有一些 HBase table 有数百万行但只有几列。我想提取每个 table 的列名并将其存储在一个单独的文件中。做这个的最好方式是什么? 谢谢
这应该将列名保存在本地(而不是 hdfs)的 Hbase_table_columns.txt
文件中:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}' > Hbase_table_columns.txt
这应该在控制台上打印列名:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}'
这应该将列名保存在 Hbase_table_columns.txt
文件中并在控制台上打印:
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}' | awk -F ':' '{print $2}' |tee Hbase_table_columns.txt
这应该保存/打印 column family:column name(列族:列名):
echo "scan 'table_name'" | $HBASE_HOME/bin/hbase shell | awk -F'=' '{print $2}'|tee Hbase_table_columns.txt
我建议使用 Java HBase 客户端 API,它由 HBaseAdmin class 公开,如下所示...
客户端代码如下:
package mytest;
import com.usertest.*;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Command-line driver that lists the columns of every HBase table whose
 * name matches a pattern, one {@code table,column} pair per output line.
 */
public class ListHbaseTablesAndColumns {
    /**
     * Looks up the tables matching {@code .*yourtables.*} and prints each
     * discovered column as {@code <table>,<family:qualifier>} on standard
     * output. An {@link IOException} raised while talking to HBase is
     * reported via its stack trace.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            final HbaseMetaData metaData = new HbaseMetaData();
            final List<String> matchingTables = metaData.getTableNames(".*yourtables.*");
            for (String table : matchingTables) {
                // Sample at most 10000 rows (per the page filter) of each table.
                final Set<String> qualifiedColumns = metaData.getColumns(table, 10000);
                for (String qualifiedColumn : qualifiedColumns) {
                    System.out.println(table + "," + qualifiedColumn);
                }
            }
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }
}
使用下面的 class 获取 HbaseMetaData..
package com.usertest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.PageFilter;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Small helper around the HBase client API for discovering table names and
 * the {@code family:qualifier} column names that actually occur in a table.
 *
 * <p>HBase has no fixed column schema beyond column families, so columns are
 * discovered by scanning rows and collecting the qualifiers that appear.
 *
 * <p>Instances hold an {@link HBaseAdmin}; call {@link #close()} when done.
 */
public class HbaseMetaData implements java.io.Closeable {
    /** Row limit used by {@link #getColumns(String)} when the caller gives none. */
    private static final int DEFAULT_SCAN_LIMIT = 10000;

    /**
     * Charset used to decode family/qualifier bytes. HBase's own string
     * helpers encode as UTF-8, so decode the same way instead of relying on
     * the platform default charset.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    private final HBaseAdmin hBaseAdmin;
    private final Configuration hBaseConfiguration;

    /**
     * Creates the helper using the default HBase configuration.
     *
     * @throws IOException if the admin connection cannot be established
     */
    public HbaseMetaData() throws IOException {
        this.hBaseConfiguration = HBaseConfiguration.create();
        this.hBaseAdmin = new HBaseAdmin(hBaseConfiguration);
    }

    /**
     * Returns the names of all tables whose name contains a match for the
     * given regular expression.
     *
     * @param regex regular expression; a table is included when the pattern
     *              is found anywhere in its name ({@code find()}, not a full match)
     * @return matching table names, in the order reported by the admin API
     * @throws IOException if the table list cannot be retrieved
     */
    public List<String> getTableNames(String regex) throws IOException {
        Pattern pattern = Pattern.compile(regex);
        List<String> tableList = new ArrayList<String>();
        for (TableName tableName : hBaseAdmin.listTableNames()) {
            String name = tableName.toString();
            if (pattern.matcher(name).find()) {
                tableList.add(name);
            }
        }
        return tableList;
    }

    /**
     * Returns the columns found in the table using the default row limit.
     *
     * @param hbaseTable name of the table to inspect
     * @return sorted set of {@code family:qualifier} names
     * @throws IOException if the scan fails
     */
    public Set<String> getColumns(String hbaseTable) throws IOException {
        return getColumns(hbaseTable, DEFAULT_SCAN_LIMIT);
    }

    /**
     * Scans the table and collects every distinct {@code family:qualifier}
     * seen in the returned rows.
     *
     * <p>Only the rows that are scanned are inspected, so a column that
     * appears solely in rows beyond the limit is not reported.
     *
     * @param hbaseTable name of the table to inspect
     * @param limitScan  page size handed to {@link PageFilter}; NOTE(review):
     *                   the filter is evaluated per region server, so the
     *                   total number of rows seen may exceed this value
     * @return sorted set of {@code family:qualifier} names
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public Set<String> getColumns(String hbaseTable, int limitScan) throws IOException {
        Set<String> columnList = new TreeSet<String>();
        HTable hTable = new HTable(hBaseConfiguration, hbaseTable);
        try {
            Scan scan = new Scan();
            scan.setFilter(new PageFilter(limitScan));
            ResultScanner results = hTable.getScanner(scan);
            try {
                for (Result result : results) {
                    List<KeyValue> keyValues = result.list();
                    if (keyValues == null) {
                        // An empty Result may expose no list at all; skip it.
                        continue;
                    }
                    for (KeyValue keyValue : keyValues) {
                        columnList.add(
                                new String(keyValue.getFamily(), UTF_8) + ":"
                                        + new String(keyValue.getQualifier(), UTF_8));
                    }
                }
            } finally {
                // Release the server-side scanner even if iteration fails.
                results.close();
            }
        } finally {
            // Release the table's resources even if the scan fails.
            hTable.close();
        }
        return columnList;
    }

    /**
     * Releases the underlying {@link HBaseAdmin}.
     *
     * @throws IOException if closing the admin fails
     */
    public void close() throws IOException {
        hBaseAdmin.close();
    }
}
下面将有助于获取与特定键相关的列
scan 'namespace:tablename',{FILTER=>'KeyOnlyFilter()'}