在 Hbase 中查找具有空值的行数
Find count of rows with empty value in Hbase
我已经使用 rowid 和与 tweet 相关的各种信息填充了 Hbase table,例如纯文本、url、hashtag 等,如下所示
902221655086211073 column=clean-tweet:clean-text-cta, timestamp=1514793745304, value=democrat mayor order hurricane harvey stand houston
但是在填充时我注意到有些行是空的,比如
902487280543305728 column=clean-tweet:clean-text-cta, timestamp=1514622371008, value=
现在如何找到包含数据的行数?
请帮帮我
目前 HBase shell 中还没有这样做的规定。也许您可以使用像这样的简单代码来获取一些记录,这些记录对于提供的列限定符没有值。
CountAndFilter [表名] [columnFamily] [columnQualifier]
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class CountAndFilter {
private static Connection conn;
private static int recordsWithoutValue = 0;
public static Admin getConnection() throws IOException {
if (conn == null) {
conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
}
return conn.getAdmin();
}
public static void main(String args[]) throws IOException {
getConnection();
scan(args[0], args[1], args[2]);
System.out.println("Records with empty value : " + recordsWithoutValue);
}
public static void scan(String tableName, String columnFamily, String columnQualifier) throws IOException {
Table table = conn.getTable(TableName.valueOf(tableName));
ResultScanner rs = table.getScanner(new Scan().addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier)));
Result res = null;
try {
while ((res = rs.next()) != null) {
if (res.containsEmptyColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier))){
recordsWithoutValue++;
}
}
} finally {
rs.close();
}
}
}
我已经使用 rowid 和与 tweet 相关的各种信息填充了 Hbase table,例如纯文本、url、hashtag 等,如下所示
902221655086211073 column=clean-tweet:clean-text-cta, timestamp=1514793745304, value=democrat mayor order hurricane harvey stand houston
但是在填充时我注意到有些行是空的,比如
902487280543305728 column=clean-tweet:clean-text-cta, timestamp=1514622371008, value=
现在如何找到包含数据的行数?
请帮帮我
目前 HBase shell 中还没有这样做的规定。也许您可以使用像这样的简单代码来获取一些记录,这些记录对于提供的列限定符没有值。
CountAndFilter [表名] [columnFamily] [columnQualifier]
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class CountAndFilter {
private static Connection conn;
private static int recordsWithoutValue = 0;
public static Admin getConnection() throws IOException {
if (conn == null) {
conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
}
return conn.getAdmin();
}
public static void main(String args[]) throws IOException {
getConnection();
scan(args[0], args[1], args[2]);
System.out.println("Records with empty value : " + recordsWithoutValue);
}
public static void scan(String tableName, String columnFamily, String columnQualifier) throws IOException {
Table table = conn.getTable(TableName.valueOf(tableName));
ResultScanner rs = table.getScanner(new Scan().addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier)));
Result res = null;
try {
while ((res = rs.next()) != null) {
if (res.containsEmptyColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier))){
recordsWithoutValue++;
}
}
} finally {
rs.close();
}
}
}