更新 Hive 表中指定时间范围的列值
Update column value for a specified time range in Hive table
一个 Hive 表 "Employee" 包含一列 "timerange",数据如下:
timerange
1:10
1:13
1:17
1:21
1:26
如果最后一位数字介于 0 到 4 之间,则必须将其更新为 0;如果最后一位数字介于 5 到 9 之间,则必须将其更新为 5。
预期输出为
timerange
1:10
1:10
1:15
1:20
1:25
我该怎么做?
您可以创建通用 UDF (GenericUDF)。
这是一个示例 UDF:
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
public class TimeRangeConverter GenericUDF {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 1) {
throw new UDFArgumentLengthException("The function time_range_converter(time_rage) requires 1 argument.");
}
ObjectInspector timeRangeVal = arguments[0];
if (!(timeRangeVal instanceof StringObjectInspector)) {
throw new UDFArgumentException("First argument must be of type String (time_range as String)");
}
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
String timeRangeVal = (String) ObjectInspectorUtils.copyToStandardJavaObject(arguments[0].get(),
PrimitiveObjectInspectorFactory.javaStringObjectInspector);
char[] characters = timeRangeVal.toCharArray();
if (characters[characters.length - 1] > '5') {
characters[characters.length - 1] = '5';
} else {
characters[characters.length - 1] = '0';
}
return String.valueOf(characters);
}
@Override
public String getDisplayString(String[] arguments) {
assert (arguments.length == 1);
return "time_range_converter(" + arguments[0] + ")";
}
}
像这样调用 Hive 更新语句:
-- Register the UDF class under the name used in the query below.
-- (The jar containing TimeRangeConverter must already be on the session classpath.)
CREATE TEMPORARY FUNCTION time_range_converter AS 'TimeRangeConverter';

-- Rewrite every timerange value in place.
-- NOTE: Hive accepts UPDATE only on transactional (ACID) tables.
UPDATE
    Employee
SET
    timerange = time_range_converter(timerange);
您可以通过内置的字符串操作来做到这一点:
-- Keep everything except the last character, then append '0' when that
-- character sorts below '5' and '5' otherwise.
SELECT CONCAT(
           SUBSTRING(timerange, 1, LENGTH(timerange) - 1),
           CASE
               WHEN SUBSTRING(timerange, LENGTH(timerange)) < '5' THEN '0'
               ELSE '5'
           END
       ) AS timerange
FROM Employee;
一个 Hive 表 "Employee" 包含一列 "timerange",数据如下:
timerange
1:10
1:13
1:17
1:21
1:26
如果最后一位数字介于 0 到 4 之间,则必须将其更新为 0;如果最后一位数字介于 5 到 9 之间,则必须将其更新为 5。
预期输出为
timerange
1:10
1:10
1:15
1:20
1:25
我该怎么做?
您可以创建通用 UDF (GenericUDF)。
这是一个示例 UDF:
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
public class TimeRangeConverter GenericUDF {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 1) {
throw new UDFArgumentLengthException("The function time_range_converter(time_rage) requires 1 argument.");
}
ObjectInspector timeRangeVal = arguments[0];
if (!(timeRangeVal instanceof StringObjectInspector)) {
throw new UDFArgumentException("First argument must be of type String (time_range as String)");
}
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
String timeRangeVal = (String) ObjectInspectorUtils.copyToStandardJavaObject(arguments[0].get(),
PrimitiveObjectInspectorFactory.javaStringObjectInspector);
char[] characters = timeRangeVal.toCharArray();
if (characters[characters.length - 1] > '5') {
characters[characters.length - 1] = '5';
} else {
characters[characters.length - 1] = '0';
}
return String.valueOf(characters);
}
@Override
public String getDisplayString(String[] arguments) {
assert (arguments.length == 1);
return "time_range_converter(" + arguments[0] + ")";
}
}
像这样调用 Hive 更新语句:
-- Register the UDF class under the name used in the query below.
-- (The jar containing TimeRangeConverter must already be on the session classpath.)
CREATE TEMPORARY FUNCTION time_range_converter AS 'TimeRangeConverter';

-- Rewrite every timerange value in place.
-- NOTE: Hive accepts UPDATE only on transactional (ACID) tables.
UPDATE
    Employee
SET
    timerange = time_range_converter(timerange);
您可以通过内置的字符串操作来做到这一点:
-- Keep everything except the last character, then append '0' when that
-- character sorts below '5' and '5' otherwise.
SELECT CONCAT(
           SUBSTRING(timerange, 1, LENGTH(timerange) - 1),
           CASE
               WHEN SUBSTRING(timerange, LENGTH(timerange)) < '5' THEN '0'
               ELSE '5'
           END
       ) AS timerange
FROM Employee;