替换 Spark 嵌套属性名称中的无效字符
Replacing invalid characters in spark nested attribute names
已经有一些帖子讨论了如何处理第一层属性名称中的无效字符,但都没有涉及多层嵌套属性的情况。
我在使用多层嵌套的 schema 时遇到了下面这个错误:
org.apache.spark.sql.AnalysisException: Attribute name "Foo Bar" contains invalid character(s) among " ,;{}()\n\t=". Please use alias to rename it.;
这是我用 Scala 实现的解决方案:
// Regex character class used with String.replaceAll to find characters that are not
// allowed in an attribute name: space, comma, semicolon, braces, parentheses,
// newline, tab and '='. This is the same set the AnalysisException message lists.
private val INVALID_ATTRIBUTE_CHARS = "[ ,;{}()\n\t=]"
/**
 * Builds a new schema from `structType` by passing each of its top-level fields
 * through `cleanStructFld`.
 *
 * @param structType the schema whose fields should be cleaned
 * @return a new `StructType` holding the cleaned fields in their original order
 */
def replaceBadAttriName(structType: StructType): StructType = {
  val cleanedFields = for (field <- structType.fields) yield cleanStructFld(field)
  StructType(cleanedFields)
}
/**
 * Returns a copy of `fld` in which every character matched by
 * `INVALID_ATTRIBUTE_CHARS` is replaced with an underscore, both in the field's own
 * name and in the names of all fields nested inside its data type.
 *
 * Nested structs are reached through struct, array-element, map-key and map-value
 * types. Nullability flags and metadata are carried over unchanged.
 *
 * Note: two sibling names that differ only in invalid characters (for example
 * "a b" and "a,b") both become "a_b"; this function does not de-duplicate them.
 *
 * @param fld the field to clean
 * @return a field with the same type shape, nullability and metadata, and cleaned names
 */
private def cleanStructFld(fld: StructField): StructField = {
  // Local import so the block brings its own extra type names into scope.
  import org.apache.spark.sql.types.{ArrayType, DataType, MapType}

  // Rebuilds a data type, cleaning the field names of every struct found inside it.
  def cleanType(dataType: DataType): DataType = dataType match {
    case struct: StructType =>
      StructType(struct.fields.map(cleanStructFld))
    case ArrayType(elementType, containsNull) =>
      ArrayType(cleanType(elementType), containsNull)
    case MapType(keyType, valueType, valueContainsNull) =>
      MapType(cleanType(keyType), cleanType(valueType), valueContainsNull)
    case other =>
      other
  }

  // The name is cleaned for every field, including struct-typed ones; previously a
  // struct field kept its own (possibly invalid) name and only its leaves were renamed.
  val newName = fld.name.replaceAll(INVALID_ATTRIBUTE_CHARS, "_")
  StructField(newName, cleanType(fld.dataType), fld.nullable, fld.metadata)
}
已经有一些帖子讨论了如何处理第一层属性名称中的无效字符,但都没有涉及多层嵌套属性的情况。
我在使用多层嵌套的 schema 时遇到了下面这个错误:
org.apache.spark.sql.AnalysisException: Attribute name "Foo Bar" contains invalid character(s) among " ,;{}()\n\t=". Please use alias to rename it.;
这是我用 Scala 实现的解决方案:
// Regex character class used with String.replaceAll to find characters that are not
// allowed in an attribute name: space, comma, semicolon, braces, parentheses,
// newline, tab and '='. This is the same set the AnalysisException message lists.
private val INVALID_ATTRIBUTE_CHARS = "[ ,;{}()\n\t=]"
/**
 * Builds a new schema from `structType` by passing each of its top-level fields
 * through `cleanStructFld`.
 *
 * @param structType the schema whose fields should be cleaned
 * @return a new `StructType` holding the cleaned fields in their original order
 */
def replaceBadAttriName(structType: StructType): StructType = {
  val cleanedFields = for (field <- structType.fields) yield cleanStructFld(field)
  StructType(cleanedFields)
}
/**
 * Returns a copy of `fld` in which every character matched by
 * `INVALID_ATTRIBUTE_CHARS` is replaced with an underscore, both in the field's own
 * name and in the names of all fields nested inside its data type.
 *
 * Nested structs are reached through struct, array-element, map-key and map-value
 * types. Nullability flags and metadata are carried over unchanged.
 *
 * Note: two sibling names that differ only in invalid characters (for example
 * "a b" and "a,b") both become "a_b"; this function does not de-duplicate them.
 *
 * @param fld the field to clean
 * @return a field with the same type shape, nullability and metadata, and cleaned names
 */
private def cleanStructFld(fld: StructField): StructField = {
  // Local import so the block brings its own extra type names into scope.
  import org.apache.spark.sql.types.{ArrayType, DataType, MapType}

  // Rebuilds a data type, cleaning the field names of every struct found inside it.
  def cleanType(dataType: DataType): DataType = dataType match {
    case struct: StructType =>
      StructType(struct.fields.map(cleanStructFld))
    case ArrayType(elementType, containsNull) =>
      ArrayType(cleanType(elementType), containsNull)
    case MapType(keyType, valueType, valueContainsNull) =>
      MapType(cleanType(keyType), cleanType(valueType), valueContainsNull)
    case other =>
      other
  }

  // The name is cleaned for every field, including struct-typed ones; previously a
  // struct field kept its own (possibly invalid) name and only its leaves were renamed.
  val newName = fld.name.replaceAll(INVALID_ATTRIBUTE_CHARS, "_")
  StructField(newName, cleanType(fld.dataType), fld.nullable, fld.metadata)
}