Error in Scala project while reading a file: Caused by: java.io.IOException: No FileSystem for scheme: file
I searched on Google but could not find a solution, so I am posting it here.
val a_spark: SparkSession = SparkUtils.getSparkInstance("abc")
filepath : /Users/user1/Documents/input/demo.xml
The variables above are passed into the method below:
def getDataFrame(a_spark: SparkSession, filePath: String): DataFrame = {
  a_spark.read
    .format("com.databricks.spark.xml")
    .option("rootTag", "PlaceList")
    .option("rowTag", "Place")
    .load(filePath) // error on this line
}
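For context, SparkUtils.getSparkInstance is not shown in the question; a minimal sketch of what such a helper could look like, assuming it simply builds a local SparkSession, is:

import org.apache.spark.sql.SparkSession

// Hypothetical helper; the real SparkUtils implementation is not part of the question.
object SparkUtils {
  // Assumption: runs locally with all cores ("local[*]").
  def getSparkInstance(appName: String): SparkSession =
    SparkSession.builder()
      .appName(appName)
      .master("local[*]")
      .getOrCreate()
}

The full stack trace from the failing load call is: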
Exception in thread "main" java.lang.ExceptionInInitializerError
at Main$.delayedEndpoint$Main(Main.scala:8)
at Main$delayedInit$body.apply(Main.scala:3)
at scala.Function0.apply$mcV$sp(Function0.scala:39)
at scala.Function0.apply$mcV$sp$(Function0.scala:39)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17)
at scala.App.$anonfun$main$adapted(App.scala:80)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.App.main(App.scala:80)
at scala.App.main$(App.scala:78)
at Main$.main(Main.scala:3)
at Main.main(Main.scala)
Caused by: java.io.IOException: No FileSystem for scheme: file
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2586)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2593)
at org.apache.hadoop.fs.FileSystem.access0(FileSystem.java:91)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2632)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2614)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370)
at org.apache.hadoop.fs.FileSystem.getLocal(FileSystem.java:341)
at org.apache.spark.SparkContext.$anonfun$newAPIHadoopFile(SparkContext.scala:1151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:699)
at org.apache.spark.SparkContext.newAPIHadoopFile(SparkContext.scala:1146)
at com.databricks.spark.xml.util.XmlFile$.withCharset(XmlFile.scala:46)
at com.databricks.spark.xml.DefaultSource.$anonfun$createRelation(DefaultSource.scala:71)
at com.databricks.spark.xml.XmlRelation.$anonfun$schema(XmlRelation.scala:43)
at scala.Option.getOrElse(Option.scala:189)
at com.databricks.spark.xml.XmlRelation.<init>(XmlRelation.scala:42)
at com.databricks.spark.xml.XmlRelation$.apply(XmlRelation.scala:29)
at com.databricks.spark.xml.DefaultSource.createRelation(DefaultSource.scala:74)
at com.databricks.spark.xml.DefaultSource.createRelation(DefaultSource.scala:52)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at util.SparkUtils$.getDataFrame(SparkUtils.scala:26)
I tried adding a file:// prefix to the beginning of the file path, but I still get the same error after adding it.
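For reference, that attempt looks something like the following (using the path from above):

a_spark.read
  .format("com.databricks.spark.xml")
  .option("rootTag", "PlaceList")
  .option("rowTag", "Place")
  .load("file:///Users/user1/Documents/input/demo.xml") // same exception is thrown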
The solution to this problem is to set the file system implementations on the SparkSession's Hadoop configuration:
import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession

object IdentifyData {
  val m_spark: SparkSession = SparkUtils.getSparkInstance("name1")
  // Register the FileSystem implementations explicitly so Hadoop can resolve the hdfs:// and file:// schemes
  val hadoopConfig: Configuration = m_spark.sparkContext.hadoopConfiguration
  hadoopConfig.set("fs.hdfs.impl", classOf[org.apache.hadoop.hdfs.DistributedFileSystem].getName)
  hadoopConfig.set("fs.file.impl", classOf[org.apache.hadoop.fs.LocalFileSystem].getName)
}