无法初始化 class com.datastax.spark.connector.types.TypeConverter$
Could not initialize class com.datastax.spark.connector.types.TypeConverter$
我正在尝试使用 Apache-Spark 查询本地 Cassandra 表,但是当运行任何 select/show 语句时,
我遇到了这个错误
Could not initialize class com.datastax.spark.connector.types.TypeConverter$
版本:
Cassandra:版本 3.11.2 | cqlsh 版本 5.0.1
Apache-Spark:版本 2.3.1
Scala 版本 2.12.6
Cassandra 键空间 -> Table
CREATE KEYSPACE test_users
... WITH REPLICATION = {
... 'class' : 'SimpleStrategy',
... 'replication_factor' : 1
... };
CREATE TABLE member (
member_id bigint PRIMARY KEY,
member_name varchar,
member_age int
);
cqlsh> select * from member;
+---------+----------+-----------------+
|member_id|member_age| member_name|
+---------+----------+-----------------+
| 5| 53| Walter White|
| 6| 29|Henry Derplestick|
| 1| 67| Larry David|
| 4| 31| Joe Schmoe|
| 2| 19| Karen Dinglebop|
| 3| 49| Kenny Logins|
+---------+----------+-----------------+
QueryMembers.Scala
spark-shell --packages com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3 --conf spark.cassandra.connection.host="10.0.0.233"
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.expressions.Window
import spark.implicits._
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector
import org.joda.time.LocalDate
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.{lit, row_number}
import org.apache.spark.sql.cassandra._
import org.apache.spark.sql.SQLContext
val conf = new SparkConf(true).
set("spark.cassandra.connection.host", "10.0.0.233").
set("spark.cassandra.connection.port", "9042")
val sc = new SparkContext("spark://10.0.0.233:9042", "test", conf)
val members = spark.
read.
format("org.apache.spark.sql.cassandra").
options(Map( "table" -> "member", "keyspace" -> "test_users" )).
load()
members.printSchema()
val older_members = members.select("member_id", "member_age", "member_name").
where("member_age > 50")
older_members: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [member_id: bigint, member_age: int ... 1 more field]
older_members.show() // breaks here
错误
Caused by: java.io.IOException: Exception during preparation of SELECT "member_id", "member_age", "member_name" FROM "test_users"."member" WHERE token("member_id") > ? AND token("member_id") <= ? ALLOW FILTERING: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
2018-07-29 18:57:09 ERROR Executor:91 - Exception in task 0.0 in stage 10.0 (TID 29)
java.io.IOException: Exception during preparation of SELECT "member_id", "member_age", "member_name" FROM "test_users"."member" WHERE token("member_id") > ? AND token("member_id") <= ? ALLOW FILTERING: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.createStatement(CassandraTableScanRDD.scala:293)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.com$datastax$spark$connector$rdd$CassandraTableScanRDD$$fetchTokenRange(CassandraTableScanRDD.scala:307)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:335)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:335)
at scala.collection.Iterator$$anon.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:440)
at com.datastax.spark.connector.util.CountingIterator.hasNext(CountingIterator.scala:12)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$$anon.hasNext(WholeStageCodegenExec.scala:614)
at org.apache.spark.sql.execution.SparkPlan$$anonfun.apply(SparkPlan.scala:253)
at org.apache.spark.sql.execution.SparkPlan$$anonfun.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$$anonfun$apply.apply(RDD.scala:830)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$$anonfun$apply.apply(RDD.scala:830)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
at com.datastax.spark.connector.types.BigIntType$.converterToCassandra(PrimitiveColumnType.scala:50)
at com.datastax.spark.connector.types.BigIntType$.converterToCassandra(PrimitiveColumnType.scala:46)
at com.datastax.spark.connector.types.ColumnType$.converterToCassandra(ColumnType.scala:229)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:282)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:282)
at scala.collection.TraversableLike$$anonfun$map.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map.apply(TraversableLike.scala:234)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:
如有任何见解,我们将不胜感激。
经过进一步研究,我决定尝试将 Apache Spark 版本从 2.3.1 回滚到 2.2.0,这已经解决了问题。
我现在能够运行并获得查询结果
val older_members = members.select("member_id", "member_age", "member_name").
where("member_age > 50")
older_members.show
对于 Spark 2.3.0,您需要使用最新版本的 spark-cassandra-connector:2.3.1...
此外,您不能使用 2.0.0-M3 版本——它是预发布版本。2.0.x 系列的最新版本是 2.0.9。您需要始终在 Maven Central 上检查最新版本。
这个连接器也有同样的问题。花了将近 1 天的时间才弄清楚。不要使用 spark-cassandra-connector_2.10-2.0.0-M3,使用较新版本的 spark-cassandra-connector。
感谢@Alex Ott
我正在尝试使用 Apache-Spark 查询本地 Cassandra 表,但是当运行任何 select/show 语句时,
我遇到了这个错误:Could not initialize class com.datastax.spark.connector.types.TypeConverter$
版本:
Cassandra:版本 3.11.2 | cqlsh 版本 5.0.1
Apache-Spark:版本 2.3.1
Scala 版本 2.12.6
Cassandra 键空间 -> Table
CREATE KEYSPACE test_users
... WITH REPLICATION = {
... 'class' : 'SimpleStrategy',
... 'replication_factor' : 1
... };
CREATE TABLE member (
member_id bigint PRIMARY KEY,
member_name varchar,
member_age int
);
cqlsh> select * from member;
+---------+----------+-----------------+
|member_id|member_age| member_name|
+---------+----------+-----------------+
| 5| 53| Walter White|
| 6| 29|Henry Derplestick|
| 1| 67| Larry David|
| 4| 31| Joe Schmoe|
| 2| 19| Karen Dinglebop|
| 3| 49| Kenny Logins|
+---------+----------+-----------------+
QueryMembers.Scala
spark-shell --packages com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3 --conf spark.cassandra.connection.host="10.0.0.233"
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.expressions.Window
import spark.implicits._
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector
import org.joda.time.LocalDate
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.{lit, row_number}
import org.apache.spark.sql.cassandra._
import org.apache.spark.sql.SQLContext
val conf = new SparkConf(true).
set("spark.cassandra.connection.host", "10.0.0.233").
set("spark.cassandra.connection.port", "9042")
val sc = new SparkContext("spark://10.0.0.233:9042", "test", conf)
val members = spark.
read.
format("org.apache.spark.sql.cassandra").
options(Map( "table" -> "member", "keyspace" -> "test_users" )).
load()
members.printSchema()
val older_members = members.select("member_id", "member_age", "member_name").
where("member_age > 50")
older_members: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [member_id: bigint, member_age: int ... 1 more field]
older_members.show() // breaks here
错误
Caused by: java.io.IOException: Exception during preparation of SELECT "member_id", "member_age", "member_name" FROM "test_users"."member" WHERE token("member_id") > ? AND token("member_id") <= ? ALLOW FILTERING: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
2018-07-29 18:57:09 ERROR Executor:91 - Exception in task 0.0 in stage 10.0 (TID 29)
java.io.IOException: Exception during preparation of SELECT "member_id", "member_age", "member_name" FROM "test_users"."member" WHERE token("member_id") > ? AND token("member_id") <= ? ALLOW FILTERING: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.createStatement(CassandraTableScanRDD.scala:293)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.com$datastax$spark$connector$rdd$CassandraTableScanRDD$$fetchTokenRange(CassandraTableScanRDD.scala:307)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:335)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:335)
at scala.collection.Iterator$$anon.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:440)
at com.datastax.spark.connector.util.CountingIterator.hasNext(CountingIterator.scala:12)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$$anon.hasNext(WholeStageCodegenExec.scala:614)
at org.apache.spark.sql.execution.SparkPlan$$anonfun.apply(SparkPlan.scala:253)
at org.apache.spark.sql.execution.SparkPlan$$anonfun.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$$anonfun$apply.apply(RDD.scala:830)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$$anonfun$apply.apply(RDD.scala:830)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class com.datastax.spark.connector.types.TypeConverter$
at com.datastax.spark.connector.types.BigIntType$.converterToCassandra(PrimitiveColumnType.scala:50)
at com.datastax.spark.connector.types.BigIntType$.converterToCassandra(PrimitiveColumnType.scala:46)
at com.datastax.spark.connector.types.ColumnType$.converterToCassandra(ColumnType.scala:229)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:282)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun.apply(CassandraTableScanRDD.scala:282)
at scala.collection.TraversableLike$$anonfun$map.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map.apply(TraversableLike.scala:234)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:
如有任何见解,我们将不胜感激。
经过进一步研究,我决定尝试将 Apache Spark 版本从 2.3.1 回滚到 2.2.0,这已经解决了问题。
我现在能够运行并获得查询结果
val older_members = members.select("member_id", "member_age", "member_name").
where("member_age > 50")
older_members.show
对于 Spark 2.3.0,您需要使用最新版本的 spark-cassandra-connector:2.3.1...
此外,您不能使用 2.0.0-M3 版本——它是预发布版本。2.0.x 系列的最新版本是 2.0.9。您需要始终在 Maven Central 上检查最新版本。
这个连接器也有同样的问题。花了将近 1 天的时间才弄清楚。不要使用 spark-cassandra-connector_2.10-2.0.0-M3,使用较新版本的 spark-cassandra-connector。
感谢@Alex Ott