java.net.SocketTimeoutException:使用 spark 2.2 将数据帧加载到 teradata 时连接超时

java.net.SocketTimeoutException: connect timed out while loading dataframe to teradata using spark 2.2

我正在尝试使用 JDBC 驱动程序将数据帧加载到 teradata 中,但出现以下错误。

jdbc 版本:- 16.10.00.07 cloudera 版本:- 5.15

使用以下命令调用 spark shell。 spark2-shell --jars terajdbc4.jar,tdgssconfig.jar,teradata-connector-1.4.4.jar

我们能够获取 table 的架构,但无法对 table 的所有列进行 select 或插入操作。

我正在使用的代码也可以在下面找到。

val insertdf = spark.sql(query)
val prop = new java.util.Properties
prop.setProperty("driver", "com.teradata.jdbc.TeraDriver")
prop.setProperty("user", "xxxxxx")
prop.setProperty("password", "*******") 

val url = "jdbc:teradata://10.xxx.130.69/database=stg_database, TMODE=TERA"
val table = "test_table"
insertdf.write.mode("append").jdbc(url, table, prop)


ERROR:-

  Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage.apply(DAGScheduler.scala:1487)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage.apply(DAGScheduler.scala:1486)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed.apply(DAGScheduler.scala:814)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed.apply(DAGScheduler.scala:814)
  at scala.Option.foreach(Option.scala:257)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
  at org.apache.spark.util.EventLoop$$anon.run(EventLoop.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2024)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2045)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2064)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2853)
  at org.apache.spark.sql.Dataset$$anonfun$head.apply(Dataset.scala:2153)
  at org.apache.spark.sql.Dataset$$anonfun$head.apply(Dataset.scala:2153)
  at org.apache.spark.sql.Dataset$$anonfun.apply(Dataset.scala:2837)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2836)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2153)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2366)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:245)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:644)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:603)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:612)
  ... 48 elided
Caused by: java.sql.SQLException: [Teradata JDBC Driver] [TeraJDBC 16.10.00.07] [Error 1277] [SQLState 08S01] Login timeout for Connection to 10.130.130.69 Tue Feb 04 02:05:21 EST 2020 socket orig=10.130.130.69 cid=44c91563 sess=0 java.net.SocketTimeoutException: connect timed out  at java.net.PlainSocketImpl.socketConnect(Native Method)  at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)  at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)  at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)  at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)  at java.net.Socket.connect(Socket.java:589)  at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF$ConnectThread.run(TDNetworkIOIF.java:1242)
  at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:95)
  at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:70)
  at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeIoJDBCException(ErrorFactory.java:208)
  at com.teradata.jdbc.jdbc_4.util.ErrorAnalyzer.analyzeIoError(ErrorAnalyzer.java:59)
  at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF.createSocketConnection(TDNetworkIOIF.java:163)
  at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF.<init>(TDNetworkIOIF.java:142)
  at com.teradata.jdbc.jdbc.GenericTeradataConnection.getIO(GenericTeradataConnection.java:142)
  at com.teradata.jdbc.jdbc.GenericLogonController.run(GenericLogonController.java:100)
  at com.teradata.jdbc.jdbc_4.TDSession.<init>(TDSession.java:211)
  at com.teradata.jdbc.jdk6.JDK6_SQL_Connection.<init>(JDK6_SQL_Connection.java:36)
  at com.teradata.jdbc.jdk6.JDK6ConnectionFactory.constructSQLConnection(JDK6ConnectionFactory.java:25)
  at com.teradata.jdbc.jdbc.ConnectionFactory.createConnection(ConnectionFactory.java:178)
  at com.teradata.jdbc.jdbc.ConnectionFactory.createConnection(ConnectionFactory.java:168)
  at com.teradata.jdbc.TeraDriver.doConnect(TeraDriver.java:236)
  at com.teradata.jdbc.TeraDriver.connect(TeraDriver.java:162)
  at org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.connect(DriverWrapper.scala:45)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory.apply(JdbcUtils.scala:61)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory.apply(JdbcUtils.scala:52)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:286)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  at org.apache.spark.scheduler.Task.run(Task.scala:108)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.SocketTimeoutException: connect timed out
  at java.net.PlainSocketImpl.socketConnect(Native Method)
  at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
  at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
  at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
  at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
  at java.net.Socket.connect(Socket.java:589)

这些是来自 Teradata JDBC 驱动程序的堆栈跟踪的重要部分:

[错误 1277] [SQLState 08S01] 连接到 10.130.130.69 的登录超时 ... 原因:java.net.SocketTimeoutException:连接超时

此错误表示 Teradata JDBC 驱动程序无法连接到 IP 地址为 10.130.130.69 的数据库。

要么是错误的 IP 地址,要么是数据库不是 运行(它已关闭),或者你所在的系统 运行 你的程序没有连接到那个IP 地址(被防火墙阻止)。