如何从 Google DataFlow 连接到 Cloud SQL
how to connect to Cloud SQL from Google DataFlow
我正在尝试使用 beam java SDK 和 Google Dataflow 创建管道任务,以将数据从 Cloud SQL 移动到 Elastic search
我创建了以下类,其 main 方法如下:
public static void main(String[] args) throws Exception{
DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
options.setProject("staging");
options.setTempLocation("gs://csv_to_sql_staging/temp");
options.setRunner(DataflowRunner.class);
options.setGcpTempLocation("gs://csv_to_sql_staging/temp"); options.setUsePublicIps(false);
options.setJobName("tamer-new"); '
options.setSubnetwork("regions/us-central1/subnetworks/new-network");
final List<String> SCOPES = Arrays.asList(
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/devstorage.full_control",
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/datastore",
"https://www.googleapis.com/auth/sqlservice.admin",
"https://www.googleapis.com/auth/pubsub");
options.setGcpCredential(ServiceAccountCredentials.fromStream(new ElasticSearchIO().getClass().getResourceAsStream("/staging-b648da5d2b9b.json")).createScoped(SCOPES)); options.setServiceAccount("data-flow@staging.iam.gserviceaccount.com");
Pipeline p = Pipeline.create(options);
p.begin();
PCollection < List < String >> rows = p.apply(JdbcIO. < List < String >> read().withQuery("select u.id, u.name from user_table").withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create("com.mysql.jdbc.Driver", "jdbc:mysql://google/nameDB_new?cloudSqlInstance=staging:europe-west1:sql-staging-instance&socketFactory=com.google.cloud.sql.mysql.SocketFactory&useUnicode=true&characterEncoding=UTF-8&user=user&password=password&useSSL=false")).withRowMapper(new RowMapper < List < String >> () {
@Override public List < String > mapRow(ResultSet resultSet) throws Exception {
List < String > addRow = new ArrayList < String > ();
for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) {
addRow.add(i - 1, String.valueOf(resultSet.getObject(i)));
}
//LOG.info(String.join(",", addRow));
return addRow;
}
})
.withCoder(ListCoder.of(StringUtf8Coder. < Object > of ()))
);
Write w = ElasticsearchIO.write().withConnectionConfiguration(
ElasticsearchIO.ConnectionConfiguration.create(new String[] {
"https://host:9243"
}, "user-temp", "String").withUsername("elastic").withPassword("password")
);
rows.apply(w.compose(new SerializableFunction() {
@Override public Object apply(Object input) {
// TODO Auto-generated method stub
return input;
}
}));
p.run().waitUntilFinish();
}
下面是 pom.xml 文件:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.harmonica.dataflow</groupId>
<artifactId>com-harmonica-dataflow</artifactId>
<version>0.0.1-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>
<exec-maven-plugin.version>1.6.0</exec-maven-plugin.version>
<slf4j.version>1.7.25</slf4j.version>
<beam.version>2.19.0</beam.version>
</properties>
<repositories>
<repository>
<id>ossrh.snapshots</id>
<name>Sonatype OSS Repository Hosting</name>
<url>https://oss.sonatype.org/content/repositories/snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven-compiler-plugin.version}</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>${exec-maven-plugin.version}</version>
<configuration>
<cleanupDaemonThreads>false</cleanupDaemonThreads>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<dependencies>
<!-- Beam Lib -->
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-core</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-elasticsearch</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-jdbc</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.19</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory-connector-j-8</artifactId>
<version>1.0.15</version>
</dependency>
<!-- slf4j API frontend binding with JUL backend -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
<version>${slf4j.version}</version>
</dependency>
</dependencies>
</project>
当我执行这个命令时:
man exec mvn compile exec:java -Dexec.mainClass=com.dataflow.ElasticSearchIO
worker 成功启动,但无法连接到云 SQL:
尽管我已经完成了以下操作:
- 我创建了一个拥有项目所有者访问权限的服务帐户,并将其传递给运行器选项
- 我创建了一个名为 new-network 的 VPC 网络,IP 范围为 190.10.0.0/16,并将其分配给管道选项,然后在 Cloud SQL 中将该范围列入白名单
但是我仍然收到此错误:
Error message from worker: java.lang.RuntimeException:
org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException:
Cannot create PoolableConnectionFactory (Communications link failure
The last packet sent successfully to the server was 0 milliseconds
ago. The driver has not received any packets from the server.)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:194)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:165)
org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63)
org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50)
org.apache.beam.runners.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:125)
org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.doWork(BatchDataflowWorker.java:352)
org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.getAndPerformWork(BatchDataflowWorker.java:305)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:140)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:120)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:107)
java.util.concurrent.FutureTask.run(FutureTask.java:266)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748) Caused by:
org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException:
Cannot create PoolableConnectionFactory (Communications link failure
The last packet sent successfully to the server was 0 milliseconds
ago. The driver has not received any packets from the server.)
org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:34)
org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn$DoFnInvoker.invokeSetup(Unknown
Source)
org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:80)
org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:62)
org.apache.beam.runners.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:95)
org.apache.beam.runners.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:75)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:264)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.access$000(IntrinsicMapTaskExecutorFactory.java:86)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:183)
... 14 more Caused by: java.sql.SQLException: Cannot create
PoolableConnectionFactory (Communications link failure The last packet
sent successfully to the server was 0 milliseconds ago. The driver has
not received any packets from the server.)
org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:735)
org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:605)
org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:809)
org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn.setup(JdbcIO.java:881)
Caused by: com.mysql.cj.jdbc.exceptions.CommunicationsException:
Communications link failure The last packet sent successfully to the
server was 0 milliseconds ago. The driver has not received any packets
from the server.
com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:174)
com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:64)
com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:836)
com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:456)
com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:246)
com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:197)
org.apache.commons.dbcp2.DriverConnectionFactory.createConnection(DriverConnectionFactory.java:53)
org.apache.commons.dbcp2.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:355)
org.apache.commons.dbcp2.BasicDataSource.validateConnectionFactory(BasicDataSource.java:116)
org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:731)
org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:605)
org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:809)
org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn.setup(JdbcIO.java:881)
org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn$DoFnInvoker.invokeSetup(Unknown
Source)
org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:80)
org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:62)
org.apache.beam.runners.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:95)
org.apache.beam.runners.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:75)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:264)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.access$000(IntrinsicMapTaskExecutorFactory.java:86)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:183)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:165)
org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63)
org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50)
org.apache.beam.runners.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87)
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:125)
org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.doWork(BatchDataflowWorker.java:352)
org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.getAndPerformWork(BatchDataflowWorker.java:305)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:140)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:120)
org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:107)
java.util.concurrent.FutureTask.run(FutureTask.java:266)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748) Caused by:
com.mysql.cj.exceptions.CJCommunicationsException: Communications link
failure The last packet sent successfully to the server was 0
milliseconds ago. The driver has not received any packets from the
server. sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native
Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:423)
com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:61)
com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:105)
com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:151)
com.mysql.cj.exceptions.ExceptionFactory.createCommunicationsException(ExceptionFactory.java:167)
com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:91)
com.mysql.cj.NativeSession.connect(NativeSession.java:144)
com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:956)
com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:826)
... 32 more Caused by: java.net.ConnectException: Connection timed out
(Connection timed out) java.net.PlainSocketImpl.socketConnect(Native
Method)
java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
java.net.Socket.connect(Socket.java:589)
sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:673)
sun.security.ssl.BaseSSLSocketImpl.connect(BaseSSLSocketImpl.java:173)
com.google.cloud.sql.core.CoreSocketFactory.createSslSocket(CoreSocketFactory.java:233)
com.google.cloud.sql.core.CoreSocketFactory.connect(CoreSocketFactory.java:185)
com.google.cloud.sql.mysql.SocketFactory.connect(SocketFactory.java:48)
com.google.cloud.sql.mysql.SocketFactory.connect(SocketFactory.java:38)
com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:65)
... 35 more
非常感谢您的帮助!
提前致谢
如果可能,请为 sql 连接尝试以下代码:
// Obtain the connection from the connectToCloudSql helper (not shown in this snippet), passing
// the three values looked up in `map` under the URL, USERNAME and PASSWORD LiteralConstant keys.
connection = connectToCloudSql(map.get(LiteralConstant.URL.toString()),
map.get(LiteralConstant.USERNAME.toString()), map.get(LiteralConstant.PASSWORD.toString()));
然后使用以下代码从 sql 连接获取结果:
// Prepare and execute the call, then fetch its result set and column metadata.
statement = connection.prepareCall("query");
statement.execute();
resultSet = statement.getResultSet();
ResultSetMetaData rsmd = resultSet.getMetaData();
int count = rsmd.getColumnCount();

// Move to the first row; no row at all, or a result without columns, is reported as a failure.
boolean usable = resultSet.next() && count >= 1;
if (!usable) {
    throw new ConnectionFailureException("Failed to connect to Cloud SQL");
}

// Copy every column of the current row into `row`, keyed by column name (indexes are 1-based).
for (int col = 1; col <= count; ++col) {
    String columnName = rsmd.getColumnName(col);
    row.set(columnName, resultSet.getString(col));
}
在PCollection中得到上面的结果
注意:不要忘记启用 Cloud SQL API 和 Cloud SQL Admin API。
Maven 依赖项:
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.25</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory</artifactId> <!-- mysql-socket-factory-connector-j-6 if using 6.x.x -->
<version>1.0.0</version>
</dependency>
以上这段代码适用于我的情况。如果此解决方案适合您,请告诉我。
您可以使用以下代码建立连接:
// Create the pipeline from the previously built options.
Pipeline p = Pipeline.create(options);
// Pooled data source for the Cloud SQL instance; increase the pool size based on your records.
ComboPooledDataSource dataSource = new ComboPooledDataSource();
dataSource.setDriverClass("com.mysql.jdbc.Driver");
dataSource.setJdbcUrl(
    "jdbc:mysql://google/test?cloudSqlInstance=dataflowtest-:us-central1:sql-test&socketFactory=com.google.cloud.sql.mysql.SocketFactory");
dataSource.setUser("root");
dataSource.setPassword("root");
dataSource.setMaxPoolSize(10);
dataSource.setInitialPoolSize(6);
JdbcIO.DataSourceConfiguration config = JdbcIO.DataSourceConfiguration.create(dataSource);
// Add rewriteBatchedStatements=true to the JDBC URL to improve write speed.
// Read every row of test_table as a key/value pair built from its first two columns.
PCollection<KV<String, String>> sqlResult = p.apply(JdbcIO.<KV<String, String>>read()
    .withDataSourceConfiguration(config)
    .withQuery("select * from test_table")
    .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
    .withRowMapper(new JdbcIO.RowMapper<KV<String, String>>() {
      private static final long serialVersionUID = 1L;

      @Override
      public KV<String, String> mapRow(ResultSet resultSet) throws Exception {
        return KV.of(resultSet.getString(1), resultSet.getString(2));
      }
    }));
在 pom.xml 中添加以下依赖:
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-jdbc</artifactId>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.25</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory</artifactId>
<version>1.0.0</version>
</dependency>
这应该有效..
我正在尝试使用 beam java SDK 和 Google Dataflow 创建管道任务,以将数据从 Cloud SQL 移动到 Elastic search
我创建了以下类,其 main 方法如下:
public static void main(String[] args) throws Exception{
DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
options.setProject("staging");
options.setTempLocation("gs://csv_to_sql_staging/temp");
options.setRunner(DataflowRunner.class);
options.setGcpTempLocation("gs://csv_to_sql_staging/temp"); options.setUsePublicIps(false);
options.setJobName("tamer-new"); '
options.setSubnetwork("regions/us-central1/subnetworks/new-network");
final List<String> SCOPES = Arrays.asList(
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/devstorage.full_control",
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/datastore",
"https://www.googleapis.com/auth/sqlservice.admin",
"https://www.googleapis.com/auth/pubsub");
options.setGcpCredential(ServiceAccountCredentials.fromStream(new ElasticSearchIO().getClass().getResourceAsStream("/staging-b648da5d2b9b.json")).createScoped(SCOPES)); options.setServiceAccount("data-flow@staging.iam.gserviceaccount.com");
Pipeline p = Pipeline.create(options);
p.begin();
PCollection < List < String >> rows = p.apply(JdbcIO. < List < String >> read().withQuery("select u.id, u.name from user_table").withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create("com.mysql.jdbc.Driver", "jdbc:mysql://google/nameDB_new?cloudSqlInstance=staging:europe-west1:sql-staging-instance&socketFactory=com.google.cloud.sql.mysql.SocketFactory&useUnicode=true&characterEncoding=UTF-8&user=user&password=password&useSSL=false")).withRowMapper(new RowMapper < List < String >> () {
@Override public List < String > mapRow(ResultSet resultSet) throws Exception {
List < String > addRow = new ArrayList < String > ();
for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) {
addRow.add(i - 1, String.valueOf(resultSet.getObject(i)));
}
//LOG.info(String.join(",", addRow));
return addRow;
}
})
.withCoder(ListCoder.of(StringUtf8Coder. < Object > of ()))
);
Write w = ElasticsearchIO.write().withConnectionConfiguration(
ElasticsearchIO.ConnectionConfiguration.create(new String[] {
"https://host:9243"
}, "user-temp", "String").withUsername("elastic").withPassword("password")
);
rows.apply(w.compose(new SerializableFunction() {
@Override public Object apply(Object input) {
// TODO Auto-generated method stub
return input;
}
}));
p.run().waitUntilFinish();
}
下面是 pom.xml 文件:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.harmonica.dataflow</groupId>
<artifactId>com-harmonica-dataflow</artifactId>
<version>0.0.1-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>
<exec-maven-plugin.version>1.6.0</exec-maven-plugin.version>
<slf4j.version>1.7.25</slf4j.version>
<beam.version>2.19.0</beam.version>
</properties>
<repositories>
<repository>
<id>ossrh.snapshots</id>
<name>Sonatype OSS Repository Hosting</name>
<url>https://oss.sonatype.org/content/repositories/snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven-compiler-plugin.version}</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>${exec-maven-plugin.version}</version>
<configuration>
<cleanupDaemonThreads>false</cleanupDaemonThreads>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<dependencies>
<!-- Beam Lib -->
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-core</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-elasticsearch</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-jdbc</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
<version>${beam.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.19</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory-connector-j-8</artifactId>
<version>1.0.15</version>
</dependency>
<!-- slf4j API frontend binding with JUL backend -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
<version>${slf4j.version}</version>
</dependency>
</dependencies>
</project>
当我执行这个命令时:
man exec mvn compile exec:java -Dexec.mainClass=com.dataflow.ElasticSearchIO
worker 成功启动,但无法连接到 Cloud SQL,尽管我已经完成了以下操作:
- 我创建了一个拥有项目所有者访问权限的服务帐户,并将其传递给运行器选项
- 我创建了一个名为 new-network 的 VPC 网络,IP 范围为 190.10.0.0/16,并将其分配给管道选项,然后在 Cloud SQL 中将该范围列入白名单
但是我仍然收到此错误:
Error message from worker: java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Communications link failure The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:194) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:165) org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) org.apache.beam.runners.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:125) org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.doWork(BatchDataflowWorker.java:352) org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.getAndPerformWork(BatchDataflowWorker.java:305) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:140) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:120) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:107) java.util.concurrent.FutureTask.run(FutureTask.java:266) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) Caused by: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Communications link failure The last packet sent 
successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.) org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:34) org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn$DoFnInvoker.invokeSetup(Unknown Source) org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:80) org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:62) org.apache.beam.runners.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:95) org.apache.beam.runners.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:75) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:264) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.access[=15=]0(IntrinsicMapTaskExecutorFactory.java:86) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:183) ... 14 more Caused by: java.sql.SQLException: Cannot create PoolableConnectionFactory (Communications link failure The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.) org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:735) org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:605) org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:809) org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn.setup(JdbcIO.java:881) Caused by: com.mysql.cj.jdbc.exceptions.CommunicationsException: Communications link failure The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server. 
com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:174) com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:64) com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:836) com.mysql.cj.jdbc.ConnectionImpl.(ConnectionImpl.java:456) com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:246) com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:197) org.apache.commons.dbcp2.DriverConnectionFactory.createConnection(DriverConnectionFactory.java:53) org.apache.commons.dbcp2.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:355) org.apache.commons.dbcp2.BasicDataSource.validateConnectionFactory(BasicDataSource.java:116) org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:731) org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:605) org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:809) org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn.setup(JdbcIO.java:881) org.apache.beam.sdk.io.jdbc.JdbcIO$ReadFn$DoFnInvoker.invokeSetup(Unknown Source) org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:80) org.apache.beam.runners.dataflow.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:62) org.apache.beam.runners.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:95) org.apache.beam.runners.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:75) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:264) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.access[=15=]0(IntrinsicMapTaskExecutorFactory.java:86) 
org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:183) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.typedApply(IntrinsicMapTaskExecutorFactory.java:165) org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) org.apache.beam.runners.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) org.apache.beam.runners.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:125) org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.doWork(BatchDataflowWorker.java:352) org.apache.beam.runners.dataflow.worker.BatchDataflowWorker.getAndPerformWork(BatchDataflowWorker.java:305) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:140) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:120) org.apache.beam.runners.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:107) java.util.concurrent.FutureTask.run(FutureTask.java:266) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) Caused by: com.mysql.cj.exceptions.CJCommunicationsException: Communications link failure The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server. 
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) java.lang.reflect.Constructor.newInstance(Constructor.java:423) com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:61) com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:105) com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:151) com.mysql.cj.exceptions.ExceptionFactory.createCommunicationsException(ExceptionFactory.java:167) com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:91) com.mysql.cj.NativeSession.connect(NativeSession.java:144) com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:956) com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:826) ... 32 more Caused by: java.net.ConnectException: Connection timed out (Connection timed out) java.net.PlainSocketImpl.socketConnect(Native Method) java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) java.net.Socket.connect(Socket.java:589) sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:673) sun.security.ssl.BaseSSLSocketImpl.connect(BaseSSLSocketImpl.java:173) com.google.cloud.sql.core.CoreSocketFactory.createSslSocket(CoreSocketFactory.java:233) com.google.cloud.sql.core.CoreSocketFactory.connect(CoreSocketFactory.java:185) com.google.cloud.sql.mysql.SocketFactory.connect(SocketFactory.java:48) com.google.cloud.sql.mysql.SocketFactory.connect(SocketFactory.java:38) com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:65) ... 35 more
非常感谢您的帮助! 提前致谢
如果可能,请为 sql 连接尝试以下代码:
// Obtain the connection from the connectToCloudSql helper (not shown in this snippet), passing
// the three values looked up in `map` under the URL, USERNAME and PASSWORD LiteralConstant keys.
connection = connectToCloudSql(map.get(LiteralConstant.URL.toString()),
map.get(LiteralConstant.USERNAME.toString()), map.get(LiteralConstant.PASSWORD.toString()));
然后使用以下代码从 sql 连接获取结果:
// Prepare and execute the call, then fetch its result set and column metadata.
statement = connection.prepareCall("query");
statement.execute();
resultSet = statement.getResultSet();
ResultSetMetaData rsmd = resultSet.getMetaData();
int count = rsmd.getColumnCount();

// Move to the first row; no row at all, or a result without columns, is reported as a failure.
boolean usable = resultSet.next() && count >= 1;
if (!usable) {
    throw new ConnectionFailureException("Failed to connect to Cloud SQL");
}

// Copy every column of the current row into `row`, keyed by column name (indexes are 1-based).
for (int col = 1; col <= count; ++col) {
    String columnName = rsmd.getColumnName(col);
    row.set(columnName, resultSet.getString(col));
}
在 PCollection 中得到上面的结果。注意:不要忘记启用 Cloud SQL API 和 Cloud SQL Admin API。
Maven 依赖项:
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.25</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory</artifactId> <!-- mysql-socket-factory-connector-j-6 if using 6.x.x -->
<version>1.0.0</version>
</dependency>
以上这段代码适用于我的情况。如果此解决方案适合您,请告诉我。
您可以使用以下代码建立连接:
// Create the pipeline from the previously built options.
Pipeline p = Pipeline.create(options);
// Pooled data source for the Cloud SQL instance; increase the pool size based on your records.
ComboPooledDataSource dataSource = new ComboPooledDataSource();
dataSource.setDriverClass("com.mysql.jdbc.Driver");
dataSource.setJdbcUrl(
    "jdbc:mysql://google/test?cloudSqlInstance=dataflowtest-:us-central1:sql-test&socketFactory=com.google.cloud.sql.mysql.SocketFactory");
dataSource.setUser("root");
dataSource.setPassword("root");
dataSource.setMaxPoolSize(10);
dataSource.setInitialPoolSize(6);
JdbcIO.DataSourceConfiguration config = JdbcIO.DataSourceConfiguration.create(dataSource);
// Add rewriteBatchedStatements=true to the JDBC URL to improve write speed.
// Read every row of test_table as a key/value pair built from its first two columns.
PCollection<KV<String, String>> sqlResult = p.apply(JdbcIO.<KV<String, String>>read()
    .withDataSourceConfiguration(config)
    .withQuery("select * from test_table")
    .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
    .withRowMapper(new JdbcIO.RowMapper<KV<String, String>>() {
      private static final long serialVersionUID = 1L;

      @Override
      public KV<String, String> mapRow(ResultSet resultSet) throws Exception {
        return KV.of(resultSet.getString(1), resultSet.getString(2));
      }
    }));
在 pom.xml 中添加以下依赖:
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-jdbc</artifactId>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.25</version>
</dependency>
<dependency>
<groupId>com.google.cloud.sql</groupId>
<artifactId>mysql-socket-factory</artifactId>
<version>1.0.0</version>
</dependency>
这应该有效..