由于访问集合 URL 时出错,Solr-Spark 无法建立索引
Solr-Spark unable to index due to an error from accessing the collection url
我在 Java 中使用 Solr 和 Spark 来索引文档。
我在端口 2181 上运行了 Zookeeper,我的集合 test 有两个分片。
当我运行代码时,会抛出 java.lang.NullPointerException。
这是我的代码(该类仅用于索引):
public class SparkRead implements Serializable {
private Integer nbLinesToSkip;
private Integer lineNumber = 0;
private String fileName;
private Integer sizeToRead;
private Integer increment = 0;
/**
 * Stores the given value in the {@code nbLinesToSkip} field.
 *
 * <p>NOTE(review): presumably the number of leading input lines to skip; the field is
 * not read anywhere in the visible code — confirm against the rest of the class.
 *
 * @param nbLinesToSkip value assigned to {@code this.nbLinesToSkip}
 */
public SparkRead(Integer nbLinesToSkip) {
this.nbLinesToSkip = nbLinesToSkip;
}
/**
 * Reads the text file at {@code pathToFile} with a local Spark context, drops the
 * header line, converts every remaining line into a {@code SolrInputDocument} and
 * sends the documents to the Solr collection {@code "test"} via the ZooKeeper
 * address {@code 127.0.0.1:2181}.
 *
 * <p>Returns without indexing when the file is empty or contains nothing but
 * header lines.
 *
 * @param pathToFile path of the input text file
 */
public void loadDocuments(String pathToFile) {
    // configure spark
    SparkConf sparkConf = new SparkConf().setAppName("Read Text to RDD")
            .setMaster("local[2]").set("spark.executor.memory","2g");
    // start a spark context; try-with-resources stops it even when indexing fails
    try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
        // read text file to RDD (one element per input line)
        JavaRDD<String> documents = sc.textFile(pathToFile, 1000);
        if (documents.isEmpty()) {
            // nothing to index, and first() below would throw on an empty RDD
            return;
        }
        // Drop the header by comparing String values. The header must be removed
        // BEFORE the lines become String[]: Java arrays use identity-based
        // equals/hashCode, so RDD.subtract on an RDD of arrays never matches and
        // the header row would be indexed as data.
        String headerLine = documents.first();
        JavaRDD<String[]> lines = documents
                .filter(line -> !line.equals(headerLine))
                .map(line -> line.split("\r?\n"));
        if (lines.isEmpty()) {
            // the file held only header lines
            return;
        }
        System.out.println(Arrays.toString(lines.take(1).get(0)));
        // NOTE(review): lineNumber is never changed in this method, so every document
        // is built with the same value — confirm that is intended.
        JavaRDD<document> sDocs = lines.map(x -> getDocumentFromLine(Arrays.toString(x), lineNumber));
        // NOTE(review): the server-side NullPointerException reported for this code is
        // raised in AddSchemaFieldsUpdateProcessor; a field added with a null value is a
        // possible cause — verify getSolrInputDocument never emits null field values.
        JavaRDD<SolrInputDocument> solrInputDocuments = sDocs.map(x -> getSolrInputDocument(x));
        String zkHost = "127.0.0.1:2181";
        SolrSupport.indexDocs(zkHost, "test", 1, solrInputDocuments.rdd());
    }
}
这是错误:
org.apache.solr.client.solrj.impl.CloudSolrClient$RouteException: Error from server at http://127.0.0.1:8983/solr/test_shard1_replica_n1: java.lang.NullPointerException
at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.mapValueClassesToFieldType(AddSchemaFieldsUpdateProcessorFactory.java:509)
at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.processAdd(AddSchemaFieldsUpdateProcessorFactory.java:396)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldNameMutatingUpdateProcessorFactory.processAdd(FieldNameMutatingUpdateProcessorFactory.java:75)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:92)
at org.apache.solr.handler.loader.JavabinLoader.update(JavabinLoader.java:98)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:188)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readIterator(JavaBinUpdateRequestCodec.java:144)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:311)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readNamedList(JavaBinUpdateRequestCodec.java:130)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:276)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:178)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:195)
at org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:108)
at org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:55)
at org.apache.solr.handler.UpdateRequestHandler.load(UpdateRequestHandler.java:97)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:177)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:710)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:516)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:382)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:326)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1751)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at
.
.
.
分片确实存在,shard1 副本以及 URL
127.0.0.1:8983/solr/#/test_shard1_replica_n1 实际上也都可以正常访问。
我真的不知道是什么问题,我一直在寻找解决方案。
谢谢!
我也一直被这个问题困扰,按如下方式设置 Solr 客户端后,问题就解决了。
// Starts a MiniSolrCloudCluster and takes the Solr client from it.
// zkHost is assigned here but not referenced again within this snippet.
String zkHost = "localhost:9983";
// Temporary directory (name prefix "MiniSolrCloudCluster") handed to the cluster below.
TEMP_DIR = Files.createTempDirectory("MiniSolrCloudCluster");
JettyConfig.Builder jettyConfig = JettyConfig.builder();
jettyConfig.waitForLoadingCoresToFinish(null);
// NOTE(review): the first argument (1) is presumably the number of Solr nodes to start — confirm.
cluster = new MiniSolrCloudCluster(1, TEMP_DIR, jettyConfig.build());
// The client comes from the cluster itself instead of being constructed by hand.
cloudSolrClient = cluster.getSolrClient();
我在 Java 中使用 Solr 和 Spark 来索引文档。 我在端口 2181 上运行了 Zookeeper,我的集合 test 有两个分片。
当我运行代码时,会抛出 java.lang.NullPointerException。
这是我的代码(该类仅用于索引):
public class SparkRead implements Serializable {
private Integer nbLinesToSkip;
private Integer lineNumber = 0;
private String fileName;
private Integer sizeToRead;
private Integer increment = 0;
/**
 * Stores the given value in the {@code nbLinesToSkip} field.
 *
 * <p>NOTE(review): presumably the number of leading input lines to skip; the field is
 * not read anywhere in the visible code — confirm against the rest of the class.
 *
 * @param nbLinesToSkip value assigned to {@code this.nbLinesToSkip}
 */
public SparkRead(Integer nbLinesToSkip) {
this.nbLinesToSkip = nbLinesToSkip;
}
/**
 * Reads the text file at {@code pathToFile} with a local Spark context, drops the
 * header line, converts every remaining line into a {@code SolrInputDocument} and
 * sends the documents to the Solr collection {@code "test"} via the ZooKeeper
 * address {@code 127.0.0.1:2181}.
 *
 * <p>Returns without indexing when the file is empty or contains nothing but
 * header lines.
 *
 * @param pathToFile path of the input text file
 */
public void loadDocuments(String pathToFile) {
    // configure spark
    SparkConf sparkConf = new SparkConf().setAppName("Read Text to RDD")
            .setMaster("local[2]").set("spark.executor.memory","2g");
    // start a spark context; try-with-resources stops it even when indexing fails
    try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
        // read text file to RDD (one element per input line)
        JavaRDD<String> documents = sc.textFile(pathToFile, 1000);
        if (documents.isEmpty()) {
            // nothing to index, and first() below would throw on an empty RDD
            return;
        }
        // Drop the header by comparing String values. The header must be removed
        // BEFORE the lines become String[]: Java arrays use identity-based
        // equals/hashCode, so RDD.subtract on an RDD of arrays never matches and
        // the header row would be indexed as data.
        String headerLine = documents.first();
        JavaRDD<String[]> lines = documents
                .filter(line -> !line.equals(headerLine))
                .map(line -> line.split("\r?\n"));
        if (lines.isEmpty()) {
            // the file held only header lines
            return;
        }
        System.out.println(Arrays.toString(lines.take(1).get(0)));
        // NOTE(review): lineNumber is never changed in this method, so every document
        // is built with the same value — confirm that is intended.
        JavaRDD<document> sDocs = lines.map(x -> getDocumentFromLine(Arrays.toString(x), lineNumber));
        // NOTE(review): the server-side NullPointerException reported for this code is
        // raised in AddSchemaFieldsUpdateProcessor; a field added with a null value is a
        // possible cause — verify getSolrInputDocument never emits null field values.
        JavaRDD<SolrInputDocument> solrInputDocuments = sDocs.map(x -> getSolrInputDocument(x));
        String zkHost = "127.0.0.1:2181";
        SolrSupport.indexDocs(zkHost, "test", 1, solrInputDocuments.rdd());
    }
}
这是错误:
org.apache.solr.client.solrj.impl.CloudSolrClient$RouteException: Error from server at http://127.0.0.1:8983/solr/test_shard1_replica_n1: java.lang.NullPointerException
at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.mapValueClassesToFieldType(AddSchemaFieldsUpdateProcessorFactory.java:509)
at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.processAdd(AddSchemaFieldsUpdateProcessorFactory.java:396)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldNameMutatingUpdateProcessorFactory.processAdd(FieldNameMutatingUpdateProcessorFactory.java:75)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:118)
at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:92)
at org.apache.solr.handler.loader.JavabinLoader.update(JavabinLoader.java:98)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:188)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readIterator(JavaBinUpdateRequestCodec.java:144)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:311)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.readNamedList(JavaBinUpdateRequestCodec.java:130)
at org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:276)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:178)
at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:195)
at org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:108)
at org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:55)
at org.apache.solr.handler.UpdateRequestHandler.load(UpdateRequestHandler.java:97)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:177)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:710)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:516)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:382)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:326)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1751)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at
.
.
.
分片确实存在,shard1 副本以及 URL 127.0.0.1:8983/solr/#/test_shard1_replica_n1 实际上也都可以正常访问。
我真的不知道是什么问题,我一直在寻找解决方案。
谢谢!
我也一直被这个问题困扰,按如下方式设置 Solr 客户端后,问题就解决了。
// Starts a MiniSolrCloudCluster and takes the Solr client from it.
// zkHost is assigned here but not referenced again within this snippet.
String zkHost = "localhost:9983";
// Temporary directory (name prefix "MiniSolrCloudCluster") handed to the cluster below.
TEMP_DIR = Files.createTempDirectory("MiniSolrCloudCluster");
JettyConfig.Builder jettyConfig = JettyConfig.builder();
jettyConfig.waitForLoadingCoresToFinish(null);
// NOTE(review): the first argument (1) is presumably the number of Solr nodes to start — confirm.
cluster = new MiniSolrCloudCluster(1, TEMP_DIR, jettyConfig.build());
// The client comes from the cluster itself instead of being constructed by hand.
cloudSolrClient = cluster.getSolrClient();