Java:使用 Gson 以流模式解析 JSON 时出错:应为 BEGIN_ARRAY,但在第 1 行第 2 列(路径 $)处得到的是 BEGIN_OBJECT
JAVA Error parsing JSON (stream mode) with Gson: Expected BEGIN_ARRAY but was BEGIN_OBJECT at line 1 column 2 path $
更新:我已经根据第一个回复中的建议修改了我的代码,但仍然产生错误。
我编写了以下代码来解析一个非常大的 json 文件:
/**
 * Entry point: runs the cluster-metadata reader against the hard-coded id-cluster file.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if constructing the indexer or reading the file propagates one
 */
public static void main(String[] args) throws Exception {
    final String clusterDump = "/home/zz/Work/data/wdc/WDC_ProdMatch/idclusters.json";
    new WDCProdMatchDatasetIndexer_2().readClusterMetadata(clusterDump);
}
/**
 * Streams a file of newline-delimited JSON objects and deserializes each one into a
 * {@link Cluster}, printing a progress line every 1000 records and a final total.
 *
 * <p>The input is neither a single JSON array nor a single JSON object: it is a sequence of
 * top-level objects, one per line. The reader is therefore switched to lenient mode (strict
 * mode allows exactly one top-level value per stream) and the loop runs until the end of the
 * document. No {@code beginObject()} / {@code endArray()} calls are made here, because
 * {@code gson.fromJson} consumes each record's own braces and there is no enclosing array.
 *
 * <p>Any exception raised while opening, reading or parsing is caught and its stack trace is
 * printed; nothing is rethrown.
 *
 * @param jsonFile path of the file to read; it is decoded as UTF-8
 */
public void readClusterMetadata(String jsonFile){
    try (JsonReader jsonReader = new JsonReader(
            new InputStreamReader(
                    new FileInputStream(jsonFile), StandardCharsets.UTF_8))) {
        Gson gson = new GsonBuilder().create();
        // Required so that more than one top-level value may follow each other in the stream.
        jsonReader.setLenient(true);
        int numberOfRecords = 0;
        // Fully qualified so this snippet does not need an extra import.
        while (jsonReader.peek() != com.google.gson.stream.JsonToken.END_DOCUMENT) {
            // Reads exactly one complete {...} record and leaves the reader at the next one.
            Cluster c = gson.fromJson(jsonReader, Cluster.class);
            long[] sizeInfo = new long[]{c.clusterSizeInOffers, c.size};
            //clusterSize.put(String.valueOf(c.id), sizeInfo);
            numberOfRecords++;
            if (numberOfRecords % 1000 == 0) {
                System.out.println(String.format("processed %d clusters", numberOfRecords));
            }
        }
        System.out.println("Total Records Found : " + numberOfRecords);
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Gson deserializer for a list that arrives as a JSON <em>string</em> such as
 * {@code "[a, b, c]"} rather than as a real JSON array.
 */
class ArrayAsStringJsonDeserializer implements JsonDeserializer<List<String>> {
    /**
     * Strips the surrounding square brackets, splits the remainder on commas and trims every
     * item.
     *
     * @param json    the JSON element holding the bracketed string
     * @param typeOfT the requested target type (unused)
     * @param context the deserialization context (unused)
     * @return the trimmed items in their original order; an empty list for {@code "[]"}
     * @throws JsonParseException if the string is not enclosed in {@code [} and {@code ]}
     */
    @Override
    public List<String> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
        String value = json.getAsString().trim();
        boolean bracketed = value.length() >= 2
                && value.charAt(0) == '['
                && value.charAt(value.length() - 1) == ']';
        if (!bracketed) {
            // Without this check substring() would silently drop two real characters,
            // or throw StringIndexOutOfBoundsException for inputs shorter than two characters.
            throw new JsonParseException("Expected a bracketed list like \"[a, b]\" but was: " + value);
        }
        String inner = value.substring(1, value.length() - 1);
        // "".split(",") yields one empty string, so an empty list needs its own case.
        String[] items = inner.trim().isEmpty() ? new String[0] : inner.split(",");
        return Arrays.stream(items).map(String::trim).collect(Collectors.toList());
    }
}
/**
 * One record of the cluster metadata file. Gson populates the fields from the JSON keys
 * noted on each field; fields without {@code @SerializedName} use the field name as the key.
 */
class Cluster {
// JSON key "id".
protected long id;
// JSON key "size".
protected long size;
// JSON key "cluster_size_in_offers".
@SerializedName("cluster_size_in_offers")
protected long clusterSizeInOffers;
// JSON key "id_values"; the custom adapter is needed because the sample data stores this
// list as a quoted string such as "[a, b]" rather than as a JSON array.
@JsonAdapter(ArrayAsStringJsonDeserializer.class)
@SerializedName("id_values")
protected List<String> idValues;
// JSON key "categoryDensity".
@SerializedName("categoryDensity")
protected double catDensity;
// JSON key "category".
@SerializedName("category")
protected String cat;
}
数据文件如下所示(前 10 行)
{"size":4,"cluster_size_in_offers":1,"id_values":"[814914023129, w2190254, pfl60gs25ssdr, pfl60gs25ssdr]","id":2,"categoryDensity":1,"category":"Computers_and_Accessories"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[hst322440ss, g1042641]","id":3,"categoryDensity":1,"category":"Office_Products"}
{"size":4,"cluster_size_in_offers":1,"id_values":"[4051329063869, t24datr01765, t24datr01763, datr01763]","id":4,"categoryDensity":1,"category":"Automotive"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5057195062301, sppct335a2bl]","id":7,"categoryDensity":1,"category":"Office_Products"}
{"size":3,"cluster_size_in_offers":1,"id_values":"[ 845173001269, mpnlkbusmokeam89us, lkbusmokeam89]","id":8,"categoryDensity":1,"category":"Computers_and_Accessories"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[ksw26r0100, g1104817]","id":9,"categoryDensity":1,"category":"Other_Electronics"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5054328719897, ltr12x31r685c15]","id":11,"categoryDensity":1,"category":"Office_Products"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[model82226, sirsir822261]","id":15,"categoryDensity":1,"category":"Tools_and_Home_Improvement"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5054328970724, sscl3816114a2bl]","id":17,"categoryDensity":1,"category":"Office_Products"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[814882011647, 203932664]","id":20,"categoryDensity":1,"category":"Tools_and_Home_Improvement"}
但是当代码在这个数据上运行时,会产生如下错误:
com.google.gson.JsonSyntaxException: java.lang.IllegalStateException: Expected BEGIN_OBJECT but was NAME at line 1 column 3 path $.
at com.google.gson.internal.bind.ReflectiveTypeAdapterFactory$Adapter.read(ReflectiveTypeAdapterFactory.java:226)
at com.google.gson.Gson.fromJson(Gson.java:927)
at uk.ac.shef.inf.wop.indexing.WDCProdMatchDatasetIndexer_2.readClusterMetadata(WDCProdMatchDatasetIndexer_2.java:38)
at uk.ac.shef.inf.wop.indexing.WDCProdMatchDatasetIndexer_2.main(WDCProdMatchDatasetIndexer_2.java:25)
Caused by: java.lang.IllegalStateException: Expected BEGIN_OBJECT but was NAME at line 1 column 3 path $.
at com.google.gson.stream.JsonReader.beginObject(JsonReader.java:385)
at com.google.gson.internal.bind.ReflectiveTypeAdapterFactory$Adapter.read(ReflectiveTypeAdapterFactory.java:215)
... 3 more
有什么建议吗?
文件中的每一行都是一个独立的 JSON 对象(Object),整个文件并不是一个 JSON 数组。另一个问题是 id_values 的 JSON 数组(Array)被包在引号里,因此它实际上是一个字符串(String)基本类型。您需要为它提供自定义反序列化器:先去掉首尾的方括号,再按逗号(,)手动拆分各项。示例解决方案如下所示:
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
import com.google.gson.annotations.JsonAdapter;
import com.google.gson.annotations.SerializedName;
import lombok.Data;
import lombok.ToString;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Type;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Demo application: reads a file that holds one JSON object per line and prints every parsed
 * {@link Cluster}.
 */
public class GsonApp {
    /**
     * Parses {@code ./resource/test.json} and prints each cluster on its own line.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file cannot be read or a line is not valid JSON
     */
    public static void main(String[] args) throws Exception {
        File jsonFile = new File("./resource/test.json").getAbsoluteFile();
        List<Cluster> clusters = readClusters(jsonFile);
        clusters.forEach(System.out::println);
    }

    /**
     * Deserializes every non-blank line of the file into a {@link Cluster}.
     *
     * <p>Each line is parsed on its own, so the file as a whole does not have to be a valid
     * JSON document. All results are collected in memory.
     *
     * @param jsonFile the line-delimited JSON file; {@code Files.lines} decodes it as UTF-8
     * @return the parsed clusters in file order
     * @throws IOException if the file cannot be opened
     */
    private static List<Cluster> readClusters(File jsonFile) throws IOException {
        Gson gson = new GsonBuilder().create();
        try (Stream<String> lines = Files.lines(jsonFile.toPath())) {
            return lines
                    // gson.fromJson returns null for an empty document, so blank lines
                    // would otherwise put null entries into the result.
                    .filter(line -> !line.trim().isEmpty())
                    .map(line -> gson.fromJson(line, Cluster.class))
                    .collect(Collectors.toList());
        }
    }
}
/**
 * Gson deserializer for a list that arrives as a JSON <em>string</em> such as
 * {@code "[a, b, c]"} rather than as a real JSON array.
 */
class ArrayAsStringJsonDeserializer implements JsonDeserializer<List<String>> {
    /**
     * Strips the surrounding square brackets, splits the remainder on commas and trims every
     * item.
     *
     * @param json    the JSON element holding the bracketed string
     * @param typeOfT the requested target type (unused)
     * @param context the deserialization context (unused)
     * @return the trimmed items in their original order; an empty list for {@code "[]"}
     * @throws JsonParseException if the string is not enclosed in {@code [} and {@code ]}
     */
    @Override
    public List<String> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
        String value = json.getAsString().trim();
        boolean bracketed = value.length() >= 2
                && value.charAt(0) == '['
                && value.charAt(value.length() - 1) == ']';
        if (!bracketed) {
            // Without this check substring() would silently drop two real characters,
            // or throw StringIndexOutOfBoundsException for inputs shorter than two characters.
            throw new JsonParseException("Expected a bracketed list like \"[a, b]\" but was: " + value);
        }
        String inner = value.substring(1, value.length() - 1);
        // "".split(",") yields one empty string, so an empty list needs its own case.
        String[] items = inner.trim().isEmpty() ? new String[0] : inner.split(",");
        return Arrays.stream(items).map(String::trim).collect(Collectors.toList());
    }
}
/**
 * One record of the cluster file. Gson populates the fields from the JSON keys noted on each
 * field; fields without {@code @SerializedName} use the field name as the key.
 */
// NOTE(review): Lombok's @Data already bundles @ToString, so the explicit @ToString below
// looks redundant - confirm before removing.
@Data
@ToString
class Cluster {
// JSON key "id".
protected long id;
// JSON key "size".
protected long size;
// JSON key "cluster_size_in_offers".
@SerializedName("cluster_size_in_offers")
protected long clusterSizeInOffers;
// JSON key "id_values"; the custom adapter is needed because the sample data stores this
// list as a quoted string such as "[a, b]" rather than as a JSON array.
@JsonAdapter(ArrayAsStringJsonDeserializer.class)
@SerializedName("id_values")
protected List<String> idValues;
// JSON key "categoryDensity".
// NOTE(review): declared int; the sample rows only show integral values, but a fractional
// density would presumably fail to parse as int - confirm against the full data set.
@SerializedName("categoryDensity")
protected int catDensity;
// JSON key "category".
@SerializedName("category")
protected String cat;
}
以上代码打印:
Cluster(id=2, size=4, clusterSizeInOffers=1, idValues=[814914023129, w2190254, pfl60gs25ssdr, pfl60gs25ssdr], catDensity=1, cat=Computers_and_Accessories)
Cluster(id=3, size=2, clusterSizeInOffers=1, idValues=[hst322440ss, g1042641], catDensity=1, cat=Office_Products)
Cluster(id=4, size=4, clusterSizeInOffers=1, idValues=[4051329063869, t24datr01765, t24datr01763, datr01763], catDensity=1, cat=Automotive)
...
更新:我已经根据第一个回复中的建议修改了我的代码,但仍然产生错误。
我编写了以下代码来解析一个非常大的 json 文件:
/**
 * Entry point: runs the cluster-metadata reader against the hard-coded id-cluster file.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if constructing the indexer or reading the file propagates one
 */
public static void main(String[] args) throws Exception {
    final String clusterDump = "/home/zz/Work/data/wdc/WDC_ProdMatch/idclusters.json";
    new WDCProdMatchDatasetIndexer_2().readClusterMetadata(clusterDump);
}
/**
 * Streams a file of newline-delimited JSON objects and deserializes each one into a
 * {@link Cluster}, printing a progress line every 1000 records and a final total.
 *
 * <p>The input is neither a single JSON array nor a single JSON object: it is a sequence of
 * top-level objects, one per line. The reader is therefore switched to lenient mode (strict
 * mode allows exactly one top-level value per stream) and the loop runs until the end of the
 * document. No {@code beginObject()} / {@code endArray()} calls are made here, because
 * {@code gson.fromJson} consumes each record's own braces and there is no enclosing array.
 *
 * <p>Any exception raised while opening, reading or parsing is caught and its stack trace is
 * printed; nothing is rethrown.
 *
 * @param jsonFile path of the file to read; it is decoded as UTF-8
 */
public void readClusterMetadata(String jsonFile){
    try (JsonReader jsonReader = new JsonReader(
            new InputStreamReader(
                    new FileInputStream(jsonFile), StandardCharsets.UTF_8))) {
        Gson gson = new GsonBuilder().create();
        // Required so that more than one top-level value may follow each other in the stream.
        jsonReader.setLenient(true);
        int numberOfRecords = 0;
        // Fully qualified so this snippet does not need an extra import.
        while (jsonReader.peek() != com.google.gson.stream.JsonToken.END_DOCUMENT) {
            // Reads exactly one complete {...} record and leaves the reader at the next one.
            Cluster c = gson.fromJson(jsonReader, Cluster.class);
            long[] sizeInfo = new long[]{c.clusterSizeInOffers, c.size};
            //clusterSize.put(String.valueOf(c.id), sizeInfo);
            numberOfRecords++;
            if (numberOfRecords % 1000 == 0) {
                System.out.println(String.format("processed %d clusters", numberOfRecords));
            }
        }
        System.out.println("Total Records Found : " + numberOfRecords);
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Gson deserializer for a list that arrives as a JSON <em>string</em> such as
 * {@code "[a, b, c]"} rather than as a real JSON array.
 */
class ArrayAsStringJsonDeserializer implements JsonDeserializer<List<String>> {
    /**
     * Strips the surrounding square brackets, splits the remainder on commas and trims every
     * item.
     *
     * @param json    the JSON element holding the bracketed string
     * @param typeOfT the requested target type (unused)
     * @param context the deserialization context (unused)
     * @return the trimmed items in their original order; an empty list for {@code "[]"}
     * @throws JsonParseException if the string is not enclosed in {@code [} and {@code ]}
     */
    @Override
    public List<String> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
        String value = json.getAsString().trim();
        boolean bracketed = value.length() >= 2
                && value.charAt(0) == '['
                && value.charAt(value.length() - 1) == ']';
        if (!bracketed) {
            // Without this check substring() would silently drop two real characters,
            // or throw StringIndexOutOfBoundsException for inputs shorter than two characters.
            throw new JsonParseException("Expected a bracketed list like \"[a, b]\" but was: " + value);
        }
        String inner = value.substring(1, value.length() - 1);
        // "".split(",") yields one empty string, so an empty list needs its own case.
        String[] items = inner.trim().isEmpty() ? new String[0] : inner.split(",");
        return Arrays.stream(items).map(String::trim).collect(Collectors.toList());
    }
}
/**
 * One record of the cluster metadata file. Gson populates the fields from the JSON keys
 * noted on each field; fields without {@code @SerializedName} use the field name as the key.
 */
class Cluster {
// JSON key "id".
protected long id;
// JSON key "size".
protected long size;
// JSON key "cluster_size_in_offers".
@SerializedName("cluster_size_in_offers")
protected long clusterSizeInOffers;
// JSON key "id_values"; the custom adapter is needed because the sample data stores this
// list as a quoted string such as "[a, b]" rather than as a JSON array.
@JsonAdapter(ArrayAsStringJsonDeserializer.class)
@SerializedName("id_values")
protected List<String> idValues;
// JSON key "categoryDensity".
@SerializedName("categoryDensity")
protected double catDensity;
// JSON key "category".
@SerializedName("category")
protected String cat;
}
数据文件如下所示(前 10 行)
{"size":4,"cluster_size_in_offers":1,"id_values":"[814914023129, w2190254, pfl60gs25ssdr, pfl60gs25ssdr]","id":2,"categoryDensity":1,"category":"Computers_and_Accessories"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[hst322440ss, g1042641]","id":3,"categoryDensity":1,"category":"Office_Products"}
{"size":4,"cluster_size_in_offers":1,"id_values":"[4051329063869, t24datr01765, t24datr01763, datr01763]","id":4,"categoryDensity":1,"category":"Automotive"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5057195062301, sppct335a2bl]","id":7,"categoryDensity":1,"category":"Office_Products"}
{"size":3,"cluster_size_in_offers":1,"id_values":"[ 845173001269, mpnlkbusmokeam89us, lkbusmokeam89]","id":8,"categoryDensity":1,"category":"Computers_and_Accessories"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[ksw26r0100, g1104817]","id":9,"categoryDensity":1,"category":"Other_Electronics"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5054328719897, ltr12x31r685c15]","id":11,"categoryDensity":1,"category":"Office_Products"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[model82226, sirsir822261]","id":15,"categoryDensity":1,"category":"Tools_and_Home_Improvement"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[5054328970724, sscl3816114a2bl]","id":17,"categoryDensity":1,"category":"Office_Products"}
{"size":2,"cluster_size_in_offers":1,"id_values":"[814882011647, 203932664]","id":20,"categoryDensity":1,"category":"Tools_and_Home_Improvement"}
但是当代码在这个数据上运行时,会产生如下错误:
com.google.gson.JsonSyntaxException: java.lang.IllegalStateException: Expected BEGIN_OBJECT but was NAME at line 1 column 3 path $.
at com.google.gson.internal.bind.ReflectiveTypeAdapterFactory$Adapter.read(ReflectiveTypeAdapterFactory.java:226)
at com.google.gson.Gson.fromJson(Gson.java:927)
at uk.ac.shef.inf.wop.indexing.WDCProdMatchDatasetIndexer_2.readClusterMetadata(WDCProdMatchDatasetIndexer_2.java:38)
at uk.ac.shef.inf.wop.indexing.WDCProdMatchDatasetIndexer_2.main(WDCProdMatchDatasetIndexer_2.java:25)
Caused by: java.lang.IllegalStateException: Expected BEGIN_OBJECT but was NAME at line 1 column 3 path $.
at com.google.gson.stream.JsonReader.beginObject(JsonReader.java:385)
at com.google.gson.internal.bind.ReflectiveTypeAdapterFactory$Adapter.read(ReflectiveTypeAdapterFactory.java:215)
... 3 more
有什么建议吗?
文件中的每一行都是一个独立的 JSON 对象(Object),整个文件并不是一个 JSON 数组。另一个问题是 id_values 的 JSON 数组(Array)被包在引号里,因此它实际上是一个字符串(String)基本类型。您需要为它提供自定义反序列化器:先去掉首尾的方括号,再按逗号(,)手动拆分各项。示例解决方案如下所示:
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
import com.google.gson.annotations.JsonAdapter;
import com.google.gson.annotations.SerializedName;
import lombok.Data;
import lombok.ToString;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Type;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Demo application: reads a file that holds one JSON object per line and prints every parsed
 * {@link Cluster}.
 */
public class GsonApp {
    /**
     * Parses {@code ./resource/test.json} and prints each cluster on its own line.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file cannot be read or a line is not valid JSON
     */
    public static void main(String[] args) throws Exception {
        File jsonFile = new File("./resource/test.json").getAbsoluteFile();
        List<Cluster> clusters = readClusters(jsonFile);
        clusters.forEach(System.out::println);
    }

    /**
     * Deserializes every non-blank line of the file into a {@link Cluster}.
     *
     * <p>Each line is parsed on its own, so the file as a whole does not have to be a valid
     * JSON document. All results are collected in memory.
     *
     * @param jsonFile the line-delimited JSON file; {@code Files.lines} decodes it as UTF-8
     * @return the parsed clusters in file order
     * @throws IOException if the file cannot be opened
     */
    private static List<Cluster> readClusters(File jsonFile) throws IOException {
        Gson gson = new GsonBuilder().create();
        try (Stream<String> lines = Files.lines(jsonFile.toPath())) {
            return lines
                    // gson.fromJson returns null for an empty document, so blank lines
                    // would otherwise put null entries into the result.
                    .filter(line -> !line.trim().isEmpty())
                    .map(line -> gson.fromJson(line, Cluster.class))
                    .collect(Collectors.toList());
        }
    }
}
/**
 * Gson deserializer for a list that arrives as a JSON <em>string</em> such as
 * {@code "[a, b, c]"} rather than as a real JSON array.
 */
class ArrayAsStringJsonDeserializer implements JsonDeserializer<List<String>> {
    /**
     * Strips the surrounding square brackets, splits the remainder on commas and trims every
     * item.
     *
     * @param json    the JSON element holding the bracketed string
     * @param typeOfT the requested target type (unused)
     * @param context the deserialization context (unused)
     * @return the trimmed items in their original order; an empty list for {@code "[]"}
     * @throws JsonParseException if the string is not enclosed in {@code [} and {@code ]}
     */
    @Override
    public List<String> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
        String value = json.getAsString().trim();
        boolean bracketed = value.length() >= 2
                && value.charAt(0) == '['
                && value.charAt(value.length() - 1) == ']';
        if (!bracketed) {
            // Without this check substring() would silently drop two real characters,
            // or throw StringIndexOutOfBoundsException for inputs shorter than two characters.
            throw new JsonParseException("Expected a bracketed list like \"[a, b]\" but was: " + value);
        }
        String inner = value.substring(1, value.length() - 1);
        // "".split(",") yields one empty string, so an empty list needs its own case.
        String[] items = inner.trim().isEmpty() ? new String[0] : inner.split(",");
        return Arrays.stream(items).map(String::trim).collect(Collectors.toList());
    }
}
/**
 * One record of the cluster file. Gson populates the fields from the JSON keys noted on each
 * field; fields without {@code @SerializedName} use the field name as the key.
 */
// NOTE(review): Lombok's @Data already bundles @ToString, so the explicit @ToString below
// looks redundant - confirm before removing.
@Data
@ToString
class Cluster {
// JSON key "id".
protected long id;
// JSON key "size".
protected long size;
// JSON key "cluster_size_in_offers".
@SerializedName("cluster_size_in_offers")
protected long clusterSizeInOffers;
// JSON key "id_values"; the custom adapter is needed because the sample data stores this
// list as a quoted string such as "[a, b]" rather than as a JSON array.
@JsonAdapter(ArrayAsStringJsonDeserializer.class)
@SerializedName("id_values")
protected List<String> idValues;
// JSON key "categoryDensity".
// NOTE(review): declared int; the sample rows only show integral values, but a fractional
// density would presumably fail to parse as int - confirm against the full data set.
@SerializedName("categoryDensity")
protected int catDensity;
// JSON key "category".
@SerializedName("category")
protected String cat;
}
以上代码打印:
Cluster(id=2, size=4, clusterSizeInOffers=1, idValues=[814914023129, w2190254, pfl60gs25ssdr, pfl60gs25ssdr], catDensity=1, cat=Computers_and_Accessories)
Cluster(id=3, size=2, clusterSizeInOffers=1, idValues=[hst322440ss, g1042641], catDensity=1, cat=Office_Products)
Cluster(id=4, size=4, clusterSizeInOffers=1, idValues=[4051329063869, t24datr01765, t24datr01763, datr01763], catDensity=1, cat=Automotive)
...