需要帮助来构建 Cloud Datastore 数据对象
Need help to build Cloud Datastore data object
我正在尝试构建一个 Dataflow 管道:当 JSON 文件上传到 Google Cloud Storage 时触发,并将文件内容写入 Cloud Datastore。
根据 Dataflow 模板的要求,JSON 文件的每一行都必须采用 Datastore 数据对象(实体)格式,其定义见此处。
这就是我的 json 文件的样子,我正在尝试使其适应 Datastore 数据对象:
{
"userId": "u-skjbdw34jh3gx",
"rowRanks:": [
{
"originalTrigger": "recent",
"programmedRowPos": "VR1",
"reoderedRowPos": 0
},
{
"originalTrigger": "discovery",
"programmedRowPos": "VR1",
"reoderedRowPos": 1
}
]
}
以下是我按照上述链接中的数据对象格式改写后,目前得到的结果:
{
"key": {
"partitionId": {
"projectId": "gcp-project-id",
"namespaceId": "spring-demo"
},
"path":
{
"kind": "demo",
"name": "userId"
}
},
"properties": {
"userId": {
"stringValue": "01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"
}
}
}
以下是我在尝试写入 Datastore 时在 Dataflow 中遇到的错误:
com.google.protobuf.InvalidProtocolBufferException: java.io.EOFException: End of input at line 1 column 2 path $.
at com.google.protobuf.util.JsonFormat$ParserImpl.merge(JsonFormat.java:1195)
at com.google.protobuf.util.JsonFormat$Parser.merge(JsonFormat.java:370)
at com.google.cloud.teleport.templates.common.DatastoreConverters$EntityJsonParser.merge(DatastoreConverters.java:497)
at com.google.cloud.teleport.templates.common.DatastoreConverters$JsonToEntity.processElement(DatastoreConverters.java:351)
JSON 文件中的每个 Cloud Datastore 实体对象都必须完整地写在同一行内。模板按行读取文件,并把每一行单独解析为一个实体;而您的 JSON 是跨多行排版的,第一行只有一个 `{`,因此才会出现所引用的错误:End of input at line 1 column 2 path $.
应该是这样的:
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
显然,json 文件将包含数千个对象,但每个对象都必须在一行中:
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
如果我正确理解了您的输入数据格式和所需的输出,那么这段 js 代码应该可以解决问题:
// Sample input record: one user with an ordered list of row ranks.
var data = {
  userId: "u-skjbdw34jh3gx",
  rowRanks: [
    { originalTrigger: "recent", programmedRowPos: "VR1", reorderedRowPos: 0 },
    { originalTrigger: "discovery", programmedRowPos: "VR1", reorderedRowPos: 1 }
  ]
};

// Accumulates one Datastore `entityValue` wrapper per entry of data.rowRanks.
var arrayValues = [];

/**
 * Wraps a single rowRanks entry in the Datastore `entityValue` shape and
 * appends it to the shared `arrayValues` list.
 *
 * @param {Object} row - One element of `data.rowRanks`.
 */
function buildArrayValue(row) {
  arrayValues.push({
    entityValue: {
      properties: {
        originalTrigger: { stringValue: row.originalTrigger },
        programmedRowPos: { stringValue: row.programmedRowPos },
        reorderedRowPos: { integerValue: row.reorderedRowPos }
      }
    }
  });
}

data.rowRanks.forEach(buildArrayValue);

// Single key path element; it is placed inside an array below because
// `key.path` is emitted as a list.
var path = { kind: "demo", name: "userId" };

// The complete entity: key (partition + path) followed by its properties.
var entity = {
  key: {
    partitionId: { projectId: "gcp-project-id", namespaceId: "spring-demo" },
    path: [path]
  },
  properties: {
    userId: { stringValue: data.userId },
    rowRanks: { arrayValue: { values: arrayValues } }
  }
};

// Emit the entity as compact JSON into the page.
document.write(JSON.stringify(entity));
rowRanks 数组基本上是通过 forEach() 循环构建出来的。不过请注意,path 必须是一个数组(参见参考文档)。
现在我们稍作修改,让这段代码在模板中运行而不是在浏览器中运行,然后将文件上传到 GCS,并按照此处的说明执行:
# Launch a Dataflow job named "test-datastore" from the GCS_Text_to_Datastore template.
# Requires the shell variables $BUCKET and $PROJECT to be set beforehand.
# The parameters point the template at the *.js file(s) and *.json file(s) in the
# bucket, name `transform` as the JavaScript function to call, and set the error
# output path and the Datastore project to write to.
gcloud dataflow jobs run test-datastore \
--gcs-location=gs://dataflow-templates/latest/GCS_Text_to_Datastore \
--parameters=javascriptTextTransformGcsPath=gs://$BUCKET/*.js,errorWritePath=gs://$BUCKET/errors.txt,javascriptTextTransformFunctionName=transform,textReadPattern=gs://$BUCKET/*.json,datastoreWriteProjectId=$PROJECT
上传到GCS的js文件完整内容为:
/**
 * Converts one JSON record into the JSON text of a Datastore entity.
 *
 * The input is parsed with JSON.parse and must contain a `userId` value and a
 * `rowRanks` array whose items carry `originalTrigger`, `programmedRowPos` and
 * `reorderedRowPos`. Invalid JSON makes JSON.parse throw, and a record without
 * a `rowRanks` array makes the function throw a TypeError.
 *
 * NOTE(review): every entity is given the same literal key name "userId" (not
 * the record's own userId value) - confirm this is intended.
 * NOTE(review): kept in ES5-only syntax like the original; confirm what the
 * executing JavaScript engine supports before modernizing.
 *
 * @param {string} elem - JSON text of a single input record.
 * @returns {string} Compact JSON text of the resulting entity.
 */
function transform(elem) {
  var record = JSON.parse(elem);

  // One `entityValue` wrapper per rank, in the same order as the input array.
  var rankValues = record.rowRanks.map(function (rank) {
    return {
      entityValue: {
        properties: {
          originalTrigger: { stringValue: rank.originalTrigger },
          programmedRowPos: { stringValue: rank.programmedRowPos },
          reorderedRowPos: { integerValue: rank.reorderedRowPos }
        }
      }
    };
  });

  var datastoreEntity = {
    key: {
      partitionId: {
        projectId: "gcp-project-id",
        namespaceId: "spring-demo"
      },
      // The key path is emitted as an array holding a single element.
      path: [{ kind: "demo", name: "userId" }]
    },
    properties: {
      userId: { stringValue: record.userId },
      rowRanks: { arrayValue: { values: rankValues } }
    }
  };

  return JSON.stringify(datastoreEntity);
}
我的作业运行成功:
并且数据已写入 Datastore:
如果对你有帮助,请告诉我。
我正在尝试构建一个 Dataflow 管道:当 JSON 文件上传到 Google Cloud Storage 时触发,并将文件内容写入 Cloud Datastore。
根据 Dataflow 模板的要求,JSON 文件的每一行都必须采用 Datastore 数据对象(实体)格式,其定义见此处。
这就是我的 json 文件的样子,我正在尝试使其适应 Datastore 数据对象:
{
"userId": "u-skjbdw34jh3gx",
"rowRanks:": [
{
"originalTrigger": "recent",
"programmedRowPos": "VR1",
"reoderedRowPos": 0
},
{
"originalTrigger": "discovery",
"programmedRowPos": "VR1",
"reoderedRowPos": 1
}
]
}
以下是我按照上述链接中的数据对象格式改写后,目前得到的结果:
{
"key": {
"partitionId": {
"projectId": "gcp-project-id",
"namespaceId": "spring-demo"
},
"path":
{
"kind": "demo",
"name": "userId"
}
},
"properties": {
"userId": {
"stringValue": "01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"
}
}
}
以下是我在尝试写入 Datastore 时在 Dataflow 中遇到的错误:
com.google.protobuf.InvalidProtocolBufferException: java.io.EOFException: End of input at line 1 column 2 path $.
at com.google.protobuf.util.JsonFormat$ParserImpl.merge(JsonFormat.java:1195)
at com.google.protobuf.util.JsonFormat$Parser.merge(JsonFormat.java:370)
at com.google.cloud.teleport.templates.common.DatastoreConverters$EntityJsonParser.merge(DatastoreConverters.java:497)
at com.google.cloud.teleport.templates.common.DatastoreConverters$JsonToEntity.processElement(DatastoreConverters.java:351)
JSON 文件中的每个 Cloud Datastore 实体对象都必须完整地写在同一行内。模板按行读取文件,并把每一行单独解析为一个实体;而您的 JSON 是跨多行排版的,第一行只有一个 `{`,因此才会出现所引用的错误:End of input at line 1 column 2 path $.
应该是这样的:
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
显然,json 文件将包含数千个对象,但每个对象都必须在一行中:
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
{"key":{"partitionId":{"projectId":"gcp-project-id","namespaceId":"spring-demo"},"path":[{"kind":"demo","name":"userId"}]},"properties":{"userId":{"stringValue":"01348c2f-9a20-4ad2-b95d-b3e29f6fc2d1"},"rowRanks":{"arrayValue":{"values":[{"entityValue":{"properties":{"originalTrigger":{"stringValue":"recent"},"programmedRowPos":{"stringValue":"VR1"},"reoderedRowPos":{"integerValue":1}}}}]}}}}
如果我正确理解了您的输入数据格式和所需的输出,那么这段 js 代码应该可以解决问题:
// Sample input record: one user with an ordered list of row ranks.
var data = {
  userId: "u-skjbdw34jh3gx",
  rowRanks: [
    { originalTrigger: "recent", programmedRowPos: "VR1", reorderedRowPos: 0 },
    { originalTrigger: "discovery", programmedRowPos: "VR1", reorderedRowPos: 1 }
  ]
};

// Accumulates one Datastore `entityValue` wrapper per entry of data.rowRanks.
var arrayValues = [];

/**
 * Wraps a single rowRanks entry in the Datastore `entityValue` shape and
 * appends it to the shared `arrayValues` list.
 *
 * @param {Object} row - One element of `data.rowRanks`.
 */
function buildArrayValue(row) {
  arrayValues.push({
    entityValue: {
      properties: {
        originalTrigger: { stringValue: row.originalTrigger },
        programmedRowPos: { stringValue: row.programmedRowPos },
        reorderedRowPos: { integerValue: row.reorderedRowPos }
      }
    }
  });
}

data.rowRanks.forEach(buildArrayValue);

// Single key path element; it is placed inside an array below because
// `key.path` is emitted as a list.
var path = { kind: "demo", name: "userId" };

// The complete entity: key (partition + path) followed by its properties.
var entity = {
  key: {
    partitionId: { projectId: "gcp-project-id", namespaceId: "spring-demo" },
    path: [path]
  },
  properties: {
    userId: { stringValue: data.userId },
    rowRanks: { arrayValue: { values: arrayValues } }
  }
};

// Emit the entity as compact JSON into the page.
document.write(JSON.stringify(entity));
rowRanks 数组基本上是通过 forEach() 循环构建出来的。不过请注意,path 必须是一个数组(参见参考文档)。
现在我们稍作修改,让这段代码在模板中运行而不是在浏览器中运行,然后将文件上传到 GCS,并按照此处的说明执行:
# Launch a Dataflow job named "test-datastore" from the GCS_Text_to_Datastore template.
# Requires the shell variables $BUCKET and $PROJECT to be set beforehand.
# The parameters point the template at the *.js file(s) and *.json file(s) in the
# bucket, name `transform` as the JavaScript function to call, and set the error
# output path and the Datastore project to write to.
gcloud dataflow jobs run test-datastore \
--gcs-location=gs://dataflow-templates/latest/GCS_Text_to_Datastore \
--parameters=javascriptTextTransformGcsPath=gs://$BUCKET/*.js,errorWritePath=gs://$BUCKET/errors.txt,javascriptTextTransformFunctionName=transform,textReadPattern=gs://$BUCKET/*.json,datastoreWriteProjectId=$PROJECT
上传到GCS的js文件完整内容为:
/**
 * Converts one JSON record into the JSON text of a Datastore entity.
 *
 * The input is parsed with JSON.parse and must contain a `userId` value and a
 * `rowRanks` array whose items carry `originalTrigger`, `programmedRowPos` and
 * `reorderedRowPos`. Invalid JSON makes JSON.parse throw, and a record without
 * a `rowRanks` array makes the function throw a TypeError.
 *
 * NOTE(review): every entity is given the same literal key name "userId" (not
 * the record's own userId value) - confirm this is intended.
 * NOTE(review): kept in ES5-only syntax like the original; confirm what the
 * executing JavaScript engine supports before modernizing.
 *
 * @param {string} elem - JSON text of a single input record.
 * @returns {string} Compact JSON text of the resulting entity.
 */
function transform(elem) {
  var record = JSON.parse(elem);

  // One `entityValue` wrapper per rank, in the same order as the input array.
  var rankValues = record.rowRanks.map(function (rank) {
    return {
      entityValue: {
        properties: {
          originalTrigger: { stringValue: rank.originalTrigger },
          programmedRowPos: { stringValue: rank.programmedRowPos },
          reorderedRowPos: { integerValue: rank.reorderedRowPos }
        }
      }
    };
  });

  var datastoreEntity = {
    key: {
      partitionId: {
        projectId: "gcp-project-id",
        namespaceId: "spring-demo"
      },
      // The key path is emitted as an array holding a single element.
      path: [{ kind: "demo", name: "userId" }]
    },
    properties: {
      userId: { stringValue: record.userId },
      rowRanks: { arrayValue: { values: rankValues } }
    }
  };

  return JSON.stringify(datastoreEntity);
}
我的作业运行成功:
并且数据已写入 Datastore:
如果对你有帮助,请告诉我。