解决 MongoDB 过度嵌入(Overembedding)导致的性能下降问题
Solving Overembedding MongoDB with Slow Down performance
目前我正在参与一个存在严重过度嵌入问题的项目:单个集合的文档内部嵌套了 3 个对象(数组),它们几乎包含了该应用程序 70% 的业务模型(这个项目是我们从另一个开发团队接手的,是一个十足的挑战)。另一个问题是,该应用程序需要进行实时地理定位跟踪,并且持续不断地使用这个集合。
我的判断是:我完全确定,数据库服务器过载以及应用程序在几个小时内就变慢的原因正是过度嵌入。
我们认为解决方案是设计一个新的数据库模式(我们明白 MongoDB 是无模式的,但并非没有限制),即尝试把这三个对象规范化为三个集合,并使用少量引用(像关系模型那样模拟外键)。但是,举例来说,您是建议保留旧的(当前的)数据库设计,另建一个仅用于只读查询的数据仓库并只迁移用户数据,还是建议把整个数据库迁移到新模型(这可能非常非常复杂......或者并不复杂?)...
附加信息:
Buses 集合的统计信息:
{
"ns" : "pruebas.buses",
"count" : 1343,
"size" : 38393616,
"avgObjSize" : 28587,
"numExtents" : 7,
"storageSize" : 58277888,
"lastExtentSize" : 20643840.0,
"paddingFactor" : 1.0,
"paddingFactorNote" : "paddingFactor is unused and unmaintained in 3.0. It remains hard coded to 1.0 for compatibility only.",
"userFlags" : 1,
"capped" : false,
"nindexes" : 1,
"totalIndexSize" : 65408,
"indexSizes" : {
"_id_" : 65408
},
"ok" : 1.0
}
这是来自这个名为 Buses 的集合的文档示例:
{
"_id" : "BAOB-02",
"school" : "BAOBAB",
"licensePlate" : "UFS 118",
"color" : "BLANCO",
"model" : 2002,
"username" : "baobab02",
"students" : [
{
"firstNames" : "MATTHIAS ",
"lastNames" : "GARCIA VELANDIA",
"_id" : "1002",
"classroom" : "",
"blood" : "",
"telephone" : null,
"cellphone" : null,
"guardians" : [
{
"firstNames" : "GUSTAVO ",
"lastNames" : "GARCIA GARAVITO",
"_id" : ObjectId("553515248a854eba40c1d2fc")
},
{
"firstNames" : "CLAUDIA ",
"lastNames" : "VELANDIA ",
"_id" : ObjectId("553515248a854eba40c1d2fb")
}
],
"parents" : [
{
"firstNames" : "GUSTAVO ",
"lastNames" : "GARCIA GARAVITO",
"telephone" : null,
"cellphone" : 3103247894.0,
"email" : "gggzipa@gmail.com",
"_id" : ObjectId("553515248a854eba40c1d2fe")
},
{
"firstNames" : "CLAUDIA ",
"lastNames" : "VELANDIA ",
"telephone" : null,
"cellphone" : 3102487056.0,
"email" : "ar.claudiavelandia@gmail.com",
"_id" : ObjectId("553515248a854eba40c1d2fd")
}
],
"addressInfo" : {
"pm" : {
"address" : "KM 2 TABIO - CAJICA",
"apartment" : "",
"neighborhood" : "VIA TABIO",
"monday" : true,
"tuesday" : true,
"wednesday" : true,
"thursday" : true,
"friday" : true,
"saturday" : false,
"coords" : [
4.9242399390697,
-74.0441983938217
],
"stopOrder" : 1
},
"am" : {
"address" : "NA",
"apartment" : "",
"neighborhood" : "",
"monday" : false,
"tuesday" : false,
"wednesday" : false,
"thursday" : false,
"friday" : false,
"saturday" : false,
"coords" : []
}
},
"code" : "1002"
},
{
"firstNames" : "JUAN PABLO",
"lastNames" : "ROMERO GUZMAN",
"_id" : "1003",
"classroom" : "",
"blood" : "",
"telephone" : null,
"cellphone" : null,
"guardians" : [
{
"firstNames" : "NELSON ANDRES",
"lastNames" : "ROMERO ",
"_id" : ObjectId("5535158b8a854eba40c1d300")
},
{
"firstNames" : "ANA MARIA",
"lastNames" : "GUZMAN MORENO",
"_id" : ObjectId("5535158b8a854eba40c1d2ff")
}
],
"parents" : [
{
"firstNames" : "NELSON ANDRES",
"lastNames" : "ROMERO ",
"telephone" : null,
"cellphone" : 3192997309.0,
"email" : "nelsonandresromerojimenez@hotmail.com",
"_id" : ObjectId("5535158b8a854eba40c1d302")
},
{
"firstNames" : "ANA MARIA",
"lastNames" : "GUZMAN MORENO",
"telephone" : null,
"cellphone" : 3143095644.0,
"email" : "ananita28@hotmail.com",
"_id" : ObjectId("5535158b8a854eba40c1d301")
}
],
"addressInfo" : {
"pm" : {
"address" : "CRR 7 2 46",
"apartment" : "APT. 404 INT. 8",
"neighborhood" : "CAPELLANIA",
"monday" : true,
"tuesday" : true,
"wednesday" : true,
"thursday" : true,
"friday" : true,
"saturday" : false,
"coords" : [
4.91861203215498,
-74.0340435504913
],
"stopOrder" : 2
},
"am" : {
"address" : "NA",
"apartment" : "",
"neighborhood" : "",
"monday" : false,
"tuesday" : false,
"wednesday" : false,
"thursday" : false,
"friday" : false,
"saturday" : false,
"coords" : []
}
},
"code" : "1003"
}
],
"auxiliary" : {
"firstNames" : "LEIDY VIVIANA",
"lastNames" : "MORANTES BARON",
"telephone" : null,
"cellphone" : 3203178186.0,
"email" : "vivis_120490@hotmail.com"
},
"driver" : {
"firstNames" : "VICTOR JULIO",
"lastNames" : "MORANTES MORANTES",
"telephone" : null,
"cellphone" : 3118955381.0
},
"__v" : 13
}
该集合的每个文档内嵌约 18 个 Student,通常每个 Student 有 2 个 parents。目前共有 1300 个文档。实时地理定位跟踪的数据存放在另一个集合中,此外该项目还使用另一台服务器做 Redis 缓存(我知道缓存整个数据库不是好的做法,但我们计划对缓存进行拆分,使其仅用于跟踪服务)。
整个数据库的统计信息:
{
"db" : "pruebas",
"collections" : 20,
"objects" : 5785288,
"avgObjSize" : 285.557788652873,
"dataSize" : 1652034048.0,
"storageSize" : 2388484096.0,
"numExtents" : 112,
"indexes" : 18,
"indexSize" : 176544368.0,
"fileSize" : 4226809856.0,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1.0
}
附:一个月前,我们曾考虑对 MongoDB 采用一些优化手段,例如借助负载均衡器和 mongos 进行分片或复制......但无论如何,我们明白,如果数据库设计本身有问题,最好的解决办法还是设计一个新的模型。
如果有人愿意花时间读完这个奇怪的案例,非常感谢;对于任何建设性的意见和建议,也在此提前致谢。
上面的描述并没有指出根本原因;brodriguezs 只是基于假设就打算修改模式(schema)。
在修改 schema 之前,建议先检查以下几点:
- 您是否在 MongoDB 分析器(profiler)中检查过运行缓慢的查询?
- 您是否尝试过为文档建立索引(以上一步的结果为依据)?
- 您使用的 MongoDB 是哪个版本?使用的是哪种存储引擎?
- 您是否已为服务器配置了复制?如果是,请重新查看写关注(write concern)部分:https://docs.mongodb.com/manual/core/replica-set-write-concern/
- 您能否检查一下 MongoDB 的内存存储引擎(In-Memory Storage Engine)是否有帮助:https://docs.mongodb.com/manual/core/inmemory/
您可以在此处查看一些重要提示 - https://docs.mongodb.com/manual/administration/analyzing-mongodb-performance/
目前我正在参与一个存在严重过度嵌入问题的项目:单个集合的文档内部嵌套了 3 个对象(数组),它们几乎包含了该应用程序 70% 的业务模型(这个项目是我们从另一个开发团队接手的,是一个十足的挑战)。另一个问题是,该应用程序需要进行实时地理定位跟踪,并且持续不断地使用这个集合。
我的判断是:我完全确定,数据库服务器过载以及应用程序在几个小时内就变慢的原因正是过度嵌入。
我们认为解决方案是设计一个新的数据库模式(我们明白 MongoDB 是无模式的,但并非没有限制),即尝试把这三个对象规范化为三个集合,并使用少量引用(像关系模型那样模拟外键)。但是,举例来说,您是建议保留旧的(当前的)数据库设计,另建一个仅用于只读查询的数据仓库并只迁移用户数据,还是建议把整个数据库迁移到新模型(这可能非常非常复杂......或者并不复杂?)...
附加信息:Buses 集合的统计信息
{
"ns" : "pruebas.buses",
"count" : 1343,
"size" : 38393616,
"avgObjSize" : 28587,
"numExtents" : 7,
"storageSize" : 58277888,
"lastExtentSize" : 20643840.0,
"paddingFactor" : 1.0,
"paddingFactorNote" : "paddingFactor is unused and unmaintained in 3.0. It remains hard coded to 1.0 for compatibility only.",
"userFlags" : 1,
"capped" : false,
"nindexes" : 1,
"totalIndexSize" : 65408,
"indexSizes" : {
"_id_" : 65408
},
"ok" : 1.0
}
这是来自这个名为 Buses 的集合的文档示例:
{
"_id" : "BAOB-02",
"school" : "BAOBAB",
"licensePlate" : "UFS 118",
"color" : "BLANCO",
"model" : 2002,
"username" : "baobab02",
"students" : [
{
"firstNames" : "MATTHIAS ",
"lastNames" : "GARCIA VELANDIA",
"_id" : "1002",
"classroom" : "",
"blood" : "",
"telephone" : null,
"cellphone" : null,
"guardians" : [
{
"firstNames" : "GUSTAVO ",
"lastNames" : "GARCIA GARAVITO",
"_id" : ObjectId("553515248a854eba40c1d2fc")
},
{
"firstNames" : "CLAUDIA ",
"lastNames" : "VELANDIA ",
"_id" : ObjectId("553515248a854eba40c1d2fb")
}
],
"parents" : [
{
"firstNames" : "GUSTAVO ",
"lastNames" : "GARCIA GARAVITO",
"telephone" : null,
"cellphone" : 3103247894.0,
"email" : "gggzipa@gmail.com",
"_id" : ObjectId("553515248a854eba40c1d2fe")
},
{
"firstNames" : "CLAUDIA ",
"lastNames" : "VELANDIA ",
"telephone" : null,
"cellphone" : 3102487056.0,
"email" : "ar.claudiavelandia@gmail.com",
"_id" : ObjectId("553515248a854eba40c1d2fd")
}
],
"addressInfo" : {
"pm" : {
"address" : "KM 2 TABIO - CAJICA",
"apartment" : "",
"neighborhood" : "VIA TABIO",
"monday" : true,
"tuesday" : true,
"wednesday" : true,
"thursday" : true,
"friday" : true,
"saturday" : false,
"coords" : [
4.9242399390697,
-74.0441983938217
],
"stopOrder" : 1
},
"am" : {
"address" : "NA",
"apartment" : "",
"neighborhood" : "",
"monday" : false,
"tuesday" : false,
"wednesday" : false,
"thursday" : false,
"friday" : false,
"saturday" : false,
"coords" : []
}
},
"code" : "1002"
},
{
"firstNames" : "JUAN PABLO",
"lastNames" : "ROMERO GUZMAN",
"_id" : "1003",
"classroom" : "",
"blood" : "",
"telephone" : null,
"cellphone" : null,
"guardians" : [
{
"firstNames" : "NELSON ANDRES",
"lastNames" : "ROMERO ",
"_id" : ObjectId("5535158b8a854eba40c1d300")
},
{
"firstNames" : "ANA MARIA",
"lastNames" : "GUZMAN MORENO",
"_id" : ObjectId("5535158b8a854eba40c1d2ff")
}
],
"parents" : [
{
"firstNames" : "NELSON ANDRES",
"lastNames" : "ROMERO ",
"telephone" : null,
"cellphone" : 3192997309.0,
"email" : "nelsonandresromerojimenez@hotmail.com",
"_id" : ObjectId("5535158b8a854eba40c1d302")
},
{
"firstNames" : "ANA MARIA",
"lastNames" : "GUZMAN MORENO",
"telephone" : null,
"cellphone" : 3143095644.0,
"email" : "ananita28@hotmail.com",
"_id" : ObjectId("5535158b8a854eba40c1d301")
}
],
"addressInfo" : {
"pm" : {
"address" : "CRR 7 2 46",
"apartment" : "APT. 404 INT. 8",
"neighborhood" : "CAPELLANIA",
"monday" : true,
"tuesday" : true,
"wednesday" : true,
"thursday" : true,
"friday" : true,
"saturday" : false,
"coords" : [
4.91861203215498,
-74.0340435504913
],
"stopOrder" : 2
},
"am" : {
"address" : "NA",
"apartment" : "",
"neighborhood" : "",
"monday" : false,
"tuesday" : false,
"wednesday" : false,
"thursday" : false,
"friday" : false,
"saturday" : false,
"coords" : []
}
},
"code" : "1003"
}
],
"auxiliary" : {
"firstNames" : "LEIDY VIVIANA",
"lastNames" : "MORANTES BARON",
"telephone" : null,
"cellphone" : 3203178186.0,
"email" : "vivis_120490@hotmail.com"
},
"driver" : {
"firstNames" : "VICTOR JULIO",
"lastNames" : "MORANTES MORANTES",
"telephone" : null,
"cellphone" : 3118955381.0
},
"__v" : 13
}
该集合的每个文档内嵌约 18 个 Student,通常每个 Student 有 2 个 parents。目前共有 1300 个文档。实时地理定位跟踪的数据存放在另一个集合中,此外该项目还使用另一台服务器做 Redis 缓存(我知道缓存整个数据库不是好的做法,但我们计划对缓存进行拆分,使其仅用于跟踪服务)。
整个数据库的统计信息:
{
"db" : "pruebas",
"collections" : 20,
"objects" : 5785288,
"avgObjSize" : 285.557788652873,
"dataSize" : 1652034048.0,
"storageSize" : 2388484096.0,
"numExtents" : 112,
"indexes" : 18,
"indexSize" : 176544368.0,
"fileSize" : 4226809856.0,
"nsSizeMB" : 16,
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"dataFileVersion" : {
"major" : 4,
"minor" : 22
},
"ok" : 1.0
}
附:一个月前,我们曾考虑对 MongoDB 采用一些优化手段,例如借助负载均衡器和 mongos 进行分片或复制......但无论如何,我们明白,如果数据库设计本身有问题,最好的解决办法还是设计一个新的模型。如果有人愿意花时间读完这个奇怪的案例,非常感谢;对于任何建设性的意见和建议,也在此提前致谢。
上面的描述并没有指出根本原因;brodriguezs 只是基于假设就打算修改模式(schema)。
在修改 schema 之前,建议先检查以下几点:
- 您是否在 MongoDB 分析器(profiler)中检查过运行缓慢的查询?
- 您是否尝试过为文档建立索引(以上一步的结果为依据)?
- 您使用的 MongoDB 是哪个版本?使用的是哪种存储引擎?
- 您是否已为服务器配置了复制?如果是,请重新查看写关注(write concern)部分:https://docs.mongodb.com/manual/core/replica-set-write-concern/
- 您能否检查一下 MongoDB 的内存存储引擎(In-Memory Storage Engine)是否有帮助:https://docs.mongodb.com/manual/core/inmemory/
您可以在此处查看一些重要提示 - https://docs.mongodb.com/manual/administration/analyzing-mongodb-performance/