我如何映射使用复杂的子文档相互关联的对象

首先,这个问题本身的方向可能就有偏差;如果是这样,希望有人能指点我应该如何着手。

从我在网上找到的资料来看,MongoDB / mongoose 的 mapReduce 似乎是实现这一点的最佳方法,但我一直没能真正弄懂它,稍微复杂一点的用法我就难以理解,所以想请教是否有人可以帮我分析一下我的问题。我不一定需要一个完整的解决方案,解释清楚的伪代码就已经非常感谢了。让我特别困惑的是如何对两个或更多的子文档进行聚合和组合。

另外,我知道这可能是一个糟糕的模型/集合设计,但不幸的是,这完全不由我决定,所以请不要建议重新设计模型。

我特别的问题是我们有一个现有的模型,如下所示:

survey: { _id: 1111, name: "name", questions: [ {_id: 1, text: "a,b, or c?", type: "multipleChoice", options: [a, b, c,]}, {_id: 2, text: "what do you think", type: "freeform"} ], participants: [{_id: 1, name: "user 1"}, {_id: 2, name: "user 2"}], results: [{_id: 123, userId: 1, questionId: 1, answer: "a"}, {_id: 124, userId: 2, questionId: 1, answer: "b"}, {_id: 125, userId: 1, questionId: 2, answer: "this is some answer"}, {_id: 126, userId: 2, questionId: 2, answer: "this is another answer"}] } 

然后我们有另外一个模型,它是用来跟踪用户在整个调查过程中的进度(这只是一个基本的子集,我们也跟踪不同的事件)

 trackings:{ _id:123, surveyId: 1, userId: 123, starttime: "2015-05-13 10:46:20.347Z" endtime: "2015-05-13 10:59:20.347Z" } 

我想要做什么是得到像这样的东西:

 { survey: "survey name", _id : 1, totalAverageTime: "00:23:00", fastestTime : "00:23:00", slowestTime: "00:25:00", questions: [ { _id: 1, text: "a,b, or c?", type: "multipleChoice", mostPopularAnswer: "a", averageTime: "00:13:00", anwers : [{ userId: 1, answer: "a", time:"00:14:00"}, { userId: 2, answer: "a", time:"00:12:00"}] },{ _id: 2, text:"what do you think", type:"freeform", averageTime : "00:10:00", answers : [{ userId: 1, answer: "this is some answer", time:"00:11:00"}, { userId: 2, answer: "this is another answer", time:"00:09:00"}] } ] } 

以下方法使用聚合框架给出一个更接近所需输出的解决方案。它依赖于第三个集合,该集合可以看作是 survey 和 trackings 两个集合的合并结果。

首先,假设你有以下集合,其中的测试文档基于你问题中的示例:

 // Seed data for the walkthrough (mongo shell).
 // Each statement is on its own line so that the `//` line comments no longer
 // comment out the insert calls that follow them.

 // survey collection
 db.survey.insert({
   _id: 1111,
   name: "name",
   questions: [
     { _id: 1, text: "a,b, or c?", type: "multipleChoice", options: ["a", "b", "c"] },
     { _id: 2, text: "what do you think", type: "freeform" }
   ],
   participants: [
     { _id: 1, name: "user 1" },
     { _id: 2, name: "user 2" }
   ],
   results: [
     { _id: 123, userId: 1, questionId: 1, answer: "a" },
     { _id: 124, userId: 2, questionId: 1, answer: "b" },
     { _id: 125, userId: 1, questionId: 2, answer: "this is some answer" },
     { _id: 126, userId: 2, questionId: 2, answer: "this is another answer" }
   ]
 });

 // trackings collection: one document per (survey, user) run.
 // starttime/endtime are stored as strings here; they are converted to dates later.
 db.trackings.insert([
   {
     _id: 1,
     surveyId: 1111,
     userId: 1,
     starttime: "2015-05-13 10:46:20.347Z",
     endtime: "2015-05-13 10:59:20.347Z"
   },
   {
     _id: 2,
     surveyId: 1111,
     userId: 2,
     starttime: "2015-05-13 10:13:06.176Z",
     endtime: "2015-05-13 10:46:28.176Z"
   }
 ]);

要创建第三个集合(我们称之为 output_collection),需要使用 find() 游标的 forEach() 方法遍历 trackings 集合,将保存日期字符串的字段转换为实际的 ISODate 对象,创建一个用于存放 survey 查询结果的数组字段,然后将合并后的对象保存到第三个集合中。以下代码演示了这个操作:

 // Build output_collection: one document per tracking record, with the
 // matching survey document(s) embedded and the time strings turned into dates.
 db.trackings.find().forEach(function (tracking) {
   // Kept as an array (toArray) rather than a single document.
   tracking.survey = db.survey.find({ "_id": tracking.surveyId }).toArray();

   // Convert the stored date strings into ISODate values.
   tracking.starttime = ISODate(tracking.starttime);
   tracking.endtime = ISODate(tracking.endtime);

   db.output_collection.save(tracking);
 });

将两个集合合并到output_collection之后,使用db.output_collection.findOne()查询它将产生:

 { "_id" : 1, "surveyId" : 1111, "userId" : 1, "starttime" : ISODate("2015-05-13T10:46:20.347Z"), "endtime" : ISODate("2015-05-13T10:59:20.347Z"), "survey" : [ { "_id" : 1111, "name" : "name", "questions" : [ { "_id" : 1, "text" : "a,b, or c?", "type" : "multipleChoice", "options" : [ "a", "b", "c" ] }, { "_id" : 2, "text" : "what do you think", "type" : "freeform" } ], "participants" : [ { "_id" : 1, "name" : "user 1" }, { "_id" : 2, "name" : "user 2" } ], "results" : [ { "_id" : 123, "userId" : 1, "questionId" : 1, "answer" : "a" }, { "_id" : 124, "userId" : 2, "questionId" : 1, "answer" : "b" }, { "_id" : 125, "userId" : 1, "questionId" : 2, "answer" : "this is some answer" }, { "_id" : 126, "userId" : 2, "questionId" : 2, "answer" : "this is another answer" } ] } ] } 

然后,您可以在此集合上应用聚合。聚合管道应该由四个 $unwind 操作符阶段组成,这些阶段从输入文档中解构出数组,并为每个元素输出一个文档。每个输出文档都用一个元素值替换原来的数组。

接下来的 $project 运算符阶段对流中的每个文档进行重塑,例如添加一个新字段 duration,它使用算术运算符计算 starttime 和 endtime 这两个日期字段之间的时间差(以分钟为单位)。

之后是 $group 管道阶段,它按 surveyId 键对输入文档进行分组,并将累加器表达式应用于每个组。该阶段会消耗所有输入文档,并为每个不同的组输出一个文档。

所以你的聚合管道应该是这样的:

 // Aggregate output_collection into one summary document per survey and
 // write the result to the survey_results collection.
 db.output_collection.aggregate([
   // Flatten the embedded survey array and its nested arrays.
   { "$unwind": "$survey" },
   { "$unwind": "$survey.questions" },
   { "$unwind": "$survey.participants" },
   { "$unwind": "$survey.results" },

   // Keep the fields needed downstream and add `duration`.
   {
     "$project": {
       "survey": 1,
       "surveyId": 1,
       "userId": 1,
       "starttime": 1,
       "endtime": 1,
       // Difference between the two dates, divided by 1000 * 60 (ms -> minutes).
       "duration": {
         "$divide": [
           { "$subtract": ["$endtime", "$starttime"] },
           1000 * 60
         ]
       }
     }
   },

   // One group per surveyId; $addToSet collapses the duplicates that the
   // unwinds above produce.
   {
     "$group": {
       "_id": "$surveyId",
       "survey": { "$first": "$survey.name" },
       "totalAverageTime": { "$avg": "$duration" },
       "fastestTime": { "$min": "$duration" },
       "slowestTime": { "$max": "$duration" },
       "questions": { "$addToSet": "$survey.questions" },
       "answers": { "$addToSet": "$survey.results" }
     }
   },

   // Persist the grouped documents.
   { "$out": "survey_results" }
 ])

db.survey_results.find() 输出

 /* 0 */ { "result" : [ { "_id" : 1111, "survey" : "name", "totalAverageTime" : 23.18333333333334, "fastestTime" : 13, "slowestTime" : 33.36666666666667, "questions" : [ { "_id" : 2, "text" : "what do you think", "type" : "freeform" }, { "_id" : 1, "text" : "a,b, or c?", "type" : "multipleChoice", "options" : [ "a", "b", "c" ] } ], "answers" : [ { "_id" : 126, "userId" : 2, "questionId" : 2, "answer" : "this is another answer" }, { "_id" : 124, "userId" : 2, "questionId" : 1, "answer" : "b" }, { "_id" : 125, "userId" : 1, "questionId" : 2, "answer" : "this is some answer" }, { "_id" : 123, "userId" : 1, "questionId" : 1, "answer" : "a" } ] } ], "ok" : 1 } 

UPDATE

通过 $out 聚合管道阶段将聚合结果输出到另一个集合 survey_results 之后,就可以结合 find() 游标的 forEach() 方法使用一些原生 JavaScript 函数来得到最终对象:

 // Reshape each survey_results document: move every answer under the question
 // it belongs to, then drop the flat top-level `answers` array.
 db.survey_results.find().forEach(function (result) {
   result.questions = result.questions.map(function (question) {
     question.answers = result.answers.filter(function (answer) {
       if (answer.questionId !== question._id) {
         return false;
       }
       // The parent question now carries this link, so remove the redundant key.
       delete answer.questionId;
       return true;
     });
     return question;
   });

   delete result.answers;
   db.survey_results.save(result);
 });

输出

 /* 0 */ { "_id" : 1111, "survey" : "name", "totalAverageTime" : 23.18333333333334, "fastestTime" : 13, "slowestTime" : 33.36666666666667, "questions" : [ { "_id" : 2, "text" : "what do you think", "type" : "freeform", "answers" : [ { "_id" : 126, "userId" : 2, "answer" : "this is another answer" }, { "_id" : 125, "userId" : 1, "answer" : "this is some answer" } ] }, { "_id" : 1, "text" : "a,b, or c?", "type" : "multipleChoice", "options" : [ "a", "b", "c" ], "answers" : [ { "_id" : 124, "userId" : 2, "answer" : "b" }, { "_id" : 123, "userId" : 1, "answer" : "a" } ] } ] }