Api v3 Migration (WIP) (#7131)

* v3 migration: remove old code and polish user migration

* v3 migration: start to work on challenges

* wip v3 migration

* wip v3 migration: fix _id -> id for reminders, tags and checklists
This commit is contained in:
Matteo Pagliazzi
2016-04-30 18:34:16 +02:00
parent f69a566178
commit 6380161321
24 changed files with 350 additions and 203 deletions

View File

@@ -9,3 +9,241 @@
memberCount must be checked
prize must be >= 0
*/
// A map of (original taskId) -> [[new taskId in challenge, challengeId], ...] for challenge tasks whose id had to change
// This way we can later use the right task.challenge.taskId in users' tasks
var duplicateTasks = {};
// ... convert tasks to individual models
async.each(
challenge.dailys
.concat(challenge.habits)
.concat(challenge.rewards)
.concat(challenge.todos),
function(task, cb1) {
task = new TaskModel(task); // this should also fix dailies that wen to the habits array or vice-versa
TaskModel.findOne({_id: task._id}, function(err, taskSameId){
if(err) return cb1(err);
// We already have a task with the same id, change this one
// and will require special handling
if(taskSameId) {
task._id = shared.uuid();
task.legacyId = taskSameId._id; // We set this for challenge tasks too
// we use an array as the same task may have multiple duplicates
duplicateTasks[taskSameId._id] = duplicateTasks[taskSameId._id] || [];
duplicateTasks[taskSameId._id].push([task._id, challenge._id]);
console.log('Duplicate task ', taskSameId._id, 'challenge ', challenge._id, 'new id ', task._id);
}
task.save(function(err, savedTask){
if(err) return cb1(err);
challenge.tasksOrder[savedTask.type + 's'].push(savedTask._id);
cb1();
});
});
}, function(err) {
if(err) return cb(err);
var newChallenge = new NewChallengeModel(challenge); // This will make sure old data is discarded
newChallenge.save(function(err, chal){
if(err) return cb(err);
console.log('Processed: ', chal._id);
cb();
});
});
}, function(err) {
if(err) throw err;
processed = processed + challenges.length;
console.log('Processed ' + challenges.length + ' challenges.', 'Total: ' + processed);
if(lastChal && lastChal._id){
processChal(lastChal._id);
} else {
console.log('Done!');
// outputting the duplicate tasks
console.log(JSON.stringify(duplicateTasks, null, 4));
}
});
});
};
processChal();
// Migrate the challenges collection (and the tasks embedded in each challenge) to the new schema
// NOTE(review): the previous header was copied from users.js and said "run AFTER challenges
// migration"; presumably this script must therefore run BEFORE users.js — confirm.
// The console-stamp module must be installed (not included in package.json)
// It requires two environment variables: MONGODB_OLD and MONGODB_NEW
// Some documents are big, so the script may need more RAM than v8 allows by default (around 1.7GB).
// Run the script with --max-old-space-size=4096 to allow up to 4GB of RAM
console.log('Starting migrations/api_v3/challenges.js.');
require('babel-register');
var Q = require('q');
var MongoDB = require('mongodb');
var nconf = require('nconf');
var mongoose = require('mongoose');
var _ = require('lodash');
var uuid = require('uuid');
var consoleStamp = require('console-stamp');
// Add timestamps to console messages
consoleStamp(console);
// Initialize configuration
require('../../website/src/libs/api-v3/setupNconf')();
var MONGODB_OLD = nconf.get('MONGODB_OLD');
var MONGODB_NEW = nconf.get('MONGODB_NEW');
var MongoClient = MongoDB.MongoClient;
mongoose.Promise = Q.Promise; // otherwise mongoose models won't work
// Load new models
var NewChallenge = require('../../website/src/models/challenge').model;
var Tasks = require('../../website/src/models/task');
// To be defined later when MongoClient connects
var mongoDbOldInstance;
var oldChallengeCollection;
var mongoDbNewInstance;
var newChallengeCollection;
var newTaskCollection;
// Number of challenges read from the old database per batch
var BATCH_SIZE = 1000;
// Running totals across all batches, used only for progress logging
var processedChallenges = 0;
var totoalProcessedTasks = 0;
// Only process challenges whose _id falls in an interval, i.e. up to 0000-4000-0000-0000
// Both bounds are optional: AFTER is exclusive ($gt), BEFORE is inclusive ($lte)
var AFTER_CHALLENGE_ID = nconf.get('AFTER_CHALLENGE_ID');
var BEFORE_CHALLENGE_ID = nconf.get('BEFORE_CHALLENGE_ID');
/**
 * Migrate one batch of challenges (at most BATCH_SIZE) from the old database to the
 * new one, then recurse on the next batch until a non-full batch is read.
 *
 * For every old challenge the embedded task arrays (habits, dailys, rewards, todos)
 * are removed from the challenge document and each task is inserted as its own
 * document in the new tasks collection, linked back through `task.challenge.id`.
 *
 * @param {string} [afterId] - Only challenges with `_id` greater than this value are
 *   read. Falls back to AFTER_CHALLENGE_ID when omitted.
 * @returns {Promise} Resolves once this batch and all following batches are saved;
 *   rejects if the query or one of the bulk inserts fails.
 */
function processChallenges (afterId) {
  var processedTasks = 0;
  var lastChallenge = null;
  var oldChallenges;

  // Build the _id range: ($gt afterId | AFTER_CHALLENGE_ID) and ($lte BEFORE_CHALLENGE_ID)
  var query = {};

  if (BEFORE_CHALLENGE_ID) {
    query._id = {$lte: BEFORE_CHALLENGE_ID};
  }

  if ((afterId || AFTER_CHALLENGE_ID) && !query._id) {
    query._id = {};
  }

  if (afterId) {
    query._id.$gt = afterId;
  } else if (AFTER_CHALLENGE_ID) {
    query._id.$gt = AFTER_CHALLENGE_ID;
  }

  var batchInsertTasks = newTaskCollection.initializeUnorderedBulkOp();
  var batchInsertChallenges = newChallengeCollection.initializeUnorderedBulkOp();

  console.log(`Executing challenges query.\nMatching challenges after ${afterId ? afterId : AFTER_CHALLENGE_ID} and before ${BEFORE_CHALLENGE_ID} (included).`);

  return oldChallengeCollection
    .find(query)
    .sort({_id: 1})
    .limit(BATCH_SIZE)
    .toArray()
    .then(function (oldChallengesR) {
      oldChallenges = oldChallengesR;

      console.log(`Processing ${oldChallenges.length} challenges. Already processed ${processedChallenges} challenges and ${totoalProcessedTasks} tasks.`);

      // A full batch means there may be more challenges: remember where to resume from
      if (oldChallenges.length === BATCH_SIZE) {
        lastChallenge = oldChallenges[oldChallenges.length - 1]._id;
      }

      oldChallenges.forEach(function (oldChallenge) {
        var oldTasks = oldChallenge.habits.concat(oldChallenge.dailys).concat(oldChallenge.rewards).concat(oldChallenge.todos);

        // Tasks become standalone documents, drop the embedded arrays
        delete oldChallenge.habits;
        delete oldChallenge.dailys;
        delete oldChallenge.rewards;
        delete oldChallenge.todos;

        var newChallenge = new NewChallenge(oldChallenge);

        oldTasks.forEach(function (oldTask) {
          // TODO
          oldTask._id = oldTask.id; // keep the old uuid unless duplicated
          delete oldTask.id;

          oldTask.challenge = oldTask.challenge || {};
          // Documents read through the raw driver only carry `_id` (there is no `id` virtual)
          oldTask.challenge.id = oldChallenge._id;

          if (!oldTask.text) oldTask.text = 'task text'; // required

          // Old tags are a {tagId: isPresent} map; keep only the ids that are present
          oldTask.tags = _.map(oldTask.tags, function (tagPresent, tagId) { // TODO used for challenges' tasks?
            return tagPresent && tagId;
          }).filter(Boolean);

          newChallenge.tasksOrder[`${oldTask.type}s`].push(oldTask._id);
          if (oldTask.completed) oldTask.completed = false;

          var newTask = new Tasks[oldTask.type](oldTask);
          batchInsertTasks.insert(newTask.toObject());
          processedTasks++;
        });

        batchInsertChallenges.insert(newChallenge.toObject());
      });

      console.log(`Saving ${oldChallenges.length} challenges and ${processedTasks} tasks.`);

      // The driver refuses to execute a bulk operation with nothing queued,
      // so only execute the ones that actually received inserts
      var pendingInserts = [];
      if (oldChallenges.length > 0) pendingInserts.push(batchInsertChallenges.execute());
      if (processedTasks > 0) pendingInserts.push(batchInsertTasks.execute());

      return Q.all(pendingInserts);
    })
    .then(function () {
      totoalProcessedTasks += processedTasks;
      processedChallenges += oldChallenges.length;

      console.log(`Saved ${oldChallenges.length} challenges and their tasks.`);

      if (lastChallenge) {
        return processChallenges(lastChallenge);
      } else {
        return console.log('Done!');
      }
    });
}
// Open both database connections in parallel; once they are ready, grab the
// collections the migration works on and kick off the first batch
Q.all([
  MongoClient.connect(MONGODB_OLD),
  MongoClient.connect(MONGODB_NEW),
])
  .then(function (connections) {
    // connections[0] is the old database, connections[1] the new one
    mongoDbOldInstance = connections[0];
    mongoDbNewInstance = connections[1];

    // Source collection (old schema)
    oldChallengeCollection = mongoDbOldInstance.collection('challenges');

    // Destination collections (new schema)
    newChallengeCollection = mongoDbNewInstance.collection('challenges');
    newTaskCollection = mongoDbNewInstance.collection('tasks');

    console.log(`Connected with MongoClient to ${MONGODB_OLD} and ${MONGODB_NEW}.`);

    return processChallenges();
  })
  .catch(function (migrationError) {
    // Any connection or migration failure ends up here and is only logged
    console.error(migrationError);
  });

View File

@@ -28,11 +28,13 @@ require('../../website/src/libs/api-v3/setupNconf')();
var MONGODB_OLD = nconf.get('MONGODB_OLD');
var MONGODB_NEW = nconf.get('MONGODB_NEW');
var MongoClient = MongoDB.MongoClient;
mongoose.Promise = Q.Promise; // otherwise mongoose models won't work
// Load old and new models
//import { model as NewUser } from '../../website/src/models/user';
//import * as Tasks from '../../website/src/models/task';
// Load new models
var NewUser = require('../../website/src/models/user').model;
var NewTasks = require('../../website/src/models/task');
// To be defined later when MongoClient connects
var mongoDbOldInstance;
@@ -47,16 +49,17 @@ var BATCH_SIZE = 1000;
var processedUsers = 0;
var totoalProcessedTasks = 0;
// Only process users that fall in a interval ie -> 0000-4000-0000-0000
// Only process users that fall in an interval, i.e. up to -> 0000-4000-0000-0000
var AFTER_USER_ID = nconf.get('AFTER_USER_ID');
var BEFORE_USER_ID = nconf.get('BEFORE_USER_ID');
/* TODO
/* TODO compare old and new model
- _id 9
- challenges
- groups
- invitations
- challenges' tasks
- checklists from .id to ._id (reminders too!)
*/
function processUsers (afterId) {
@@ -70,10 +73,14 @@ function processUsers (afterId) {
query._id = {$lte: BEFORE_USER_ID};
}
if ((afterId || AFTER_USER_ID) && !query._id) {
query._id = {};
}
if (afterId) {
query._id = {$gt: afterId};
query._id.$gt = afterId;
} else if (AFTER_USER_ID) {
query._id = {$gt: AFTER_USER_ID};
query._id.$gt = AFTER_USER_ID;
}
var batchInsertTasks = newTaskCollection.initializeUnorderedBulkOp();
@@ -95,17 +102,13 @@ function processUsers (afterId) {
lastUser = oldUsers[oldUsers.length - 1]._id;
}
oldUsers.forEach(function (oldUser) {
var oldTasks = oldUser.habits.concat(oldUser.dailys).concat(oldUser.rewards).concat(oldUser.todos);
oldUser.habits = oldUser.dailys = oldUser.rewards = oldUser.todos = undefined;
delete oldUser.habits;
delete oldUser.dailys;
delete oldUser.rewards;
delete oldUser.todos;
oldUser.challenges = [];
if (oldUser.invitations) {
oldUser.invitations.guilds = [];
oldUser.invitations.party = {};
}
oldUser.party = {};
oldUser.tags = oldUser.tags.map(function (tag) {
return {
_id: tag.id,
@@ -114,37 +117,31 @@ function processUsers (afterId) {
};
});
oldUser.tasksOrder = {
habits: [],
dailys: [],
rewards: [],
todos: [],
};
//let newUser = new NewUser(oldUser);
var newUser = new NewUser(oldUser);
oldTasks.forEach(function (oldTask) {
oldTask._id = uuid.v4(); // create a new unique uuid
oldTask.userId = oldUser._id;
oldTask.userId = newUser._id;
oldTask.legacyId = oldTask.id; // store the old task id
delete oldTask.id;
oldTask.challenge = {};
if (!oldTask.text) oldTask.text = 'text';
if (!oldTask.text) oldTask.text = 'task text'; // required
oldTask.tags = _.map(oldTask.tags, function (tagPresent, tagId) {
return tagPresent && tagId;
});
if (oldTask.type !== 'todo' || (oldTask.type === 'todo' && !oldTask.completed)) {
oldUser.tasksOrder[`${oldTask.type}s`].push(oldTask._id);
newUser.tasksOrder[`${oldTask.type}s`].push(oldTask._id);
}
//let newTask = new Tasks[oldTask.type](oldTask);
var newTask = new NewTasks[oldTask.type](oldTask);
batchInsertTasks.insert(oldTask);
batchInsertTasks.insert(newTask.toObject());
processedTasks++;
});
batchInsertUsers.insert(oldUser);
batchInsertUsers.insert(newUser.toObject());
});
console.log(`Saving ${oldUsers.length} users and ${processedTasks} tasks.`);
@@ -171,126 +168,28 @@ function processUsers (afterId) {
/*
TODO var challengeTasksChangedId = {};
... given a user
let processed = 0;
let batchSize = 1000;
var db; // defined later by MongoClient
var dbNewUsers;
var dbTasks;
var processUser = function(gt) {
var query = {
_id: {}
};
if(gt) query._id.$gt = gt;
console.log('Launching query', query);
// take batchsize docs from users and process them
OldUserModel
.find(query)
.lean() // Use plain JS objects as old user data won't match the new model
.limit(batchSize)
.sort({_id: 1})
.exec(function(err, users) {
if(err) throw err;
console.log('Processing ' + users.length + ' users.', 'Already processed: ' + processed);
var lastUser = null;
if(users.length === batchSize){
lastUser = users[users.length - 1];
}
var tasksToSave = 0;
// Initialize batch operation for later
var batchInsertUsers = dbNewUsers.initializeUnorderedBulkOp();
var batchInsertTasks = dbTasks.initializeUnorderedBulkOp();
users.forEach(function(user){
// user obj is a plain js object because we used .lean()
// add tasks order arrays
user.tasksOrder = {
habits: [],
rewards: [],
todos: [],
dailys: []
};
// ... convert tasks to individual models
var tasksArr = user.dailys
.concat(user.habits)
.concat(user.todos)
.concat(user.rewards);
// free memory?
user.dailys = user.habits = user.todos = user.rewards = undefined;
tasksArr.forEach(function(task){
task.userId = user._id;
task._id = shared.uuid(); // we rely on these to be unique... hopefully!
task.legacyId = task.id;
task.id = undefined;
task.challenge = task.challenge || {};
if(task.challenge.id) {
// If challengeTasksChangedId[task._id] then we got on of the duplicates from the challenges migration
if (challengeTasksChangedId[task.legacyId]) {
var res = _.find(challengeTasksChangedId[task.legacyId], function(arr){
return arr[1] === task.challenge.id;
});
// If res, id changed, otherwise matches the original one
task.challenge.taskId = res ? res[0] : task.legacyId;
} else {
task.challenge.taskId = task.legacyId;
}
}
if(!task.type) console.log('Task without type ', task._id, ' user ', user._id);
task = new TaskModel(task); // this should also fix dailies that wen to the habits array or vice-versa
user.tasksOrder[task.type + 's'].push(task._id);
tasksToSave++;
batchInsertTasks.insert(task.toObject());
});
batchInsertUsers.insert((new NewUserModel(user)).toObject());
tasksArr.forEach(function(task){
task.challenge = task.challenge || {};
if(task.challenge.id) {
// If challengeTasksChangedId[task._id] then we got on of the duplicates from the challenges migration
if (challengeTasksChangedId[task.legacyId]) {
var res = _.find(challengeTasksChangedId[task.legacyId], function(arr){
return arr[1] === task.challenge.id;
});
console.log('Saving', users.length, 'users and', tasksToSave, 'tasks');
// If res, id changed, otherwise matches the original one
task.challenge.taskId = res ? res[0] : task.legacyId;
} else {
task.challenge.taskId = task.legacyId;
}
}
// Save in the background and dispatch another processUser();
batchInsertUsers.execute(function(err, result){
if(err) throw err // we can't simply accept errors
console.log('Saved', result.nInserted, 'users')
});
batchInsertTasks.execute(function(err, result){
if(err) throw err // we can't simply accept errors
console.log('Saved', result.nInserted, 'tasks')
});
processed = processed + users.length;
if(lastUser && lastUser._id){
processUser(lastUser._id);
} else {
console.log('Done!');
}
});
};
if(!task.type) console.log('Task without type ', task._id, ' user ', user._id);
});
*/
// Connect to the databases
var MongoClient = MongoDB.MongoClient;
Q.all([
MongoClient.connect(MONGODB_OLD),
MongoClient.connect(MONGODB_NEW),