E.Y. E.Y. - 1 month ago 12
C++ Question

Insertion and Merge Sorts Does not Work on Big Data Sets C++

I have been trying to write an insertion and a merge sort for a data set that I read from a file. When testing my code I used a small data set( includes 6 numbers) and my program worked perfectly. but when I used a bigger data set with 1000000 inputs the code is not working and I cant see why. I tried to change the type of vectors to double but it does not solve the problem.
Thank you in advance for all your helps.

My data set consists of numbers like: 512069, 12823, 11628

here is my code:

vector<int> readFile(string fileName);
void display(vector<int> &vector);
void insertionSort(vector<int> &vec);
vector<int> merge(vector<int> left, vector<int> right);
vector<int> mergeSort(vector<int> &m);

int main(int argc, const char * argv[]) {

string fileName;
cout<<"Enter input file name :";
cin>>fileName;

vector<int> numbersVec = readFile(fileName);
display(numbersVec);

cout<<"INSERTION SORT"<<"\n";
insertionSort(numbersVec);
display(numbersVec);

cout<<"MERGE SORT"<<"\n";
vector<int> neu = mergeSort(numbersVec);
display(neu);


return 0;
}


vector<int> readFile(string fileName){

vector<int> numbers;
ifstream in(fileName,std::ios::in);

if(!in.is_open())
{
cout << "File Cannot be Opened" << endl;
}

else{

int number;
while (in >> number) {
numbers.push_back(number);
}
}

in.close();
return numbers;
}


void display(vector<int> &vec) {

for(int i = 0; i < vec.size(); i++)
{
cout << vec[i] << " ";
}
cout << "\n" << endl;

}


void insertionSort(vector<int> &vec) {

long double i, j, tmp;

for (i = 1; i < vec.size(); i++) {

j = i;

while (j > 0 && vec[j - 1] > vec[j]) {

tmp = vec[j];
vec[j] = vec[j - 1];
vec[j - 1] = tmp;
j--;

}
}
}


vector<int> merge(vector<int> tmpl, vector<int> tmpr){

vector<int> res;

while ((int)tmpl.size() > 0 || (int)tmpr.size() > 0) {

if ((int)tmpl.size() > 0 && (int)tmpr.size() > 0) {

if ((int)tmpl.front() <= (int)tmpr.front()) {

res.push_back((int)tmpl.front());
tmpl.erase(tmpl.begin());

}

else {

res.push_back((int)tmpr.front());
tmpr.erase(tmpr.begin());

}

}
else if ((int)tmpl.size() > 0) {

for (int i = 0; i < (int)tmpl.size(); i++)

res.push_back(tmpl[i]);

break;
}

else if ((int)tmpr.size() > 0) {

for (int i = 0; i < (int)tmpr.size(); i++)

res.push_back(tmpr[i]);

break;

}

}

return res;

}


vector<int> mergeSort(vector<int> &vec)
{
if (vec.size() <= 1)

return vec;

vector<int> tmpl, tmpr, res;

int mid = ((int)vec.size()+ 1) / 2;

for (int i = 0; i < mid; i++) {

tmpl.push_back(vec[i]);

}

for (int i = mid; i < (int)vec.size(); i++) {

tmpr.push_back(vec[i]);

}

tmpl = mergeSort(tmpl);

tmpr = mergeSort(tmpr);

res = merge(tmpl, tmpr);

return res;
}

Answer

Your algorithms seem fine. It is only a matter of complexity. If you count the number of times the while of the insertion sort algorithm is executed, on average, it is close to n(n-1)/2 where n is the size of your data set (see insertion sort).

If n=1.000.000, the complexity is close to 500.000.000.000 which is very long.

Just try to comment the call to insertionSort in main and your main function should end early.

Note that even if you do (too) multiple vector copies in your mergeSort algorithm, it will terminate early. Complexity is 'n * log(n)' (see merge sort).

Comments