Preface:
I do not care about the viability of my particular quick sort algorithm. That is not what the question is about. I am curious as to why the program I have already written behaves the way it does, not about why I should use the STL or whatever. This is for learning purposes. (for instance, I'm aware picking pivot as the head is not the best case -- that's not what I'm asking though)
Question:
I have a linked list composed of Node*
s. I've written a quick sort algorithm for it specifically. It works by taking a given linked list, and recursively splitting it into sublists based on a chosen pivot
. The pivot is always the head
(front) of the list, and the sublists left
and right
are lists that contain values less than and greater than/equal to the pivot, respectively. I've almost gotten it down, but I don't know how to properly add_link the pivots. In a previous iteration, I would just add_link the pivot to the right sublist always, but that caused an infinite loop, so I ended up skipping the pivots entirely when comparing values against it. The resulting list is sorted, but it is missing every value that was used for a pivot. How should I fix this?
Here is a minimal reproducible example:
#include <iostream>
struct Link {
Link(int d=int(), Link* n=nullptr) {
data = d;
next = n;
}
int data;
Link* next;
};
struct LinkList {
Link* sentinel;
size_t size;
LinkList(Link* h=new Link, size_t s=0) {
size = s;
sentinel = h;
}
~LinkList() {
Link* current = sentinel;
while (current != 0) {
Link* next = current->next;
delete current;
current = next;
}
sentinel = 0;
}
};
// Prototypes
Link* sort_q(Link* sentinel);
void divide(Link* sentinel, Link* cmp, Link*& lower, Link*& greater);
Link* concatenate(Link* lower, Link* greater);
// helpful function calls quick sort
void sort(LinkList& l) {
l.sentinel = sort_q(l.sentinel);
}
// returns false if there sentinel = null; true if add_link was successful
bool add_link(Link* sentinel, Link* add_link, Link*& end) {
if (sentinel == nullptr) {
return false;
}
Link* curr = sentinel;
for (; curr->next; curr = curr->next) {}
curr->next = add_link;
end = curr->next;
return true;
}
Link* sort_q(Link* sentinel) {
Link* lower = nullptr;
Link* greater = nullptr;
Link* cmp = sentinel;
// base case LinkList = null or sentinel->null
if (sentinel == nullptr || sentinel->next == nullptr) {
return sentinel;
}
divide(sentinel, cmp, lower, greater);
lower = sort_q(lower);
greater = sort_q(greater);
return concatenate(lower, greater);
}
void divide(Link* sentinel, Link* cmp, Link*& lower, Link*& greater) {
lower = new Link, greater = new Link;
// lend is pointer to end of lower subLinkList
// rend is pointer to end of greater subLinkList
Link* lend = nullptr, * rend = nullptr;
// loop through LinkList until end
while (sentinel != nullptr) {
if (sentinel == cmp) {
sentinel = sentinel->next; continue;
}
if (sentinel->data < cmp->data) {
// break current link
Link* tmp = sentinel;
sentinel = sentinel->next;
tmp->next = nullptr;
// if subLinkList is not empty, add_link current Link to subLinkList and update end pointer
if (add_link(lend, tmp, lend))
continue;
// otherwise, "add_link" current Link to empty subLinkList and update end pointer manually
lower->next = tmp;
lend = lower->next;
}
else {
// break current link
Link* tmp = sentinel;
sentinel = sentinel->next;
tmp->next = nullptr;
// if subLinkList is not empty, add_link current Link to subLinkList and update end pointer
if (add_link(rend, tmp, rend))
continue;
// otherwise, "add_link" current Link to empty subLinkList and update end pointer manually
greater->next = tmp;
rend = greater->next;
}
}
// remove dummy Link(s)
if (lower->next)
lower = lower->next;
else
lower = cmp;
if (greater->next)
greater = greater->next;
else
greater = cmp;
// unlink cmp
cmp->next = nullptr;
}
// connected subLinkLists
Link* concatenate(Link* lower, Link* greater) {
Link* sentinel;
sentinel = lower;
while (lower->next != nullptr) {
lower = lower->next;
}
lower->next = greater;
return sentinel;
}
void print(LinkList &l) {
for (Link* n = l.sentinel; n != NULL; n = n->next) {
std::cout << n->data << '\n';
}
}
int main() {
// set up linked LinkList 8->4->5->11->7->5->3->9->null
Link* sentinel = new Link(8 , new Link(4, new Link(5, new Link(11, new Link(7, new Link(5, new Link(3, new Link(9))))))));
LinkList l(sentinel,5);
sort(l);
print(l);
return 0;
}
The output I want to have is
3
4 // pivot missing from output
5
5
7
8 // pivot missing from output
9
11
but it outputs
3
5
5
7
9
11
Edit #1:
I have tried adding the pivot between concatenations, but that does not work either. It produces different results, but not quite the same. Modifying sort_q()
and partition()
like so:
Link* qsort(Link* sentinel) {
// ... other stuff before modification
return concatenate(lower, cmp); // changed greater argument to pass cmp which is now cmp->next = greater once finishing partition()
}
void partition(Link* sentinel, Link*& cmp, Link*& lower, Link*& greater) {
// .. other stuff before modifications
// remove dummy Link
if (lower->next)
lower = lower->next;
if (greater->next)
greater = greater->next;
cmp->next = greater; // cmp points to greater (greater) sublist
}
The output becomes
3
4
5
7
0
8
11
0
You have assumptions that you can get both a lower and a greater list, and you already make sure you have at least 2 items in the list before you call divide, so that is good.
You just need to handle all the cases at the end of divide. Make sure cmp ends up in one of the two lists. Almost like you had before, but you need to handle the case where both lower and greater lists are non-empty.
// remove dummy node(s)
cmp->next = nullPtr;
if (lower->next && greater->next) {
// we have two lists. put cmp on greater
lower = lower->next;
cmp->next = greater->next;
greater = cmp;
}
else if (lower->next) {
// only a lower list, use cmp on greater
lower = lower->next;
greater = cmp;
}
else if (greater->next) {
// only a greater list, use cmp as lower.
greater = greater->next;
lower = cmp;
}
seeing the above, handling all 3 cases, it can be simplified to this:
// remove dummy node(s)
if (lower->next) {
// we have lower node, so put cmp on greater
lower = lower->next;
cmp->next = greater->next;
greater = cmp;
}
else if (greater->next) {
// only a greater list, use cmp as lower.
greater = greater->next;
lower = cmp;
cmp->next = nullPtr;
}
then use concatenate(lower,greater)
afterwards. Although it could be optimized to have divide
concatenate the lists and just return the sentinel, but that's a bit more of a rewrite.
EDIT: putting it all together to eliminate your memory leak, it should be like this (note, I have not compiled or tested)
void divide(Node* sentinel, Node* cmp, Node*& lower, Node*& greater) {
lower = nullptr, greater = nullptr;
// lend is pointer to end of lower sublist
// rend is pointer to end of greater sublist
Node* lend = nullptr, * rend = nullptr;
// loop through list until end
while (sentinel != nullptr) {
if (sentinel == cmp) {
sentinel = sentinel->next; continue;
}
if (sentinel->data < cmp->data) {
// break current link
Node* tmp = sentinel;
sentinel = sentinel->next;
tmp->next = nullptr;
// if sublist is not empty, append current node to sublist and update end pointer
if (append(lend, tmp, lend))
continue;
// otherwise, "append" current node to empty sublist and update end pointer manually
lend = lower = tmp;
}
else {
// break current link
Node* tmp = sentinel;
sentinel = sentinel->next;
tmp->next = nullptr;
// if sublist is not empty, append current node to sublist and update end pointer
if (append(rend, tmp, rend))
continue;
// otherwise, "append" current node to empty sublist and update end pointer manually
rend = greater = tmp;
}
}
// insert cmp node
if (lower) {
// we have lower node, so put cmp on greater
cmp->next = greater;
greater = cmp;
}
else if (greater) {
// only a greater list, use cmp as lower.
lower = cmp;
cmp->next = nullptr;
}
}