Search code examples
swiftgenericsnestedmergesortnested-generics

Nested function crashing the Swift compiler


I wrote a recursive mergeSort function:

func mergeSort<T: Comparable>(inout array: [T]) {
    if array.count <= 1 {
        return
    }

    var leftSlice = [T](array[0..<array.count / 2])
    var rightSlice = [T](array[array.count / 2...array.endIndex - 1])

    mergeSort(&leftSlice)
    mergeSort(&rightSlice)
    array = merge(leftSlice, rightSlice)
}

func merge<T: Comparable>(var left: [T], var right: [T]) -> [T] {
    var mergedValues = [T]()

    while !left.isEmpty && !right.isEmpty {
        mergedValues.append(left.first! < right.first! ? left.removeAtIndex(0) : right.removeAtIndex(0))
    }

    if !left.isEmpty {
        mergedValues += left
    } else if !right.isEmpty {
        mergedValues += right
    }

    return mergedValues
}

Now, since merge() is only supposed to be used by mergeSort() I placed it inside of mergeSort(), therefore making merge() a nested function:

func mergeSort<T: Comparable>(inout array: [T]) {
    func merge<T: Comparable>(var left: [T], var right: [T]) -> [T] {
        var mergedValues = [T]()

        while !left.isEmpty && !right.isEmpty {
            mergedValues.append(left.first! < right.first! ? left.removeAtIndex(0) : right.removeAtIndex(0))
        }

        if !left.isEmpty {
            mergedValues += left
        } else if !right.isEmpty {
            mergedValues += right
        }

        return mergedValues
    }

    if array.count <= 1 {
        return
    }

    var leftSlice = [T](array[0..<array.count / 2])
    var rightSlice = [T](array[array.count / 2...array.endIndex - 1])

    mergeSort(&leftSlice)
    mergeSort(&rightSlice)
    array = merge(leftSlice, rightSlice)
}

Now the first version works fine, but the second one doesn't.
How can that be?


Solution

  • Looks like you've found a bug in the compiler related to nested generic functions. Here's a reduction that also crashes the 1.2 compiler:

    func f<T>(t: T) {
        func g<U>(u: U) { }
    }
    

    But in this case, you don't actually need a generic version of merge. Its generic parameter is the same as the outer function, therefore just use that:

    func mergeSort<T: Comparable>(inout array: [T]) {
        // no generic placeholder needed, T is the T for mergeSort
        func merge(var left: [T], var right: [T]) -> [T] {
          // etc.
        }
    }
    

    This appears to work fine.

    However, it's also worth pointing out that in your merge function, you're calling removeAtIndex in a loop, which is a O(n) function. This means your merge sort is not going to have the hoped-for complexity.

    Here's an alternative version to consider:

    func mergeSort<T: Comparable>(inout array: [T], range: Range<Int>? = nil) {
    
        func merge(left: Range<Int>, right: Range<Int>) -> [T] {    
            var tmp: [T] = []
            tmp.reserveCapacity(count(left) + count(right))
    
            var l = left.startIndex, r = right.startIndex
    
            while l != left.endIndex && r != right.endIndex {
                if array[l] < array[r] {
                    tmp.append(array[l++])
                }
                else {
                    tmp.append(array[r++])
                }
            }
            // where left or right may be empty, this is a no-op
            tmp += source[l..<left.endIndex]
            tmp += source[r..<right.endIndex]
    
            return tmp
        }
    
        // this allows the original caller to omit the range,
        // the default being the full array
        let r = range ?? indices(array)
        if count(r) > 1 {
            let mid = r.startIndex.advancedBy(r.startIndex.distanceTo(r.endIndex)/2)
            let left = r.startIndex..<mid
            let right = mid..<r.endIndex
    
            mergeSort(&array, range: left)
            mergeSort(&array, range: right)
            let merged = merge(left, right)
            array.replaceRange(r, with: merged)
        }
    }
    

    I'd also say that since merge is probably a generically useful function in its own right, you may as well make it stand-alone rather than nesting it (similarly, partition when implementing a quick sort). The nesting doesn't buy you anything (outside of the trick I used above of referencing the outer parameter from within it, which is probably a bad practice anyway, I mostly did it to show you can :)