today I did a test for a job and was asked to search through an array of integers, this is the question:
The goal of this exercise is to check the presence of a number in an array.
Specifications:
The items are integers arranged in ascending order.
The array can contain up to 1 million items
Implement the function existsInArray(_ numbers: [Int], _ k: Int) so that it returns true if k belongs to numbers, otherwise the function should return false.
Example:
let numbers = [-9, 14, 37, 102] existsInArray(numbers, 102) // returns true existsInArray(numbers, 36) //returns false
Note: Try to save CPU cycles
Alright, so I gave my answer which is the code below and waited for the result
func existsInArray(_ numbers: [Int], _ k: Int) -> Bool {
if numbers.isEmpty {
return false
}
let numbersHalfIndex: Int = (numbers.count/2)
if k == numbers[numbersHalfIndex] {
return true
} else if k != numbers[0] && numbers.count == 1 {
return false
} else if k <= numbers[numbersHalfIndex] {
let leftHalfNumbersArray = numbers[0 ..< numbersHalfIndex]
return existsInArray(Array(leftHalfNumbersArray), k)
} else if k > numbers[numbersHalfIndex] {
let rightHalfNumbersArray = numbers[numbersHalfIndex ..< numbers.count]
return existsInArray(Array(rightHalfNumbersArray), k)
} else {
return false
}
}
So turns out that "The solution doesn't work in a reasonable time with one million items" and now I don't know what I did wrong since binary search is fast as f*ck.
My only guess is that maybe number.count or numbers[0 ...< numbersHalfIndex] or numbers[numbersHalfIndex ...< number.count] makes everything go slower than expected.
Am I tripping or something?
Edit: If anyone is curious I tested my code and Martin R code to see how much of an impact using ArraySlice have in terms of time. I used an array of 100.000.000 itens in ascending order starting from 0. Here is how I captured the time:
print("////////// MINE //////////")
var startTime = CFAbsoluteTimeGetCurrent()
print(existsInArray(numbers, 0))
var timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed for mine: \(timeElapsed) s.")
print("////////// Martin R //////////")
counter = 0
startTime = CFAbsoluteTimeGetCurrent()
print(existsInArrayOptimal(numbers, 0))
timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed for Martin R: \(timeElapsed) s.")
And here is the result:
////////// MINE //////////
true
Time elapsed for mine:
1.2008800506591797 s.
////////// Martin R //////////
true
Time elapsed for Martin R: 0.00012993812561035156 s.
It's about 1000x faster!
Accessing number.count
is not a problem because that is a O(1) operation for arrays. And slicing with numbers[0 ...< numbersHalfIndex]
is not a problem either. But Array(leftHalfNumbersArray)
creates a new array from the slice, and that copies all the elements.
There are two possible ways to avoid that:
A demonstration of the second approach:
func existsInArray(_ numbers: ArraySlice<Int>, _ k: Int) -> Bool {
if numbers.isEmpty {
return false
}
let numbersHalfIndex = numbers.startIndex + numbers.count / 2
if k == numbers[numbersHalfIndex] {
return true
} else if k < numbers[numbersHalfIndex] {
return existsInArray(numbers[..<numbersHalfIndex], k)
} else {
return existsInArray(numbers[(numbersHalfIndex + 1)...], k)
}
}
Note that array slices share their indices with the original array so that the indices do not necessarily start at zero. That's why numbers.startIndex
is used for the index calculation.
And a wrapper function which takes a “real” array argument:
func existsInArray(_ numbers: [Int], _ k: Int) -> Bool {
return existsInArray(numbers[...], k)
}
As @Leo suggested, you can implement this as a collection method instead of implementing two separate methods. Collection indices are not necessarily integers, but for a RandomAccessCollection
the index calculations are guaranteed to be O(1). You can also generalize it to collections of arbitrary comparable elements instead of integers.
Here is a possible implementation:
extension RandomAccessCollection where Element: Comparable {
/// Returns a Boolean value indicating whether the collection contains the
/// given element. It is assumed that the collection elements are sorted
/// in ascending (non-decreasing) order.
///
/// - Parameter element: The element to find in the collection.
/// - Returns: `true` if the element was found in the collection; otherwise,
/// `false`.
///
/// - Complexity: O(log(*n*)), where *n* is the size of the collection.
func binarySearch(for element: Element) -> Bool {
if isEmpty {
return false
}
let midIndex = index(startIndex, offsetBy: count / 2)
if element == self[midIndex] {
return true
} else if element < self[midIndex] {
return self[..<midIndex].binarySearch(for: element)
} else {
return self[index(after: midIndex)...].binarySearch(for: element)
}
}
}
Usage:
let numbers = [-9, 14, 37, 102]
print(numbers.binarySearch(for: 102)) // true
print(numbers.binarySearch(for: 36)) // false
Alternatively a non-recursive method which updates the indices of the search range:
extension RandomAccessCollection where Element: Comparable {
func binarySearch(for element: Element) -> Bool {
var lo = startIndex
var hi = endIndex
while lo < hi {
let mid = index(lo, offsetBy: distance(from: lo, to: hi) / 2)
if element == self[mid] {
return true
} else if element < self[mid] {
hi = mid
} else {
lo = index(after: mid)
}
}
return false
}
}