Looking through the itertools module, I don't see anything that could be used as a generic, iterable version of str.split. Is there a simple, idiomatic way of doing this?
These unit tests should demonstrate what I mean:
class SplitAnalog(unittest.TestCase):
    """Specification for ``split(predicate, iterable)``.

    Each test pairs the desired behaviour with the equivalent ``str.split``
    call: items for which the predicate is true act as separators, and no
    empty groups are expected in the result.
    """

    def test_splitEmpty(self):
        r"""Splitting an empty iterable produces no groups.

        >>> ''.split()
        []
        """
        groups = split(None, [])
        self.assertEqual(tuple(groups), ())

    def test_singleLine(self):
        r"""A single trailing separator does not create an empty group.

        >>> '123\n'.split()
        ['123']
        """
        expected = ((1, 2, 3),)
        groups = split(lambda n: n is None, [1, 2, 3, None])
        self.assertEqual(tuple(tuple(group) for group in groups), expected)

    def test_allNones(self):
        r"""Input made only of separators produces no groups.

        >>> '\n\n\n'.split()
        []
        """
        groups = split(lambda n: n is None, [None] * 3)
        self.assertEqual(tuple(groups), ())

    def test_splitNumsOnNone(self):
        r"""``None`` items separate the runs of digits.

        >>> '314159\n26535\n89793'.split()
        ['314159', '26535', '89793']
        """
        digits = [3, 1, 4, 1, 5, 9, None, 2, 6, 5, 3, 5, None, 8, 9, 7, 9, 3]
        expected = (
            (3, 1, 4, 1, 5, 9),
            (2, 6, 5, 3, 5),
            (8, 9, 7, 9, 3),
        )
        groups = split(lambda n: n is None, digits)
        self.assertEqual(tuple(tuple(group) for group in groups), expected)

    def test_splitNumsOnNine(self):
        """Any predicate may mark separators; here every 9 is one."""
        digits = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 9, 8, 7, 3]
        expected = (
            (3, 1, 4, 1, 5),
            (2, 6, 5, 3, 5),
            (8, 7, 3),
        )
        groups = split(lambda n: n == 9, digits)
        self.assertEqual(tuple(tuple(group) for group in groups), expected)
What would such a function be called? I can't find an example even when I poke around in other language libraries.
Assuming I understand what you're after, maybe something like this, built on itertools.groupby:
def pseudosplit(predicate, seq):
    """Lazily split *seq* into tuples of consecutive non-separator items.

    An item counts as a separator when ``predicate(item)`` is truthy.
    Separators are dropped from the output, and leading, trailing or
    adjacent separators never produce empty groups -- the same behaviour
    as ``str.split()`` called with no arguments.

    Args:
        predicate: One-argument callable; a truthy result marks a separator.
            It is only called when *seq* yields at least one item.
        seq: Any iterable to split.

    Returns:
        A generator yielding one tuple per run of non-separator items.
    """
    # Imported here so the snippet works without a module-level import.
    from itertools import groupby

    def is_content(item):
        # ``not`` normalises the predicate's result to a bool, so every
        # truthy value falls into the same "separator" run.
        return not predicate(item)

    # groupby emits alternating runs keyed True (content) and False
    # (separators); only the content runs are kept.
    return (tuple(run) for keep, run in groupby(seq, key=is_content) if keep)
which produces
>>> list(pseudosplit(lambda x: x is None, ()))
[]
>>> list(pseudosplit(lambda x: x is None, [1,2,3]))
[(1, 2, 3)]
>>> list(pseudosplit(lambda x: x is None, [None]*3))
[]
>>> list(pseudosplit(lambda x: x is None, [3, 1, 4, 1, 5, 9, None, 2, 6, 5, 3, 5, None, 8, 9, 7, 9, 3, None]))
[(3, 1, 4, 1, 5, 9), (2, 6, 5, 3, 5), (8, 9, 7, 9, 3)]
which seems to split as your test cases do, anyway.