I have a pandas DataFrame containing mobile numbers.
I want to add a new column that classifies each number according to its digit pattern, using regular expressions.
import pandas as pd

# Sample mobile numbers to classify.
numbers = [539249751, 530246444, 539246655, 539209759, 538849098]
# Create the pandas DataFrame, providing the column name explicitly.
vanity_class = pd.DataFrame(numbers, columns=['MNM_MOBILE_NUMBER'])
I have written a function that is meant to go through the column MNM_MOBILE_NUMBER,
identify the pattern of each number using regex, and then create a new column MNC_New_Class
with the relevant classification.
# Attempted classifier: meant to label a mobile number as Diamond / Gold / Silver /
# Bronze / Special / Economy / Non Classified according to the regex groups below.
#
# NOTE: this version is the one that raises the ValueError. The parameter
# MNM_MOBILE_NUMBER is never used; every condition instead calls `.str.match(...)`
# on the whole `vanity_class.MNM_MOBILE_NUMBER` column, which yields a boolean
# Series. Combining those with `|` still gives a Series, and `if` / `elif` cannot
# reduce a Series to a single truth value.
# The branches assign a scalar to `vanity_class['MNC_New_Class']` (i.e. to the
# entire column) and the function has no `return` statement.
def vanity_def(MNM_MOBILE_NUMBER):
if vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(\d)\1{7}') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(?!(\d)\1)\d(\d)\2{6}$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{2}(?!(\d)\1)\d(\d)\2{5}$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^\d*(\d)(\d)(?:\1\2){3}\d*$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5((\d)\2{3})((\d)\4{3})$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(1234567$)'):
vanity_class['MNC_New_Class'] = 'Diamond'
elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(?!(\d)\1)\d(\d)\2{4}$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^(?!(\d)\1)\d((\d)\3{6})(?!\3)\d$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\d(\d)\1(\d)\2(\d)\3(\d)\4'):
vanity_class['MNC_New_Class'] = 'Gold'
elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"^5(?!(\d)\1)\d((\d)\3{5})(?!\3)\d") | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\b\d\d(\d)(?!\1)(\d)\2\2(\d)\3\3\b"):
vanity_class['MNC_New_Class'] = 'Silver'
elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(123456$)') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5.{3}(?!(\d)\1)\d(\d)\2{3}$') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\d\d(?!(\d)\1)\d((\d)\3{4})(?!\3)\d') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\b\d\d(\d)(\d(00))\2') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(\d(000))(\d(000))'):
vanity_class['MNC_New_Class'] = 'Bronze'
elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\d\d(?!(\d)\1)\d((\d)\3{3})(?!\3)\d") | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\d\d\d(\d\d)(?!\1)(\d)\2(\d)\3\b") | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"^\d*(\d)(\d)(?:\1\2){2}\d*$"):
vanity_class['MNC_New_Class'] = 'Special'
elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{4}(45678$)') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{5}(?!(\d)\1)\d(\d){3}') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{5}(1234$)') | \
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'(?!.*(\d)\1(\d)\2(\d)\3).{4}\d(\d)\4(\d)\5') |\
vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^\d*(\d)(\d)(?:\1\2){1}\d*$'):
vanity_class['MNC_New_Class'] = 'Economy'
else:
vanity_class['MNC_New_Class'] = 'Non Classified'
Then I wrote code to apply this function to the DataFrame and create the new column.
# Call vanity_def on each value of the MNM_MOBILE_NUMBER column and store the
# values it returns in the MNC_New_Class column.
vanity_class['MNC_New_Class'] = vanity_class['MNM_MOBILE_NUMBER'].apply(vanity_def)
However, I keep getting the error below:
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
Any advice on how to avoid this error?
Thanks
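The error comes from the function working on the whole column instead of on the single number passed to it: each `vanity_class.MNM_MOBILE_NUMBER...str.match(...)` call returns a boolean Series, and an `if` statement cannot reduce a Series to one truth value.
To avoid the error and get the intended result, classify one number at a time with `re.match` and return the label, as in the workaround below.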
import re

# Ordered (label, compiled patterns) tiers. Tiers are tried top to bottom and the
# first tier in which any pattern matches decides the label, so order matters.
# Every pattern is applied with ``match``, i.e. starting at the first character.
_VANITY_TIERS = tuple(
    (label, tuple(re.compile(pattern) for pattern in patterns))
    for label, patterns in (
        ('Diamond', (
            r'^5(\d)\1{7}',
            r'^5(?!(\d)\1)\d(\d)\2{6}$',
            r'.{2}(?!(\d)\1)\d(\d)\2{5}$',
            r'^\d*(\d)(\d)(?:\1\2){3}\d*$',
            r'^5((\d)\2{3})((\d)\4{3})$',
            # NOTE(review): 3 + 7 characters means this needs a 10-character
            # string; the sample numbers have 9 digits -- confirm the prefix width.
            r'.{3}(1234567$)',
        )),
        ('Gold', (
            r'.{3}(?!(\d)\1)\d(\d)\2{4}$',
            r'^(?!(\d)\1)\d((\d)\3{6})(?!\3)\d$',
            r'\d(\d)\1(\d)\2(\d)\3(\d)\4',
        )),
        ('Silver', (
            r"^5(?!(\d)\1)\d((\d)\3{5})(?!\3)\d",
            r"\b\d\d(\d)(?!\1)(\d)\2\2(\d)\3\3\b",
        )),
        ('Bronze', (
            r'.{3}(123456$)',
            r'^5.{3}(?!(\d)\1)\d(\d)\2{3}$',
            r'\d\d(?!(\d)\1)\d((\d)\3{4})(?!\3)\d',
            r'\b\d\d(\d)(\d(00))\2',
            r'^5(\d(000))(\d(000))',
        )),
        ('Special', (
            r"\d\d(?!(\d)\1)\d((\d)\3{3})(?!\3)\d",
            r"\d\d\d(\d\d)(?!\1)(\d)\2(\d)\3\b",
            r"^\d*(\d)(\d)(?:\1\2){2}\d*$",
        )),
        ('Economy', (
            r'.{4}(45678$)',
            # NOTE(review): ``(\d){3}`` accepts ANY three digits, not one digit
            # repeated three times, so this matches whenever the 6th and 7th
            # characters differ -- confirm that this is intended.
            r'.{5}(?!(\d)\1)\d(\d){3}',
            r'.{5}(1234$)',
            r'(?!.*(\d)\1(\d)\2(\d)\3).{4}\d(\d)\4(\d)\5',
            r'^\d*(\d)(\d)(?:\1\2){1}\d*$',
        )),
    )
)


def vanity_def(num):
    """Return the vanity class label for a single mobile number.

    Args:
        num: The number to classify. It is converted with ``str()`` once, so an
            ``int`` or a ``str`` of digits both work.

    Returns:
        One of ``'Diamond'``, ``'Gold'``, ``'Silver'``, ``'Bronze'``,
        ``'Special'`` or ``'Economy'`` -- the first tier, in that order, with a
        matching pattern -- or ``'Non Classified'`` when nothing matches.
    """
    digits = str(num)
    for label, patterns in _VANITY_TIERS:
        if any(pattern.match(digits) for pattern in patterns):
            return label
    return 'Non Classified'
Output:
MNM_MOBILE_NUMBER MNC_New_Class
0 539249751 Economy
1 530246444 Economy
2 539246655 Special
3 539209759 Economy
4 538849098 Economy