Search code examples
python dataframe boolean user-defined-functions valueerror

ValueError when Applying a Function to a Dataframe Python


I have a pandas data frame that consists of mobile numbers.

I want to create a new column to classify each number based on its pattern using regex.

# Sample mobile numbers to classify.
numbers = [
    539249751,
    530246444,
    539246655,
    539209759,
    538849098,
]

# Build the pandas DataFrame; the single column is named explicitly.
vanity_class = pd.DataFrame({'MNM_MOBILE_NUMBER': numbers})

I have written a function that iterates through the column MNM_MOBILE_NUMBER, identifies the pattern of each number using regex, and then creates a new column MNC_New_Class with the relevant classification.

def vanity_def(MNM_MOBILE_NUMBER):
    """Classify mobile numbers into vanity tiers by regex (as posted: raises ValueError).

    NOTE: the ``MNM_MOBILE_NUMBER`` argument is never used. Every condition
    below reads the whole ``vanity_class.MNM_MOBILE_NUMBER`` column, so each
    ``.str.match(...)`` yields a boolean Series and the ``|`` chain is itself
    a Series. Evaluating that Series in ``if``/``elif`` is what raises
    "The truth value of a Series is ambiguous".

    The function also has no ``return`` statement; each branch instead
    assigns one label to the entire ``MNC_New_Class`` column.
    """
    # Tiers are tested in priority order: Diamond, Gold, Silver, Bronze,
    # Special, Economy; anything else falls through to 'Non Classified'.
    if vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(\d)\1{7}') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(?!(\d)\1)\d(\d)\2{6}$') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{2}(?!(\d)\1)\d(\d)\2{5}$') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^\d*(\d)(\d)(?:\1\2){3}\d*$') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5((\d)\2{3})((\d)\4{3})$') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(1234567$)'):
        vanity_class['MNC_New_Class'] = 'Diamond'
    elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(?!(\d)\1)\d(\d)\2{4}$') | \
             vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^(?!(\d)\1)\d((\d)\3{6})(?!\3)\d$') | \
             vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\d(\d)\1(\d)\2(\d)\3(\d)\4'):     
        vanity_class['MNC_New_Class'] = 'Gold'
    elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"^5(?!(\d)\1)\d((\d)\3{5})(?!\3)\d") | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\b\d\d(\d)(?!\1)(\d)\2\2(\d)\3\3\b"):
        vanity_class['MNC_New_Class'] = 'Silver'
    elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{3}(123456$)') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5.{3}(?!(\d)\1)\d(\d)\2{3}$') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\d\d(?!(\d)\1)\d((\d)\3{4})(?!\3)\d') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'\b\d\d(\d)(\d(00))\2') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^5(\d(000))(\d(000))'):
        vanity_class['MNC_New_Class'] = 'Bronze'
    elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\d\d(?!(\d)\1)\d((\d)\3{3})(?!\3)\d") | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"\d\d\d(\d\d)(?!\1)(\d)\2(\d)\3\b") | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r"^\d*(\d)(\d)(?:\1\2){2}\d*$"):
        vanity_class['MNC_New_Class'] = 'Special'
    elif vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{4}(45678$)') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{5}(?!(\d)\1)\d(\d){3}') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'.{5}(1234$)') | \
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'(?!.*(\d)\1(\d)\2(\d)\3).{4}\d(\d)\4(\d)\5') |\
            vanity_class.MNM_MOBILE_NUMBER.astype(str).str.match(r'^\d*(\d)(\d)(?:\1\2){1}\d*$'):
        vanity_class['MNC_New_Class'] = 'Economy'
    else:
        vanity_class['MNC_New_Class'] = 'Non Classified'

Then I wrote code to apply this function to the dataframe and create a new column.

# Series.apply calls vanity_def once per element (passing the scalar value)
# and stores the values it returns in the new column.
vanity_class['MNC_New_Class'] = vanity_class['MNM_MOBILE_NUMBER'].apply(vanity_def)

However, I keep getting the error below:

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Any advice on how to avoid this error?

Thanks


Solution

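  • The error comes from the conditions inside the function: instead of testing the single number that `apply` passes in, each one selects the whole `MNM_MOBILE_NUMBER` column from the dataframe, so the `if` receives a boolean Series rather than a single True/False value.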

    To avoid the error and get the intended result, use the workaround below, which tests each number individually with Python's `re` module.

    # Vanity tiers in priority order; a number gets the label of the first tier
    # in which any pattern matches. Patterns are applied with re.match, so each
    # one is implicitly anchored at the start of the number.
    _VANITY_TIERS = (
        ('Diamond', (
            r'^5(\d)\1{7}',
            r'^5(?!(\d)\1)\d(\d)\2{6}$',
            r'.{2}(?!(\d)\1)\d(\d)\2{5}$',
            r'^\d*(\d)(\d)(?:\1\2){3}\d*$',
            r'^5((\d)\2{3})((\d)\4{3})$',
            r'.{3}(1234567$)',
        )),
        ('Gold', (
            r'.{3}(?!(\d)\1)\d(\d)\2{4}$',
            r'^(?!(\d)\1)\d((\d)\3{6})(?!\3)\d$',
            r'\d(\d)\1(\d)\2(\d)\3(\d)\4',
        )),
        ('Silver', (
            r"^5(?!(\d)\1)\d((\d)\3{5})(?!\3)\d",
            r"\b\d\d(\d)(?!\1)(\d)\2\2(\d)\3\3\b",
        )),
        ('Bronze', (
            r'.{3}(123456$)',
            r'^5.{3}(?!(\d)\1)\d(\d)\2{3}$',
            r'\d\d(?!(\d)\1)\d((\d)\3{4})(?!\3)\d',
            r'\b\d\d(\d)(\d(00))\2',
            r'^5(\d(000))(\d(000))',
        )),
        ('Special', (
            r"\d\d(?!(\d)\1)\d((\d)\3{3})(?!\3)\d",
            r"\d\d\d(\d\d)(?!\1)(\d)\2(\d)\3\b",
            r"^\d*(\d)(\d)(?:\1\2){2}\d*$",
        )),
        ('Economy', (
            r'.{4}(45678$)',
            r'.{5}(?!(\d)\1)\d(\d){3}',
            r'.{5}(1234$)',
            r'(?!.*(\d)\1(\d)\2(\d)\3).{4}\d(\d)\4(\d)\5',
            r'^\d*(\d)(\d)(?:\1\2){1}\d*$',
        )),
    )


    def vanity_def(num):
        """Return the vanity class label for a single mobile number.

        Intended to be called once per value, e.g. via
        ``vanity_class['MNM_MOBILE_NUMBER'].apply(vanity_def)``.

        Args:
            num: One mobile number (int or str); it is converted with ``str()``
                before matching.

        Returns:
            'Diamond', 'Gold', 'Silver', 'Bronze', 'Special' or 'Economy' for
            the first tier with a matching pattern, otherwise 'Non Classified'.
        """
        # Local import: the snippet has no module-level import section.
        import re

        digits = str(num)  # convert once instead of once per pattern
        for label, patterns in _VANITY_TIERS:
            if any(re.match(pattern, digits) for pattern in patterns):
                return label
        return 'Non Classified'
    

    Output:

        MNM_MOBILE_NUMBER   MNC_New_Class
    0   539249751   Economy
    1   530246444   Economy
    2   539246655   Special
    3   539209759   Economy
    4   538849098   Economy