Search code examples
javamemory-managementmemory-leaksgarbage-collectionemail-validation

Memory leak issue in small java program


We are using the below code to verify our email address, but when we enter a long email address to test it, the CPU utilization goes 100% and program keeps on running, can you identify the issue.

import java.io.Serializable;
import java.util.regex.Pattern;


public class EmailAddress1 implements Serializable {


private static final boolean ALLOW_DOMAIN_LITERALS = true;


private static final boolean ALLOW_QUOTED_IDENTIFIERS = true;


private static final String wsp = "[ \\t]"; //space or tab
private static final String fwsp = wsp + "*";

private static final String dquote = "\\\"";

private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";

private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";

private static final String quotedPair = "(\\\\" + asciiText + ")";


private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?   \\^\\_\\`\\{\\|\\}\\~]";
private static final String atom = fwsp + atext + "+" + fwsp;
private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;

private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
private static final String qcontent = "(" + qtext + "|" + quotedPair + ")";
private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;

private static final String word = "((" + atom + ")|(" + quotedString + "))";
private static final String phrase = word + "+"; //one or more words.

private static final String letter = "[a-zA-Z]";
private static final String letDig = "[a-zA-Z0-9]";
private static final String letDigHyp = "[a-zA-Z0-9-]";
private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";

private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
private static final String dcontent = dtext + "|" + quotedPair;
private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";

private static final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;

private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
private static final String addrSpec = localPart + "@" + domain;
private static final String angleAddr = "<" + addrSpec + ">";
private static final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
private static final String mailbox = nameAddr + "|" + addrSpec;

private static final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;
public static final Pattern VALID_PATTERN = Pattern.compile(patternString);

private String text;
private boolean bouncing = true;
private boolean verified = false;
private String label;

public EmailAddress1() {
super();
}

public static boolean isValidText(String email) {
return (email != null) && VALID_PATTERN.matcher(email).matches();
}


public static void main(String[] args) {
String addy =       "1234567asjdkasdjflaslkdjfkajdfhklsadjfhsdkljfhksadjf8901234@askdjfaskdjbfskldjbgfskdjfksdjfklsadjfksadjfkasdjfkasdjfkasjfksdjfskadjfbsdkjbfasdkjasdkjbfksdjf6789012345678901.com";
if (isValidText(addy)) {
System.out.println("Valid email address.");
} else {
System.out.println("Invalid email address!");
}
}
}

Thanks, Varun


Solution

  • Man, this regex is badass ! Look at it with a little bit of formatting :

    (
     (
      (
       [ \t]*[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+[ \t]*)
      |(
          \"(
        [ \t]*(
                [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                    \\[\x01-\x09\x0B\x0C\x0E-\x7F])
              )
        )
          *[ \t]*\")
     )
     +)
    ?[ \t]*<(
            (
             [ \t]*(
                 [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                     \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
                 *)
             [ \t]*)
            |(
                \"(
        [ \t]*(
                [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                    \\[\x01-\x09\x0B\x0C\x0E-\x7F])
              )
        )
                *[ \t]*\")
            )
    @(
            [ \t]*(
                [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                    \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
                *)
            [ \t]*|\[(
                [ \t]*[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21-\x5A\x5E-\x7E]|(
                    \\[\x01-\x09\x0B\x0C\x0E-\x7F])
                +)
            *[ \t]*\])
    >|(
            (
             [ \t]*(
                 [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                     \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
                 *)
             [ \t]*)
            |(
                \"(
        [ \t]*(
                [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                    \\[\x01-\x09\x0B\x0C\x0E-\x7F])
              )
        )
                *[ \t]*\")
      )
    @(
            [ \t]*(
                [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                    \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
                *)
            [ \t]*|\[(
                [ \t]*[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21-\x5A\x5E-\x7E]|(
                    \\[\x01-\x09\x0B\x0C\x0E-\x7F])
                +)
            *[ \t]*\])
    

    This is not a memory leak problem. Your regex is just too complex to be matched easily. You program will eventually return a valid result, but only once every valid possibility have been tried.

    You should use Apache Commons EmailValidator, It will be much faster and reliable.