Search code examples
java character constants

Does Java define constants for any characters such as SPACE?


Does Java include any constants for single characters such as SPACE?

Having names taken from Unicode would be handy when doing string manipulations.

I want this:

String musician = "Lisa" + Character.SPACE + "Coleman" ;

…rather than this:

String musician = "Lisa" + " " + "Coleman" ;

(not to be confused with the java.lang.Character class)

If nothing is bundled with Java, what are the alternatives?


Solution

  • Constants for ASCII

    Code with nested classes containing constants

    Here is source code that defines a constant name for each of the 128 US-ASCII characters that make up the beginning of Unicode. These characters are also known as the Basic Latin block of Unicode.

    The approach here uses nested static classes containing static string constants, 128 constants for the 128 US-ASCII characters. By nesting the classes, we enable the intelligent type-ahead fill-in suggestion feature of modern code-editors. For example:

    Ascii.Uppercase.LATIN_CAPITAL_LETTER_A
    

    Nested interfaces could be used instead of classes, but I do not know of any benefits in terms of memory or speed.

    Of course you can modify this code. For example, if you only want names of control characters, excerpt that nested class.

    package work.basil.unicode;
    
    /**
     * Named constants for each of the 128 US-ASCII characters (code points 0 through 127),
     * grouped into nested classes so that an editor's auto-complete can narrow the choices,
     * for example {@code Ascii.Uppercase.LATIN_CAPITAL_LETTER_A}.
     *
     * <p>Every constant is a one-character {@link String} produced by
     * {@link Character#toString(int)}. Because each value is initialized by a method call, these
     * are not compile-time constant expressions, so they cannot be used as {@code case} labels or
     * annotation values.
     */
    @SuppressWarnings ( "unused" )
    public final class Ascii
    {
        // -----------|  US-ASCII character constants grouped via nested classes  |--------------

        /** Control characters: code points 0 through 31, plus DELETE (127). */
        public static final class Control
        {
            public static final String NULL = Character.toString ( 0 );
            public static final String START_OF_HEADING = Character.toString ( 1 );
            public static final String START_OF_TEXT = Character.toString ( 2 );
            public static final String END_OF_TEXT = Character.toString ( 3 );
            public static final String END_OF_TRANSMISSION = Character.toString ( 4 );
            public static final String ENQUIRY = Character.toString ( 5 );
            public static final String ACKNOWLEDGE = Character.toString ( 6 );
            public static final String BEL = Character.toString ( 7 );
            public static final String BACKSPACE = Character.toString ( 8 );
            public static final String CHARACTER_TABULATION = Character.toString ( 9 );
            public static final String LINE_FEED_LF = Character.toString ( 10 );
            public static final String LINE_TABULATION = Character.toString ( 11 );
            public static final String FORM_FEED_FF = Character.toString ( 12 );
            public static final String CARRIAGE_RETURN_CR = Character.toString ( 13 );
            public static final String SHIFT_OUT = Character.toString ( 14 );
            public static final String SHIFT_IN = Character.toString ( 15 );
            public static final String DATA_LINK_ESCAPE = Character.toString ( 16 );
            public static final String DEVICE_CONTROL_ONE = Character.toString ( 17 );
            public static final String DEVICE_CONTROL_TWO = Character.toString ( 18 );
            public static final String DEVICE_CONTROL_THREE = Character.toString ( 19 );
            public static final String DEVICE_CONTROL_FOUR = Character.toString ( 20 );
            public static final String NEGATIVE_ACKNOWLEDGE = Character.toString ( 21 );
            public static final String SYNCHRONOUS_IDLE = Character.toString ( 22 );
            public static final String END_OF_TRANSMISSION_BLOCK = Character.toString ( 23 );
            public static final String CANCEL = Character.toString ( 24 );
            public static final String END_OF_MEDIUM = Character.toString ( 25 );
            public static final String SUBSTITUTE = Character.toString ( 26 );
            public static final String ESCAPE = Character.toString ( 27 );
            public static final String INFORMATION_SEPARATOR_FOUR = Character.toString ( 28 );
            public static final String INFORMATION_SEPARATOR_THREE = Character.toString ( 29 );
            public static final String INFORMATION_SEPARATOR_TWO = Character.toString ( 30 );
            public static final String INFORMATION_SEPARATOR_ONE = Character.toString ( 31 );
            public static final String DELETE = Character.toString ( 127 );
        }

        /** Punctuation characters, plus SPACE (32). */
        public static final class Punctuation
        {
            public static final String SPACE = Character.toString ( 32 );
            public static final String EXCLAMATION_MARK = Character.toString ( 33 );
            public static final String QUOTATION_MARK = Character.toString ( 34 );
            public static final String NUMBER_SIGN = Character.toString ( 35 );
            public static final String PERCENT_SIGN = Character.toString ( 37 );
            public static final String AMPERSAND = Character.toString ( 38 );
            public static final String APOSTROPHE = Character.toString ( 39 );
            public static final String LEFT_PARENTHESIS = Character.toString ( 40 );
            public static final String RIGHT_PARENTHESIS = Character.toString ( 41 );
            public static final String ASTERISK = Character.toString ( 42 );
            public static final String COMMA = Character.toString ( 44 );
            public static final String HYPHEN_MINUS = Character.toString ( 45 );
            public static final String FULL_STOP = Character.toString ( 46 );
            public static final String SOLIDUS = Character.toString ( 47 );
            public static final String COLON = Character.toString ( 58 );
            public static final String SEMICOLON = Character.toString ( 59 );
            public static final String QUESTION_MARK = Character.toString ( 63 );
            public static final String COMMERCIAL_AT = Character.toString ( 64 );
            public static final String LEFT_SQUARE_BRACKET = Character.toString ( 91 );
            public static final String REVERSE_SOLIDUS = Character.toString ( 92 );
            public static final String RIGHT_SQUARE_BRACKET = Character.toString ( 93 );
            public static final String LOW_LINE = Character.toString ( 95 );
            public static final String LEFT_CURLY_BRACKET = Character.toString ( 123 );
            public static final String RIGHT_CURLY_BRACKET = Character.toString ( 125 );
        }

        /** Currency, math, and modifier symbols. */
        public static final class Symbol
        {
            public static final String DOLLAR_SIGN = Character.toString ( 36 );
            public static final String PLUS_SIGN = Character.toString ( 43 );
            public static final String LESS_THAN_SIGN = Character.toString ( 60 );
            public static final String EQUALS_SIGN = Character.toString ( 61 );
            public static final String GREATER_THAN_SIGN = Character.toString ( 62 );
            public static final String CIRCUMFLEX_ACCENT = Character.toString ( 94 );
            public static final String GRAVE_ACCENT = Character.toString ( 96 );
            public static final String VERTICAL_LINE = Character.toString ( 124 );
            public static final String TILDE = Character.toString ( 126 );
        }

        /** Decimal digits 0 through 9 (code points 48 through 57). */
        public static final class Digit
        {
            public static final String DIGIT_ZERO = Character.toString ( 48 );
            public static final String DIGIT_ONE = Character.toString ( 49 );
            public static final String DIGIT_TWO = Character.toString ( 50 );
            public static final String DIGIT_THREE = Character.toString ( 51 );
            public static final String DIGIT_FOUR = Character.toString ( 52 );
            public static final String DIGIT_FIVE = Character.toString ( 53 );
            public static final String DIGIT_SIX = Character.toString ( 54 );
            public static final String DIGIT_SEVEN = Character.toString ( 55 );
            public static final String DIGIT_EIGHT = Character.toString ( 56 );
            public static final String DIGIT_NINE = Character.toString ( 57 );
        }

        /** Uppercase Latin letters A through Z (code points 65 through 90). */
        public static final class Uppercase
        {
            public static final String LATIN_CAPITAL_LETTER_A = Character.toString ( 65 );
            public static final String LATIN_CAPITAL_LETTER_B = Character.toString ( 66 );
            public static final String LATIN_CAPITAL_LETTER_C = Character.toString ( 67 );
            public static final String LATIN_CAPITAL_LETTER_D = Character.toString ( 68 );
            public static final String LATIN_CAPITAL_LETTER_E = Character.toString ( 69 );
            public static final String LATIN_CAPITAL_LETTER_F = Character.toString ( 70 );
            public static final String LATIN_CAPITAL_LETTER_G = Character.toString ( 71 );
            public static final String LATIN_CAPITAL_LETTER_H = Character.toString ( 72 );
            public static final String LATIN_CAPITAL_LETTER_I = Character.toString ( 73 );
            public static final String LATIN_CAPITAL_LETTER_J = Character.toString ( 74 );
            public static final String LATIN_CAPITAL_LETTER_K = Character.toString ( 75 );
            public static final String LATIN_CAPITAL_LETTER_L = Character.toString ( 76 );
            public static final String LATIN_CAPITAL_LETTER_M = Character.toString ( 77 );
            public static final String LATIN_CAPITAL_LETTER_N = Character.toString ( 78 );
            public static final String LATIN_CAPITAL_LETTER_O = Character.toString ( 79 );
            public static final String LATIN_CAPITAL_LETTER_P = Character.toString ( 80 );
            public static final String LATIN_CAPITAL_LETTER_Q = Character.toString ( 81 );
            public static final String LATIN_CAPITAL_LETTER_R = Character.toString ( 82 );
            public static final String LATIN_CAPITAL_LETTER_S = Character.toString ( 83 );
            public static final String LATIN_CAPITAL_LETTER_T = Character.toString ( 84 );
            public static final String LATIN_CAPITAL_LETTER_U = Character.toString ( 85 );
            public static final String LATIN_CAPITAL_LETTER_V = Character.toString ( 86 );
            public static final String LATIN_CAPITAL_LETTER_W = Character.toString ( 87 );
            public static final String LATIN_CAPITAL_LETTER_X = Character.toString ( 88 );
            public static final String LATIN_CAPITAL_LETTER_Y = Character.toString ( 89 );
            public static final String LATIN_CAPITAL_LETTER_Z = Character.toString ( 90 );
        }

        /** Lowercase Latin letters a through z (code points 97 through 122). */
        public static final class Lowercase
        {
            public static final String LATIN_SMALL_LETTER_A = Character.toString ( 97 );
            public static final String LATIN_SMALL_LETTER_B = Character.toString ( 98 );
            public static final String LATIN_SMALL_LETTER_C = Character.toString ( 99 );
            public static final String LATIN_SMALL_LETTER_D = Character.toString ( 100 );
            public static final String LATIN_SMALL_LETTER_E = Character.toString ( 101 );
            public static final String LATIN_SMALL_LETTER_F = Character.toString ( 102 );
            public static final String LATIN_SMALL_LETTER_G = Character.toString ( 103 );
            public static final String LATIN_SMALL_LETTER_H = Character.toString ( 104 );
            public static final String LATIN_SMALL_LETTER_I = Character.toString ( 105 );
            public static final String LATIN_SMALL_LETTER_J = Character.toString ( 106 );
            public static final String LATIN_SMALL_LETTER_K = Character.toString ( 107 );
            public static final String LATIN_SMALL_LETTER_L = Character.toString ( 108 );
            public static final String LATIN_SMALL_LETTER_M = Character.toString ( 109 );
            public static final String LATIN_SMALL_LETTER_N = Character.toString ( 110 );
            public static final String LATIN_SMALL_LETTER_O = Character.toString ( 111 );
            public static final String LATIN_SMALL_LETTER_P = Character.toString ( 112 );
            public static final String LATIN_SMALL_LETTER_Q = Character.toString ( 113 );
            public static final String LATIN_SMALL_LETTER_R = Character.toString ( 114 );
            public static final String LATIN_SMALL_LETTER_S = Character.toString ( 115 );
            public static final String LATIN_SMALL_LETTER_T = Character.toString ( 116 );
            public static final String LATIN_SMALL_LETTER_U = Character.toString ( 117 );
            public static final String LATIN_SMALL_LETTER_V = Character.toString ( 118 );
            public static final String LATIN_SMALL_LETTER_W = Character.toString ( 119 );
            public static final String LATIN_SMALL_LETTER_X = Character.toString ( 120 );
            public static final String LATIN_SMALL_LETTER_Y = Character.toString ( 121 );
            public static final String LATIN_SMALL_LETTER_Z = Character.toString ( 122 );
        }

        /**
         * Returns the code point of the first and only character in the submitted string.
         *
         * @param s a string consisting of exactly one code point
         * @return the integer code point of that character
         * @throws NullPointerException     if {@code s} is null
         * @throws IllegalArgumentException if {@code s} does not contain exactly one code point
         */
        public static int codePoint ( final String s )
        {
            // Fully qualified because this file has no import of java.util.Objects.
            java.util.Objects.requireNonNull ( s );
            // Count code points rather than chars, so a surrogate pair is accepted as one character.
            if ( s.codePoints ( ).count ( ) != 1 )
            {
                throw new IllegalArgumentException ( "String must contain a single character, able to be represented by a single code point." );
            }
            return s.codePointAt ( 0 );
        }
    }
    

    Example usage, producing "Hello World!" followed by a few more lines of text.

    // Join the named one-character constants with an empty delimiter to assemble the text.
    String helloWorld = String.join (
            "" ,
            Ascii.Uppercase.LATIN_CAPITAL_LETTER_H ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_E ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_L ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_L ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_O ,
            Ascii.Punctuation.SPACE ,
            Ascii.Uppercase.LATIN_CAPITAL_LETTER_W ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_O ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_R ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_L ,
            Ascii.Lowercase.LATIN_SMALL_LETTER_D ,
            Ascii.Punctuation.EXCLAMATION_MARK ,
            Ascii.Control.LINE_FEED_LF ,
            Ascii.Symbol.DOLLAR_SIGN ,
            Ascii.Digit.DIGIT_ONE ,
            Ascii.Digit.DIGIT_TWO ,
            Ascii.Punctuation.FULL_STOP ,
            Ascii.Digit.DIGIT_SEVEN ,
            Ascii.Digit.DIGIT_ZERO ,
            Ascii.Control.LINE_FEED_LF ,
            Ascii.Symbol.GREATER_THAN_SIGN ,
            Ascii.Control.LINE_FEED_LF ,
            Ascii.Digit.DIGIT_ZERO
    );

    // Print the assembled text to standard output.
    System.out.println ( helloWorld );
    

    Run.

    Hello World!
    $12.70
    >
    0
    

    You can access the code point for any of these characters using the provided codePoint method. This method expects a String that contains exactly one character (a single code point). This method is merely a convenience in place of "A".codePointAt ( 0 ).

    Ascii.codePoint ( Ascii.Uppercase.LATIN_CAPITAL_LETTER_A )
    

    65

    Generator code

    Here is the code I wrote to generate the source code seen above.

    You may find this useful if you want to tweak the results to your own preferences for delimiters, names, grouping, etc.

    package work.basil.unicode;
    
    import java.util.*;
    
    /**
     * Generates source code for a {@code .java} class filled with constants naming each of the 128 US-ASCII
     * characters that make up the beginning of Unicode.
     *
     * <p>These constant names may be helpful to the developer seeking to avoid "magic numbers"
     * in their code when working with individual code points.
     *
     * <p>Tip: You can get the code point integer number for any of these constant strings:
     * {@code Ascii.Uppercase.LATIN_CAPITAL_LETTER_A.codePointAt ( 0 )} returns 65.
     *
     * <p>For reference, see Unicode Consortium document "C0 Controls and Basic Latin Range: 0000–007F"
     * https://unicode.org/charts/PDF/U0000.pdf
     *
     * <p>By Basil Bourque.
     */
    public class AsciiGenerator
    {
        // Fields: one collection of code points per nested class to be generated.
        private final SequencedCollection < Integer > control = new ArrayList <> ( );
        private final SequencedCollection < Integer > punctuation = new ArrayList <> ( );
        private final SequencedCollection < Integer > symbol = new ArrayList <> ( );
        private final SequencedCollection < Integer > digit = new ArrayList <> ( );
        private final SequencedCollection < Integer > uppercase = new ArrayList <> ( );
        private final SequencedCollection < Integer > lowercase = new ArrayList <> ( );

        // Nested-class name mapped to its group of code points, iterated in insertion order.
        // Keyed by the name rather than by the mutable collection, so lookups never depend on a collection's contents.
        private final Map < String, SequencedCollection < Integer > > codePointGroupsByClassName = new LinkedHashMap <> ( );

        // Constructor
        public AsciiGenerator ( )
        {
            this.populate ( );
            this.codePointGroupsByClassName.put ( "Control" , this.control );
            this.codePointGroupsByClassName.put ( "Punctuation" , this.punctuation );
            this.codePointGroupsByClassName.put ( "Symbol" , this.symbol );
            this.codePointGroupsByClassName.put ( "Digit" , this.digit );
            this.codePointGroupsByClassName.put ( "Uppercase" , this.uppercase );
            this.codePointGroupsByClassName.put ( "Lowercase" , this.lowercase );
        }

        // Sorts each of the 128 US-ASCII code points into one of the six collections, by its Unicode general category.
        private void populate ( )
        {
            final int COUNT_OF_ASCII_CHARACTERS = 128;
            for ( int codePoint = 0 ; codePoint < COUNT_OF_ASCII_CHARACTERS ; codePoint++ )
            {
                switch ( Character.getType ( codePoint ) )
                {
                    case Character.CONTROL -> this.control.add ( codePoint );
                    // SPACE is a separator rather than punctuation, but is grouped here with the punctuation.
                    case Character.CONNECTOR_PUNCTUATION ,
                            Character.DASH_PUNCTUATION ,
                            Character.END_PUNCTUATION ,
                            Character.FINAL_QUOTE_PUNCTUATION ,
                            Character.INITIAL_QUOTE_PUNCTUATION ,
                            Character.OTHER_PUNCTUATION ,
                            Character.START_PUNCTUATION ,
                            Character.SPACE_SEPARATOR -> this.punctuation.add ( codePoint );
                    case Character.CURRENCY_SYMBOL ,
                            Character.MATH_SYMBOL ,
                            Character.MODIFIER_SYMBOL -> this.symbol.add ( codePoint );
                    case Character.DECIMAL_DIGIT_NUMBER -> this.digit.add ( codePoint );
                    case Character.LOWERCASE_LETTER -> this.lowercase.add ( codePoint );
                    case Character.UPPERCASE_LETTER -> this.uppercase.add ( codePoint );
                    // A code point of any other category is reported and left out of every group.
                    default -> System.out.println ( "ERROR Unexpected codePoint = " + codePoint + " | type = " + Character.getType ( codePoint ) );
                }
            }
        }

        /**
         * Prints each group of code points to standard output, followed by the total count across all groups.
         */
        public void dump ( )
        {
            this.codePointGroupsByClassName.values ( ).forEach ( System.out :: println ); // Dump each collection of code points.
            int total = this.codePointGroupsByClassName.values ( ).stream ( ).mapToInt ( Collection :: size ).sum ( ); // Count all the code points across all the collections. Should be 128.
            System.out.println ( "Total number of code points: " + total );
        }

        /**
         * Builds the complete source code of the {@code Ascii} class: one nested class per group of
         * code points, one {@code String} constant per code point, plus a {@code codePoint} convenience method.
         *
         * @return the generated source text, ending with the closing brace of the class and no trailing newline
         */
        public String generateSourceCodeForClassOfConstants ( )
        {
            final String INDENT = " ".repeat ( 4 );  // Indent four SPACE characters. Or perhaps one TAB character, if you like.
            final String EOL = "\n";  // End-of-line terminator: LINE FEED. Or perhaps CR-LF, if you like.
            final StringBuilder sourceCode = new StringBuilder ( );
            sourceCode.append ( """
                    package work.basil.unicode;

                    @SuppressWarnings ( "unused" )
                    public final class Ascii
                    {
                    """ );
            sourceCode.append ( """
                        // -----------|  US-ASCII character constants grouped via nested classes  |--------------
                    """ );

            for ( Map.Entry < String, SequencedCollection < Integer > > group : this.codePointGroupsByClassName.entrySet ( ) )
            {
                String nestedClassName = group.getKey ( );
                sourceCode
                        .append ( INDENT )
                        .append ( "public static final class " )
                        .append ( nestedClassName )
                        .append ( EOL )
                        .append ( INDENT )
                        .append ( "{" )
                        .append ( EOL );
                for ( Integer codePoint : group.getValue ( ) )
                {
                    // Turn the Unicode character name into a legal Java identifier.
                    String name =
                            Character
                                    .getName ( codePoint )
                                    .replace ( " " , "_" )
                                    .replace ( "(" , "" )  // Delete parens from "LINE FEED (LF)", "FORM FEED (FF)", "CARRIAGE RETURN (CR)".
                                    .replace ( ")" , "" )
                                    .replace ( "-" , "_" );  // Replace "-" in "HYPHEN-MINUS" with "_" (LOW LINE).
                    sourceCode
                            .append ( INDENT.repeat ( 2 ) )
                            .append ( "public static final String " )
                            .append ( name )
                            .append ( " = Character.toString ( " )
                            .append ( codePoint )
                            .append ( " );" )
                            .append ( EOL );
                }
                sourceCode
                        .append ( INDENT )
                        .append ( "}" )
                        .append ( EOL );
            }

            // The generated class has no import statements, so java.util.Objects is written fully qualified.
            sourceCode.append ( """

                        // Convenience method to return the code point integer number for the first and only character in the submitted string.
                        public static int codePoint ( final String s )
                        {
                            java.util.Objects.requireNonNull ( s );
                            if ( s.codePoints ( ).count ( ) != 1 )
                            {
                                throw new IllegalArgumentException ( "String must contain a single character, able to be represented by a single code point." );
                            }
                            return s.codePointAt ( 0 );
                        }
                    """ );

            sourceCode
                    .append ( "}" );
            return sourceCode.toString ( );
        }

        /**
         * Dumps the code point groups, then prints the generated class source between banner lines.
         *
         * @param args ignored
         */
        public static void main ( String[] args )
        {
            AsciiGenerator asciiGenerator = new AsciiGenerator ( );
            asciiGenerator.dump ( );
            System.out.println ( "-----------|  Class Source Code  |-----------------" );
            System.out.println ( asciiGenerator.generateSourceCodeForClassOfConstants ( ) );
            System.out.println ( "-----------|  end  |-----------------" );
        }
    }