Java String Source Analysis

String Source Analysis

Class structure

public final class String
    implements java.io.Serializable, Comparable<String>, CharSequence 

The String class implements Serializable, so its instances can be serialized.

The String class implements Comparable<String>, so strings can be compared with one another.

The String class implements CharSequence, so its characters can be accessed by index.

The String class is declared final and therefore cannot be subclassed.

attribute

//Backing array that holds the characters of the string
private final char value[];

//Cached hash code; stays 0 until hashCode() computes and stores it
private int hash; // Default to 0

//Serialization version identifier
private static final long serialVersionUID = -6849794470754667710L;

//Serialization metadata: serialPersistentFields is the field name the Java Object Serialization
//mechanism looks up for a class's explicit list of serializable fields; here it is an empty array.
private static final ObjectStreamField[] serialPersistentFields =
        new ObjectStreamField[0];

Construction method

//No-argument constructor: shares the character array of the empty string literal; hash keeps its default of 0
public String() {
        this.value = "".value;
    }

//Copy constructor: reuses the original's character array and its cached hash (the array is not copied)
public String(String original) {
        this.value = original.value;
        this.hash = original.hash;
    }

//Builds the string from a copy of the whole char array, so later changes to the argument do not affect it; hash keeps its default of 0
public String(char value[]) {
        this.value = Arrays.copyOf(value, value.length);
    }

//Initialize using character array and specifying offset, number of characters
public String(char value[], int offset, int count) {
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) {
                throw new StringIndexOutOfBoundsException(count);
            }
            if (offset <= value.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > value.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }
        this.value = Arrays.copyOfRange(value, offset, offset+count);
    }

//Builds the string from count Unicode code points of the array, starting at offset.
//Throws StringIndexOutOfBoundsException for a bad offset/count and
//IllegalArgumentException for a value that is not a valid code point.
public String(int[] codePoints, int offset, int count) {
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) {
                throw new StringIndexOutOfBoundsException(count);
            }
            if (offset <= codePoints.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > codePoints.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }

        final int end = offset + count;

        // Pass 1: compute the precise size of the char[]; each non-BMP code point adds one extra char
        int n = count;
        for (int i = offset; i < end; i++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c))//BMP (Basic Multilingual Plane) code point: fits in one char
                continue;
            else if (Character.isValidCodePoint(c))//Valid code point outside the BMP: needs a second char
                n++;
            else throw new IllegalArgumentException(Integer.toString(c));
        }

        // Pass 2: allocate the char[] and fill it with the chars for each code point
        final char[] v = new char[n];

        for (int i = offset, j = 0; i < end; i++, j++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c))//BMP code point: store it directly as one char
                v[j] = (char)c;
            else
                Character.toSurrogates(c, v, j++);//Store as two chars; the extra j++ skips the second slot
        }

        this.value = v;
    }

//Deprecated: builds each char from one byte of the array (count bytes starting at offset),
//using the byte as the low 8 bits and hibyte as the bits above them
@Deprecated
    public String(byte ascii[], int hibyte, int offset, int count) {
        checkBounds(ascii, offset, count);
        char value[] = new char[count];

        if (hibyte == 0) {
            // No high byte: each char is just the unsigned value of the byte
            for (int i = count; i-- > 0;) {
                value[i] = (char)(ascii[i + offset] & 0xff);
            }
        } else {
            // Shift once up front so the loop only has to OR in the low byte
            hibyte <<= 8;
            for (int i = count; i-- > 0;) {
                value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
            }
        }
        this.value = value;
    }

//Deprecated: delegates to the (ascii, hibyte, offset, count) constructor over the whole array
@Deprecated
    public String(byte ascii[], int hibyte) {
        this(ascii, hibyte, 0, ascii.length);
    }

//Decodes length bytes starting at offset using the character set identified by name;
//a null name raises NullPointerException("charsetName")
public String(byte bytes[], int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        if (charsetName == null)
            throw new NullPointerException("charsetName");
        checkBounds(bytes, offset, length);
        this.value = StringCoding.decode(charsetName, bytes, offset, length);
    }

//Decodes length bytes starting at offset using the supplied Charset object;
//a null charset raises NullPointerException("charset")
public String(byte bytes[], int offset, int length, Charset charset) {
        if (charset == null)
            throw new NullPointerException("charset");
        checkBounds(bytes, offset, length);
        this.value =  StringCoding.decode(charset, bytes, offset, length);
    }

//Decodes the whole byte array using the character set identified by name
//(delegates to the offset/length constructor with offset 0 and the full length)
public String(byte bytes[], String charsetName)
            throws UnsupportedEncodingException {
        this(bytes, 0, bytes.length, charsetName);
    }
//Decodes the whole byte array using the supplied Charset object
//(delegates to the offset/length constructor with offset 0 and the full length)
public String(byte bytes[], Charset charset) {
        this(bytes, 0, bytes.length, charset);
    }

//Decodes length bytes starting at offset without naming a character set
//NOTE(review): StringCoding.decode(bytes, offset, length) presumably uses the platform default charset - confirm
public String(byte bytes[], int offset, int length) {
        checkBounds(bytes, offset, length);
        this.value = StringCoding.decode(bytes, offset, length);
    }

//Decodes the whole byte array without naming a character set
//(delegates to the offset/length constructor with offset 0 and the full length)
public String(byte bytes[]) {
        this(bytes, 0, bytes.length);
    }

//Copies the StringBuffer's current characters while holding the buffer's monitor,
//so the array and the length are read under the same lock
public String(StringBuffer buffer) {
        synchronized(buffer) {
            this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
        }
    }

//Copies the StringBuilder's current characters; no lock is taken here
public String(StringBuilder builder) {
        this.value = Arrays.copyOf(builder.getValue(), builder.length());
    }

Method

Static method

join(CharSequence, CharSequence...) joins the given elements into one string, inserting the delimiter between them

/**
 * Joins the given elements into a single string, placing the delimiter between
 * consecutive elements.
 *
 * @param delimiter text inserted between elements; must not be null
 * @param elements  the elements to join; the array itself must not be null
 * @return the joined string
 * @throws NullPointerException if delimiter or elements is null
 */
public static String join(CharSequence delimiter, CharSequence... elements) {
        Objects.requireNonNull(delimiter);
        Objects.requireNonNull(elements);
        // A plain loop is used: the element count rarely justifies stream overhead.
        final StringJoiner result = new StringJoiner(delimiter);
        for (int idx = 0; idx < elements.length; idx++) {
            result.add(elements[idx]);
        }
        return result.toString();
    }

join(CharSequence, Iterable<? extends CharSequence>) joins the elements of an Iterable into one string, inserting the delimiter between them

/**
 * Joins the elements produced by an Iterable into a single string, placing the
 * delimiter between consecutive elements.
 *
 * @param delimiter text inserted between elements; must not be null
 * @param elements  source of the elements to join; must not be null
 * @return the joined string
 * @throws NullPointerException if delimiter or elements is null
 */
public static String join(CharSequence delimiter,
            Iterable<? extends CharSequence> elements) {
        Objects.requireNonNull(delimiter);
        Objects.requireNonNull(elements);
        final StringJoiner result = new StringJoiner(delimiter);
        final java.util.Iterator<? extends CharSequence> cursor = elements.iterator();
        while (cursor.hasNext()) {
            result.add(cursor.next());
        }
        return result.toString();
    }

format(String, Object...) returns a string built by formatting the arguments according to the given format string

/**
 * Formats the arguments according to the format string, using a Formatter
 * created with its no-argument constructor.
 *
 * @param format the format string
 * @param args   the arguments referenced by the format string
 * @return the formatted text
 */
public static String format(String format, Object... args) {
        return new Formatter().format(format, args).toString();
    }

format(Locale, String, Object...) returns a string built by formatting the arguments according to the given format string, using the conventions of the given locale

/**
 * Formats the arguments according to the format string, using a Formatter
 * created for the given locale.
 *
 * @param l      the locale passed to the Formatter
 * @param format the format string
 * @param args   the arguments referenced by the format string
 * @return the formatted text
 */
public static String format(Locale l, String format, Object... args) {
        return new Formatter(l).format(format, args).toString();
    }

valueOf(Object) converts an object to a string; if the object is null, the string "null" is returned

/**
 * Converts an object to its string form.
 *
 * @param obj the object to convert; may be null
 * @return the literal "null" for a null argument, otherwise obj.toString()
 */
public static String valueOf(Object obj) {
        if (obj == null) {
            return "null";
        }
        return obj.toString();
    }

valueOf(char[]) char array converted to string

/**
 * Converts a char array to a string by passing it to the String(char[]) constructor.
 *
 * @param data the characters of the new string
 * @return a new string built from data
 */
public static String valueOf(char data[]) {
        return new String(data);
    }

valueOf(xxx) converts a value of the primitive type xxx (boolean, char, int, long, float or double) to a string

/**
 * Converts a boolean to its string form.
 *
 * @param b the value to convert
 * @return "true" when b is true, otherwise "false"
 */
public static String valueOf(boolean b) {
        if (b) {
            return "true";
        }
        return "false";
    }

/**
 * Converts a single char to a one-character string.
 *
 * @param c the character
 * @return a string whose only character is c
 */
public static String valueOf(char c) {
        char data[] = {c};
        // NOTE(review): the (char[], boolean) constructor is not shown in this article;
        // presumably it adopts the array without copying - confirm.
        return new String(data, true);
    }

/** Converts an int to a string by delegating to Integer.toString(int). */
public static String valueOf(int i) {
        return Integer.toString(i);
    }

/** Converts a long to a string by delegating to Long.toString(long). */
public static String valueOf(long l) {
        return Long.toString(l);
    }

/** Converts a float to a string by delegating to Float.toString(float). */
public static String valueOf(float f) {
        return Float.toString(f);
    }

 /** Converts a double to a string by delegating to Double.toString(double). */
 public static String valueOf(double d) {
        return Double.toString(d);
    }

valueOf(char[], int, int) converts the part of a char array selected by an offset and a character count into a string

/**
 * Converts count characters of the array, starting at offset, to a string
 * by passing them to the String(char[], int, int) constructor.
 *
 * @param data   the source characters
 * @param offset index of the first character to use
 * @param count  number of characters to use
 * @return a new string built from the selected characters
 */
public static String valueOf(char data[], int offset, int count) {
        return new String(data, offset, count);
    }

copyValueOf(char[], int, int) creates a new string from the part of a char array selected by an offset and a character count; it is identical to valueOf(char[], int, int)

/**
 * Creates a string from count characters of the array, starting at offset.
 * The body is the same as valueOf(char[], int, int).
 *
 * @param data   the source characters
 * @param offset index of the first character to use
 * @param count  number of characters to use
 * @return a new string built from the selected characters
 */
public static String copyValueOf(char data[], int offset, int count) {
        return new String(data, offset, count);
    }

copyValueOf(char[]) creates a new string from a copy of the specified character array

/**
 * Creates a string from the whole char array.
 * The body is the same as valueOf(char[]).
 *
 * @param data the characters of the new string
 * @return a new string built from data
 */
public static String copyValueOf(char data[]) {
        return new String(data);
    }

Member Method

char charAt(int index) returns the character at the specified index

/**
 * Returns the character stored at the given index.
 *
 * @param index zero-based position of the character
 * @return the character at that position
 * @throws StringIndexOutOfBoundsException if index is negative or not less than the length
 */
public char charAt(int index) {
        final boolean inRange = index >= 0 && index < value.length;
        if (inRange) {
            return value[index];
        }
        throw new StringIndexOutOfBoundsException(index);
    }

void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) copies the specified range of the char array of the current string to the specified location of the target char array

/**
 * Copies the characters in the range [srcBegin, srcEnd) of this string into dst,
 * starting at dstBegin.
 *
 * @param srcBegin index of the first character to copy
 * @param srcEnd   index after the last character to copy
 * @param dst      the destination array
 * @param dstBegin start position in the destination array
 * @throws StringIndexOutOfBoundsException if srcBegin is negative, srcEnd exceeds
 *         the length, or srcBegin is greater than srcEnd
 */
public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
        if (srcBegin < 0) {
            throw new StringIndexOutOfBoundsException(srcBegin);
        }
        if (srcEnd > value.length) {
            throw new StringIndexOutOfBoundsException(srcEnd);
        }
        if (srcBegin > srcEnd) {
            throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
        }
  //System.arraycopy(Object src, int srcPos, Object dest, int destPos, int length)
  //src: the source array to copy from
  //srcPos: start position in the source array
  //dest: the target array
  //destPos: start position in the target array
  //length: the number of elements to copy
        System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
    }

byte[] getBytes(String charsetName) encodes the string into a byte array using the named character set

/**
 * Encodes this string into a byte array using the character set identified by name.
 *
 * @param charsetName name of the character set used for encoding; must not be null
 * @return the bytes returned by StringCoding.encode for this string's characters
 * @throws UnsupportedEncodingException declared by the signature; presumably raised
 *         when the named character set is not supported
 * @throws NullPointerException if charsetName is null
 */
public byte[] getBytes(String charsetName)
            throws UnsupportedEncodingException {
        if (charsetName == null) {
            // Name the offending argument, as the byte[] constructors' null checks do.
            throw new NullPointerException("charsetName");
        }
        return StringCoding.encode(charsetName, value, 0, value.length);
    }

boolean equals(Object anObject) compares this string with another object for equality, overriding the method inherited from Object

/**
 * Compares this string with another object for equality.
 *
 * @param anObject the object to compare with
 * @return true if anObject is this same instance, or a String with the same
 *         length and the same characters in the same order
 */
public boolean equals(Object anObject) {
        // Same reference: trivially equal
        if (this == anObject) {
            return true;
        }
        if (!(anObject instanceof String)) {
            return false;
        }
        final char[] mine = value;
        final char[] theirs = ((String) anObject).value;
        // Different lengths can never match
        if (mine.length != theirs.length) {
            return false;
        }
        // The first differing character decides the result
        for (int idx = 0; idx < mine.length; idx++) {
            if (mine[idx] != theirs[idx]) {
                return false;
            }
        }
        return true;
    }

contentEquals(CharSequence) determines whether this String has the same characters as another character sequence. Unlike equals, which can only be true when both objects are Strings, contentEquals can also compare a String with a StringBuffer, a StringBuilder, or any other CharSequence.

/**
 * Checks whether this string holds the same characters as the given sequence.
 *
 * @param cs the sequence to compare with
 * @return true if both have the same length and the same characters in order
 */
public boolean contentEquals(CharSequence cs) {
        // StringBuffer and StringBuilder share a dedicated comparison
        if (cs instanceof AbstractStringBuilder) {
            final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
            if (!(cs instanceof StringBuffer)) {
                // StringBuilder: compared without locking
                return nonSyncContentEquals(builder);
            }
            // StringBuffer: hold its monitor during the comparison
            synchronized (cs) {
                return nonSyncContentEquals(builder);
            }
        }
        // Another String: plain equals is enough
        if (cs instanceof String) {
            return equals(cs);
        }
        // Any other CharSequence: compare length, then character by character
        final char[] chars = value;
        final int length = chars.length;
        if (length != cs.length()) {
            return false;
        }
        for (int idx = 0; idx < length; idx++) {
            if (chars[idx] != cs.charAt(idx)) {
                return false;
            }
        }
        return true;
    }

equalsIgnoreCase(String anotherString) Whether two strings are equal by ignoring case for comparison

/**
 * Compares this string with another string, ignoring case differences.
 *
 * @param anotherString the string to compare with; may be null
 * @return true for the same instance, or for a non-null string of equal length
 *         whose characters match under regionMatches with ignoreCase set
 */
public boolean equalsIgnoreCase(String anotherString) {
        if (this == anotherString) {
            return true;
        }
        if (anotherString == null) {
            return false;
        }
        if (anotherString.value.length != value.length) {
            return false;
        }
        return regionMatches(true, 0, anotherString, 0, value.length);
    }

/**
 * Tests whether a region of this string equals a region of another string.
 *
 * @param ignoreCase if true, characters that differ are also compared after case conversion
 * @param toffset    start of the region in this string
 * @param other      the string to compare against
 * @param ooffset    start of the region in other
 * @param len        number of characters to compare
 * @return true if the regions match; false if they differ, if an offset is
 *         negative, or if a region would extend past the end of its string
 */
public boolean regionMatches(boolean ignoreCase, int toffset,
            String other, int ooffset, int len) {
        char ta[] = value;
        int to = toffset;
        char pa[] = other.value;
        int po = ooffset;
        // Note: toffset, ooffset, or len might be near -1>>>1.
        // The subtraction is done in long arithmetic so it cannot overflow.
        if ((ooffset < 0) || (toffset < 0)
                || (toffset > (long)value.length - len)
                || (ooffset > (long)other.value.length - len)) {
            return false;
        }
        while (len-- > 0) {
            char c1 = ta[to++];
            char c2 = pa[po++];
            if (c1 == c2) {
                continue;
            }
            if (ignoreCase) {
                // If characters don't match but case may be ignored,
                // try converting both characters to uppercase.
                // If the results match, then the comparison scan should
                // continue.
                char u1 = Character.toUpperCase(c1);
                char u2 = Character.toUpperCase(c2);
                if (u1 == u2) {
                    continue;
                }
                // Unfortunately, conversion to uppercase does not work properly
                // for the Georgian alphabet, which has strange rules about case
                // conversion.  So we need to make one last check before
                // exiting.
                // The uppercase forms differ, so compare their lowercase forms as well.
                if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

int compareTo(String anotherString) for string comparison

/**
 * Compares this string with another string character by character.
 *
 * @param anotherString the string to compare with
 * @return the difference of the first pair of differing characters; if one
 *         string is a prefix of the other (or they are equal), the difference
 *         of the lengths, which is 0 for equal strings
 */
public int compareTo(String anotherString) {
        final char[] left = value;
        final char[] right = anotherString.value;
        final int shared = Math.min(left.length, right.length);

        for (int pos = 0; pos < shared; pos++) {
            final char a = left[pos];
            final char b = right[pos];
            // Positive when this string's character is larger, negative when smaller
            if (a != b) {
                return a - b;
            }
        }
        // All shared positions match: the shorter string sorts first
        return left.length - right.length;
    }

compareToIgnoreCase(String str) compares two strings while ignoring case differences

/**
 * Compares this string with str, ignoring case, by delegating to the
 * CASE_INSENSITIVE_ORDER comparator.
 *
 * @param str the string to compare with
 * @return the value returned by CASE_INSENSITIVE_ORDER.compare(this, str)
 */
public int compareToIgnoreCase(String str) {
        return CASE_INSENSITIVE_ORDER.compare(this, str);
    }

/**
 * Compares two strings while ignoring case differences.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return the difference of the first pair of characters that still differ
 *         after upper-casing and then lower-casing; otherwise the difference
 *         of the lengths
 */
public int compare(String s1, String s2) {
            final int len1 = s1.length();
            final int len2 = s2.length();
            final int shared = Math.min(len1, len2);
            for (int pos = 0; pos < shared; pos++) {
                char a = s1.charAt(pos);
                char b = s2.charAt(pos);
                if (a == b) {
                    continue;
                }
                a = Character.toUpperCase(a);
                b = Character.toUpperCase(b);
                if (a == b) {
                    continue;
                }
                // Still different in upper case: the lower-case forms decide
                a = Character.toLowerCase(a);
                b = Character.toLowerCase(b);
                if (a != b) {
                    return a - b;
                }
            }
            return len1 - len2;
        }

startsWith(String prefix) determines whether a string begins with a specified string

/**
 * Tests whether this string begins with the given prefix
 * (delegates to startsWith(prefix, 0)).
 *
 * @param prefix the prefix to look for
 * @return true if the string starts with prefix
 */
public boolean startsWith(String prefix) {
        return startsWith(prefix, 0);
    }

/**
 * Tests whether the part of this string that begins at toffset starts with prefix.
 *
 * @param prefix  the prefix to look for
 * @param toffset index in this string where the comparison starts
 * @return true if every prefix character matches; false if toffset is negative
 *         or the prefix does not fit in the remaining characters
 */
public boolean startsWith(String prefix, int toffset) {
        final char[] text = value;
        final char[] head = prefix.value;
        final int headLen = head.length;
        // Compared as a subtraction so that a very large toffset cannot overflow.
        if (toffset < 0 || toffset > text.length - headLen) {
            return false;
        }
        // Compare the prefix against this string, one character at a time
        for (int k = 0; k < headLen; k++) {
            if (text[toffset + k] != head[k]) {
                return false;
            }
        }
        return true;
    }

boolean endsWith(String suffix) determines whether a string ends with a specified string

/**
 * Tests whether this string ends with the given suffix, by checking for the
 * suffix at the offset (this length - suffix length).
 *
 * @param suffix the suffix to look for
 * @return true if the string ends with suffix
 */
public boolean endsWith(String suffix) {
        return startsWith(suffix, value.length - suffix.value.length);
    }

int hashCode() Gets the hashCode of the string

/**
 * Returns the hash code of this string, computing it on first use and caching
 * it in the hash field.
 *
 * @return the cached hash if it is non-zero or the string is empty; otherwise
 *         the freshly computed value h = 31 * h + c taken over every character
 */
public int hashCode() {
        int result = hash;
        // A non-zero cache or an empty string needs no computation
        if (result != 0 || value.length == 0) {
            return result;
        }
        final char[] chars = value;
        for (char ch : chars) {
            // Previous hash times 31, plus the numeric value of the current character
            result = 31 * result + ch;
        }
        hash = result;
        return result;
    }

int indexOf(int ch) returns the index of the first occurrence of the given Unicode code point

/**
 * Returns the index of the first occurrence of the given code point,
 * searching from index 0 (delegates to indexOf(ch, 0)).
 *
 * @param ch the code point to look for
 * @return the index of the first match, or -1 if there is none
 */
public int indexOf(int ch) {
        return indexOf(ch, 0);
    }

/**
 * Returns the index of the first occurrence of the given code point at or
 * after fromIndex.
 *
 * @param ch        the code point to look for
 * @param fromIndex index to start from; negative values are treated as 0
 * @return the index of the first match, or -1 if there is none or fromIndex
 *         is not less than the length
 */
public int indexOf(int ch, int fromIndex) {
        final char[] chars = this.value;
        final int length = chars.length;
        // A start position at or past the end can never match
        if (fromIndex >= length) {
            return -1;
        }
        final int start = Math.max(fromIndex, 0);

        // Code points that need two chars are handled separately
        if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            return indexOfSupplementary(ch, start);
        }
        // Common case: ch is a BMP code point or a negative (invalid) value
        for (int pos = start; pos < length; pos++) {
            if (chars[pos] == ch) {
                return pos;
            }
        }
        return -1;
    }

/**
 * Finds the first occurrence of a supplementary code point, which is stored
 * as a high/low surrogate pair, at or after fromIndex.
 *
 * @param ch        the code point to look for
 * @param fromIndex index to start from
 * @return the index of the high surrogate of the first match, or -1 if there
 *         is none or ch is not a valid code point
 */
private int indexOfSupplementary(int ch, int fromIndex) {
        if (!Character.isValidCodePoint(ch)) {
            return -1;
        }
        final char[] chars = this.value;
        final char high = Character.highSurrogate(ch);
        final char low = Character.lowSurrogate(ch);
        // Stop one short of the end: a pair needs two consecutive chars
        final int lastPairStart = chars.length - 1;
        for (int pos = fromIndex; pos < lastPairStart; pos++) {
            if (chars[pos] == high && chars[pos + 1] == low) {
                return pos;
            }
        }
        return -1;
    }

int lastIndexOf(int ch) returns the index of the last occurrence of the given code point, found by searching backwards from the end of the string

/**
 * Returns the index of the last occurrence of the given code point,
 * searching backwards from the last character.
 *
 * @param ch the code point to look for
 * @return the index of the last match, or -1 if there is none
 */
public int lastIndexOf(int ch) {
        return lastIndexOf(ch, value.length - 1);
    }

/**
 * Returns the index of the last occurrence of the given code point at or
 * before fromIndex, searching backwards.
 *
 * @param ch        the code point to look for
 * @param fromIndex index to start from; values past the end are clamped to
 *                  the last index
 * @return the index of the match, or -1 if there is none
 */
public int lastIndexOf(int ch, int fromIndex) {
        // Code points that need two chars are handled separately
        if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            return lastIndexOfSupplementary(ch, fromIndex);
        }
        // Common case: ch is a BMP code point or a negative (invalid) value
        final char[] chars = this.value;
        int pos = Math.min(fromIndex, chars.length - 1);
        while (pos >= 0) {
            if (chars[pos] == ch) {
                return pos;
            }
            pos--;
        }
        return -1;
    }

/**
 * Finds the last occurrence of a supplementary code point, which is stored as
 * a high/low surrogate pair, at or before fromIndex.
 *
 * @param ch        the code point to look for
 * @param fromIndex index to start from
 * @return the index of the high surrogate of the match, or -1 if there is
 *         none or ch is not a valid code point
 */
private int lastIndexOfSupplementary(int ch, int fromIndex) {
        if (!Character.isValidCodePoint(ch)) {
            return -1;
        }
        final char[] chars = this.value;
        final char high = Character.highSurrogate(ch);
        final char low = Character.lowSurrogate(ch);
        // Start no later than the second-to-last char: a pair needs two chars
        int pos = Math.min(fromIndex, chars.length - 2);
        while (pos >= 0) {
            if (chars[pos] == high && chars[pos + 1] == low) {
                return pos;
            }
            pos--;
        }
        return -1;
    }

int indexOf(String str) returns the index within this string of the first occurrence of the specified substring, i.e. the index at which its first character is found

/**
 * Returns the index of the first occurrence of str, searching from index 0
 * (delegates to indexOf(str, 0)).
 *
 * @param str the substring to look for
 * @return the index of the first match, or -1 if there is none
 */
public int indexOf(String str) {
        return indexOf(str, 0);
    }

/**
 * Returns the index of the first occurrence of str at or after fromIndex,
 * by passing both character arrays in full to the static array-based search.
 *
 * @param str       the substring to look for
 * @param fromIndex index to start searching from
 * @return the index of the first match, or -1 if there is none
 */
public int indexOf(String str, int fromIndex) {
        return indexOf(value, 0, value.length,
                str.value, 0, str.value.length, fromIndex);
    }

/**
 * Searches a region of source for the first occurrence of a region of target.
 *
 * @param source       the characters being searched
 * @param sourceOffset start of the searched region in source
 * @param sourceCount  length of the searched region
 * @param target       the characters being searched for
 * @param targetOffset start of the sought region in target
 * @param targetCount  length of the sought region
 * @param fromIndex    index, relative to sourceOffset, to start searching from
 * @return the match position relative to sourceOffset, or -1 if there is none
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
        // Starting at or past the end: only an empty target can "match"
        if (fromIndex >= sourceCount) {
            return (targetCount == 0 ? sourceCount : -1);
        }
        if (fromIndex < 0) {
            fromIndex = 0;
        }
        // An empty target matches immediately at the start position
        if (targetCount == 0) {
            return fromIndex;
        }

        char first = target[targetOffset];
        // Last source index at which a full-length match could still begin
        int max = sourceOffset + (sourceCount - targetCount);

        for (int i = sourceOffset + fromIndex; i <= max; i++) {
            /* Look for first character. */
            if (source[i] != first) {
                while (++i <= max && source[i] != first);
            }

            /* Found first character, now look at the rest of v2 */
            if (i <= max) {
                int j = i + 1;
              //Index just past the last character of the candidate match
                int end = j + targetCount - 1;
                for (int k = targetOffset + 1; j < end && source[j]
                        == target[k]; j++, k++);

                if (j == end) {
                    /* Found whole string. */
                    return i - sourceOffset;
                }
            }
        }
        return -1;
    }

String substring(int beginIndex) Gets a string with the specified subscript to the last subscript

/**
 * Returns the part of this string from beginIndex to the end.
 *
 * @param beginIndex index of the first character to keep
 * @return this same instance when beginIndex is 0, otherwise a new string
 * @throws StringIndexOutOfBoundsException if beginIndex is negative or larger
 *         than the length
 */
public String substring(int beginIndex) {
        if (beginIndex < 0) {
            throw new StringIndexOutOfBoundsException(beginIndex);
        }
        // Number of characters that remain after beginIndex
        final int remaining = value.length - beginIndex;
        if (remaining < 0) {
            throw new StringIndexOutOfBoundsException(remaining);
        }
        if (beginIndex == 0) {
            return this;
        }
        return new String(value, beginIndex, remaining);
    }

String substring(int beginIndex, int endIndex) returns the substring between the two indices: it includes the character at beginIndex but not the character at endIndex (the end index is exclusive), so its length is endIndex - beginIndex

 /**
  * Returns the part of this string in the range [beginIndex, endIndex).
  *
  * @param beginIndex index of the first character to keep
  * @param endIndex   index after the last character to keep
  * @return this same instance when the range covers the whole string,
  *         otherwise a new string of length endIndex - beginIndex
  * @throws StringIndexOutOfBoundsException if beginIndex is negative, endIndex
  *         exceeds the length, or beginIndex is greater than endIndex
  */
 public String substring(int beginIndex, int endIndex) {
        if (beginIndex < 0) {
            throw new StringIndexOutOfBoundsException(beginIndex);
        }
        if (endIndex > value.length) {
            throw new StringIndexOutOfBoundsException(endIndex);
        }
        // Number of characters in the requested range
        final int span = endIndex - beginIndex;
        if (span < 0) {
            throw new StringIndexOutOfBoundsException(span);
        }
        final boolean wholeString = beginIndex == 0 && endIndex == value.length;
        if (wholeString) {
            return this;
        }
        return new String(value, beginIndex, span);
    }

String concat(String str) appends the argument string to the end of the current string and returns the result

/**
 * Appends str to the end of this string.
 *
 * @param str the string to append
 * @return this same instance when str is empty, otherwise a new string made
 *         of this string's characters followed by str's characters
 */
public String concat(String str) {
        if (str.isEmpty()) {
            return this;
        }
        final int ownLen = value.length;
        // Copy this string's characters into an array large enough for both strings
        final char[] joined = Arrays.copyOf(value, ownLen + str.length());
        // Write the appended string's characters after this string's characters
        str.getChars(joined, ownLen);
        return new String(joined, true);
    }

String replace(char oldChar, char newChar) returns a string in which every occurrence of oldChar is replaced with newChar

/**
 * Replaces every occurrence of oldChar with newChar.
 *
 * @param oldChar the character to replace
 * @param newChar the replacement character
 * @return this same instance when the two characters are equal or oldChar does
 *         not occur, otherwise a new string with the replacements applied
 */
public String replace(char oldChar, char newChar) {
        if (oldChar == newChar) {
            return this;
        }
        final char[] src = value;
        final int length = src.length;

        // Locate the first character that has to change
        int first = 0;
        while (first < length && src[first] != oldChar) {
            first++;
        }
        if (first == length) {
            return this;
        }

        final char[] out = new char[length];
        // Everything before the first hit is copied unchanged
        System.arraycopy(src, 0, out, 0, first);
        for (int pos = first; pos < length; pos++) {
            final char c = src[pos];
            out[pos] = (c == oldChar) ? newChar : c;
        }
        return new String(out, true);
    }

boolean matches(String regex) to determine if a regular expression matches the current string

/**
 * Tests whether this string matches the regular expression, by delegating to
 * Pattern.matches(regex, this).
 *
 * @param regex the regular expression
 * @return the result of Pattern.matches for this string
 */
public boolean matches(String regex) {
        return Pattern.matches(regex, this);
    }

boolean contains(CharSequence s) determines if the current string contains another character sequence

/**
 * Tests whether this string contains the given character sequence, by
 * converting it to a String and checking that indexOf finds it.
 *
 * @param s the sequence to look for
 * @return true if indexOf(s.toString()) returns a value greater than -1
 */
public boolean contains(CharSequence s) {
        return indexOf(s.toString()) > -1;
    }

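String trim() removes leading and trailing whitespace from a string; more precisely, it strips every leading and trailing character whose code is less than or equal to that of the space character ' '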

/**
 * Strips leading and trailing characters whose code is less than or equal to
 * that of the space character.
 *
 * @return this same instance when nothing needs stripping, otherwise the
 *         substring between the first and last kept characters
 */
public String trim() {
        final char[] chars = value;
        int start = 0;
        int end = chars.length;

        // Move forward past leading characters <= ' '
        while (start < end && chars[start] <= ' ') {
            start++;
        }
        // Move backward past trailing characters <= ' '
        while (start < end && chars[end - 1] <= ' ') {
            end--;
        }
        if (start == 0 && end == chars.length) {
            return this;
        }
        return substring(start, end);
    }

char[] toCharArray() Converts a string to an array of characters

/**
 * Returns a newly allocated char array holding a copy of this string's characters.
 *
 * @return a new array of the same length as the string
 */
public char[] toCharArray() {
        // Cannot use Arrays.copyOf because of class initialization order issues
        char result[] = new char[value.length];
  //Copy all characters with System.arraycopy instead
        System.arraycopy(value, 0, result, 0, value.length);
        return result;
    }

Native method

native String intern(); returns the reference to the string in the string constant pool that has the same contents as this string

//Demonstrates reference comparison between a string literal, a String created with new, and its interned form
@Test
	public void test8() {
		String s1="abc";
		String s2=new String("abc");
		System.out.println(s1==s2);//false: s2 was created with new, so it is a different object from the literal
		System.out.println(s1==s2.intern());//true: intern() yields the same reference as the literal
	}

With s1 = "abc", the variable s1 refers directly to the constant "abc" in the string constant pool. With s2 = new String("abc"), the variable s2 refers to a new String object on the heap, which in turn was built from the "abc" in the string constant pool. The references held by s1 and s2 are therefore different, and s1 == s2 is false.

s2.intern() returns the reference of the string in the constant pool. If the variable already refers to the pooled string, that same reference is returned; if it refers to an object on the heap, the reference of the pooled string with the same contents is returned. This is why s1 == s2.intern() is true.

Keywords: Java encoding ascii Attribute

Added by jarv on Mon, 27 Apr 2020 20:35:10 +0300