UnicodeConverteUtil: converting between Unicode escape sequences and Strings

Unicode is a character set and encoding standard, developed by international organizations, that is designed to accommodate all the scripts and symbols in the world, so it can represent any character. In Java source, a Unicode escape is written as a backslash, the letter u, and four hexadecimal digits, for example: \u597d. Java does not provide a utility class for converting directly between such Unicode escape sequences and strings, so the author wrote one himself. It is easy to use.

1. The UnicodeConverteUtil tool class

1.1 API list
Method signature Method description
public static boolean isContainsUnicodeChar(String str) Determines whether a string contains Unicode encoded characters
public static boolean isUnicodeChar(String str) Determines whether a string is exactly one Unicode-encoded character
public static String char2Unicode(char c) Convert a character to Unicode encoding
public static String string2Unicode(String str) Convert a string to Unicode encoding
public static char unicode2char(String unicodeChar) Convert a Unicode encoding to a character
public static String unicode2String(String unicode) Convert a Unicode string to a string
1.2 source code of tool class
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Unicode Code conversion tool class
 * @Auth zongf
 * @Time 2019-04-28
 */
/**
 * Converts between plain strings and their textual Unicode escape form.
 *
 * <p>An "escape" here means six literal characters: a backslash, the letter
 * {@code u}, and exactly four hexadecimal digits (for example the six
 * characters backslash-u-5-9-7-d for the code unit U+597D).
 *
 * <p>All methods are static and stateless; the compiled pattern is shared and
 * immutable.
 *
 * @Auth zongf
 * @Time 2019-04-28
 */
public class UnicodeConverteUtil {

    /**
     * Regex source for one escape: a literal backslash, {@code u}, and four
     * hexadecimal digits. Only hex digits are accepted so that every match
     * can be parsed with radix 16.
     */
    private static final String UNICODE_PATTERN_EXP = "\\\\u[0-9a-fA-F]{4}";

    /** Compiled once; {@link Pattern} instances are immutable and reusable. */
    private static final Pattern UNICODE_PATTERN = Pattern.compile(UNICODE_PATTERN_EXP);

    /** Highest code unit that is emitted unchanged by {@link #char2Unicode(char)}. */
    private static final char MAX_ASCII = 127;

    /** Number of hexadecimal digits in one escape. */
    private static final int HEX_DIGITS = 4;

    /**
     * Determines whether the string contains at least one Unicode escape.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if an escape occurs anywhere in {@code str};
     *         {@code false} if there is none or {@code str} is {@code null} or empty
     */
    public static boolean isContainsUnicodeChar(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        return UNICODE_PATTERN.matcher(str).find();
    }

    /**
     * Determines whether the whole string is exactly one Unicode escape.
     * Only the format is verified.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if {@code str} consists of a single escape and nothing else
     */
    public static boolean isUnicodeChar(String str) {
        return str != null && UNICODE_PATTERN.matcher(str).matches();
    }

    /**
     * Converts one character to its escape form.
     *
     * @param c the character to convert
     * @return {@code c} itself as a one-character string when it is ASCII
     *         (0 to 127); otherwise the six-character escape with lower-case,
     *         zero-padded hex digits
     */
    public static String char2Unicode(char c) {
        // ASCII characters are returned unchanged.
        if (c <= MAX_ASCII) {
            return String.valueOf(c);
        }

        String hex = Integer.toHexString(c);

        StringBuilder escaped = new StringBuilder(2 + HEX_DIGITS).append("\\u");
        // Left-pad with zeros so the escape always carries four digits.
        for (int i = hex.length(); i < HEX_DIGITS; i++) {
            escaped.append('0');
        }
        return escaped.append(hex).toString();
    }

    /**
     * Converts every non-ASCII character of a string to its escape form.
     * ASCII characters (including backslashes) are copied unchanged.
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string, or {@code str} itself when it is
     *         {@code null} or empty
     */
    public static String string2Unicode(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }

        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            result.append(char2Unicode(str.charAt(i)));
        }
        return result.toString();
    }

    /**
     * Converts a single escape to the character it denotes.
     *
     * @param unicodeChar a string expected to be exactly one escape
     * @return the decoded character, or {@code '?'} when {@code unicodeChar}
     *         is not a well-formed escape (including {@code null})
     */
    public static char unicode2char(String unicodeChar) {
        if (!isUnicodeChar(unicodeChar)) {
            return '?';
        }
        // Skip the two-character prefix; the remaining four characters are
        // guaranteed by the pattern to be hexadecimal digits.
        return (char) Integer.parseInt(unicodeChar.substring(2), 16);
    }

    /**
     * Replaces every escape inside a string with the character it denotes.
     * Text that is not part of an escape is copied unchanged.
     *
     * @param unicode the string to decode; may be {@code null}
     * @return the decoded string, or {@code unicode} itself when it is
     *         {@code null}, empty, or contains no escape
     */
    public static String unicode2String(String unicode) {
        if (unicode == null || unicode.isEmpty()) {
            return unicode;
        }

        Matcher matcher = UNICODE_PATTERN.matcher(unicode);
        if (!matcher.find()) {
            return unicode;
        }

        // Copy segments by index instead of using appendReplacement: that
        // method interprets '$' and '\' in the replacement text, which would
        // fail when an escape decodes to one of those characters.
        StringBuilder decoded = new StringBuilder(unicode.length());
        int copiedUpTo = 0;
        do {
            decoded.append(unicode, copiedUpTo, matcher.start());
            decoded.append(unicode2char(matcher.group()));
            copiedUpTo = matcher.end();
        } while (matcher.find());
        decoded.append(unicode, copiedUpTo, unicode.length());

        return decoded.toString();
    }
}

2. Unit test

public class TestUnicodeUtil {


    // Test character to Unicode string
    @Test
    public void test(){
        String str = UnicodeConverteUtil.char2Unicode('you');
        System.out.println("you:" + str);
    }

    // Test string to Unicode string
    @Test
    public void test2(){
        String str = "How do you do";
        System.out.println(UnicodeConverteUtil.string2Unicode(str));
    }

    // Determine whether the character is a Unicode string
    @Test
    public void test34(){
        String str = "\\u554a";
        System.out.println(str + ":" + UnicodeConverteUtil.isUnicodeChar(str));
    }

    // Unicode conversion character
    @Test
    public void tes2t(){
        String str = "\\u554a";
        char c = UnicodeConverteUtil.unicode2char(str);
        System.out.println(str + ":" + c);

    }

    @Test
    public void te(){
        String str = "Hello\\u4f60, \\u597d\\u554a, Friend";
        System.out.println(UnicodeConverteUtil.unicode2String(str));
    }

}

Keywords: encoding Java ascii less

Added by cody7 on Wed, 20 Nov 2019 21:25:48 +0200