Unicode is a character encoding standard, developed by international organizations, that is designed to accommodate all the scripts and symbols in the world, so it can represent any character. In the Java language, a Unicode escape is written as `\u` followed by four hexadecimal digits, for example `\u597d`. Java does not provide a utility class for converting directly between Unicode escape sequences and strings, so the author wrote a small one, which is easy to use.
1. UnicodeConverteUtil tool class
1.1 API list
Method signature | Method description |
---|---|
public static boolean isContainsUnicodeChar(String str) | Determines whether a string contains Unicode encoded characters |
public static boolean isUnicodeChar(String str) | Determines whether a string is exactly one Unicode-encoded character |
public static String char2Unicode(char c) | Convert a character to Unicode encoding |
public static String string2Unicode(String str) | Convert a string to Unicode encoding |
public static char unicode2char(String unicodeChar) | Convert a Unicode encoding to a character |
public static String unicode2String(String unicode) | Convert the Unicode escape sequences in a string back to the characters they represent |
1.2 source code of tool class
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for converting between characters/strings and their textual Unicode
 * escape form (a backslash, the letter 'u', then exactly four hexadecimal digits).
 *
 * <p>All methods are static and stateless.
 *
 * @author zongf
 * @since 2019-04-28
 */
public class UnicodeConverteUtil {

    /**
     * Matches one textual escape: a literal backslash, 'u', then four HEX digits.
     * Only hex digits are accepted so that every match can be parsed in base 16.
     * Compiled once because Pattern is immutable and safe to share.
     */
    private static final Pattern UNICODE_ESCAPE = Pattern.compile("\\\\u[0-9a-fA-F]{4}");

    /**
     * Determines whether the string contains at least one Unicode escape sequence.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if an escape sequence occurs anywhere in {@code str};
     *         {@code false} if {@code str} is {@code null}, empty, or has none
     */
    public static boolean isContainsUnicodeChar(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        return UNICODE_ESCAPE.matcher(str).find();
    }

    /**
     * Determines whether the whole string is exactly one Unicode escape sequence
     * (six characters in total). Only the format is verified.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if {@code str} is a single escape sequence
     */
    public static boolean isUnicodeChar(String str) {
        return str != null && UNICODE_ESCAPE.matcher(str).matches();
    }

    /**
     * Converts a character to its Unicode escape form.
     *
     * @param c the character to convert
     * @return the character itself (as a string) when it is ASCII (0-127);
     *         otherwise the escape sequence with four lowercase hex digits
     */
    public static String char2Unicode(char c) {
        // ASCII characters are returned unchanged.
        if (c <= 127) {
            return String.valueOf(c);
        }
        String hex = Integer.toHexString(c);
        StringBuilder escaped = new StringBuilder(6).append("\\u");
        // Left-pad with zeros so the escape always has four hex digits.
        for (int i = hex.length(); i < 4; i++) {
            escaped.append('0');
        }
        return escaped.append(hex).toString();
    }

    /**
     * Converts every non-ASCII character of a string to its Unicode escape form.
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string; {@code str} itself when it is {@code null} or empty
     */
    public static String string2Unicode(String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            result.append(char2Unicode(str.charAt(i)));
        }
        return result.toString();
    }

    /**
     * Converts a single Unicode escape sequence to the character it denotes.
     *
     * @param unicodeChar a string that should be exactly one escape sequence
     * @return the decoded character, or {@code '?'} when {@code unicodeChar}
     *         is not a well-formed escape sequence
     */
    public static char unicode2char(String unicodeChar) {
        if (!isUnicodeChar(unicodeChar)) {
            return '?';
        }
        // Skip the two-character prefix and parse the four hex digits.
        return (char) Integer.parseInt(unicodeChar.substring(2), 16);
    }

    /**
     * Replaces every Unicode escape sequence in a string with the character it denotes.
     * Text that is not part of an escape sequence is copied through unchanged.
     *
     * @param unicode the string to decode; may be {@code null}
     * @return the decoded string; {@code unicode} itself when it is {@code null},
     *         empty, or contains no escape sequence
     */
    public static String unicode2String(String unicode) {
        if (!isContainsUnicodeChar(unicode)) {
            return unicode;
        }
        Matcher matcher = UNICODE_ESCAPE.matcher(unicode);
        StringBuilder decoded = new StringBuilder(unicode.length());
        int copiedUpTo = 0;
        // Copy spans manually rather than via appendReplacement: that method treats
        // '$' and '\' in the replacement as special, which breaks when the decoded
        // character happens to be one of them.
        while (matcher.find()) {
            decoded.append(unicode, copiedUpTo, matcher.start());
            decoded.append(unicode2char(matcher.group()));
            copiedUpTo = matcher.end();
        }
        decoded.append(unicode, copiedUpTo, unicode.length());
        return decoded.toString();
    }
}
2. Unit test
public class TestUnicodeUtil { // Test character to Unicode string @Test public void test(){ String str = UnicodeConverteUtil.char2Unicode('you'); System.out.println("you:" + str); } // Test string to Unicode string @Test public void test2(){ String str = "How do you do"; System.out.println(UnicodeConverteUtil.string2Unicode(str)); } // Determine whether the character is a Unicode string @Test public void test34(){ String str = "\\u554a"; System.out.println(str + ":" + UnicodeConverteUtil.isUnicodeChar(str)); } // Unicode conversion character @Test public void tes2t(){ String str = "\\u554a"; char c = UnicodeConverteUtil.unicode2char(str); System.out.println(str + ":" + c); } @Test public void te(){ String str = "Hello\\u4f60, \\u597d\\u554a, Friend"; System.out.println(UnicodeConverteUtil.unicode2String(str)); } }