如何在 Java 中检查某个字符是否属于特定字符集?
How to check if a character is from a specific charset in Java?
我有一种方法可以将文件中的一行从给定字符集(utf-8 或 latin1)转换为另一个字符集。从 utf-8 转换为 latin1 时,我应该只显示无法转换的字符(因为这是尝试从 utf-8 转换为 latin1 时发生的情况)。
我想逐个检查每个字符能否被成功转换。
/**
 * Draft from the question: re-encodes {@code string} from {@code fromCharset} into
 * {@code toCharset} and returns either the collected "not encoded" characters or, when
 * none were collected, the converted line.
 *
 * <p>NOTE: the {@code if ()} condition and the {@code append()} argument are left blank on
 * purpose (they are what the question asks about), so this snippet does not compile as written.
 *
 * @param string      the line to convert
 * @param fromCharset the charset used to turn {@code string} into bytes and back
 * @param toCharset   the charset the line is encoded into
 * @return the contents of {@code notEncoded} if non-empty, otherwise {@code convertedLine}
 * @throws IllegalStateException wrapping any exception raised inside the method
 */
private String transcodeLineFromTo(String string, Charset fromCharset, Charset toCharset) {
try {
// Round-trip the string through fromCharset: String -> bytes -> CharBuffer.
ByteBuffer inputBuffer = ByteBuffer.wrap(string.getBytes(fromCharset));
CharBuffer data = fromCharset.decode(inputBuffer);
// NOTE(review): Charset.encode is documented to replace unmappable characters instead of
// failing, so the output presumably no longer contains the unconvertible characters - confirm.
ByteBuffer outputBuffer = toCharset.encode(data);
// NOTE(review): array() exposes the whole backing array, which may be longer than the
// encoded content - confirm before relying on its length.
byte[] outputData = outputBuffer.array();
// Decoded with the platform default charset; not referenced again in this method.
String originalLine = new String(outputData);
String convertedLine = new String(outputData, toCharset);
StringBuilder notEncoded = new StringBuilder();
char[] lineCharacters = convertedLine.toCharArray();
for (char ch : lineCharacters) {
// Placeholder: the test deciding that ch could not be converted is still missing.
if () {
// Placeholder: the value to append is still missing.
notEncoded.append();
}
}
// Return the collected characters when there are any, otherwise the converted line.
if (!notEncoded.toString().equals("")) {
return notEncoded.toString();
} else {
return convertedLine;
}
} catch (Exception e) {
// Any failure is rethrown unchecked with the original exception as the cause.
throw new IllegalStateException(e);
}
}
您可以通过调用 CharsetEncoder 的 canEncode 函数(编码器由 Charset.newEncoder() 获得)
来检查某个字符能否被编码。试试这个:
/**
 * Reports the characters of {@code string} that cannot be represented in {@code toCharset};
 * when every character can be represented, returns the line after re-encoding it.
 *
 * <p>Characters are examined per Unicode code point, so a supplementary character (a
 * surrogate pair) is accepted or rejected as a whole instead of being rejected half by half.
 *
 * @param string      the line to inspect and convert
 * @param fromCharset the charset the line is first round-tripped through
 * @param toCharset   the charset the line must be encodable in
 * @return the unencodable characters in their original order if there are any; otherwise the
 *         line encoded with {@code toCharset} and decoded back into a {@code String}
 * @throws IllegalStateException wrapping any exception raised inside the method
 *         (including a {@code NullPointerException} for null arguments)
 */
private String transcodeLineFromTo(String string, Charset fromCharset, Charset toCharset) {
    try {
        // One encoder is enough for all checks; creating one per character is wasted work.
        java.nio.charset.CharsetEncoder encoder = toCharset.newEncoder();
        StringBuilder notEncoded = new StringBuilder();
        int index = 0;
        while (index < string.length()) {
            // Step by code point: canEncode(char) always rejects a lone surrogate, which
            // would misreport supplementary characters even for charsets such as UTF-8.
            int next = index + Character.charCount(string.codePointAt(index));
            if (!encoder.canEncode(string.subSequence(index, next))) {
                notEncoded.append(string, index, next);
            }
            index = next;
        }
        if (notEncoded.length() > 0) {
            return notEncoded.toString();
        }
        ByteBuffer sourceBytes = ByteBuffer.wrap(string.getBytes(fromCharset));
        ByteBuffer encoded = encoder.encode(fromCharset.decode(sourceBytes));
        // Decode only the bytes between position and limit: the backing array returned by
        // array() can be larger than the encoded content and would add trailing NUL chars.
        return toCharset.decode(encoded).toString();
    } catch (Exception e) {
        // Kept broad on purpose so callers keep seeing IllegalStateException for any failure.
        throw new IllegalStateException(e);
    }
}
我有一种方法可以将文件中的一行从给定字符集(utf-8 或 latin1)转换为另一个字符集。从 utf-8 转换为 latin1 时,我应该只显示无法转换的字符(因为这是尝试从 utf-8 转换为 latin1 时发生的情况)。 我想逐个检查每个字符能否被成功转换。
/**
 * Draft from the question: re-encodes {@code string} from {@code fromCharset} into
 * {@code toCharset} and returns either the collected "not encoded" characters or, when
 * none were collected, the converted line.
 *
 * <p>NOTE: the {@code if ()} condition and the {@code append()} argument are left blank on
 * purpose (they are what the question asks about), so this snippet does not compile as written.
 *
 * @param string      the line to convert
 * @param fromCharset the charset used to turn {@code string} into bytes and back
 * @param toCharset   the charset the line is encoded into
 * @return the contents of {@code notEncoded} if non-empty, otherwise {@code convertedLine}
 * @throws IllegalStateException wrapping any exception raised inside the method
 */
private String transcodeLineFromTo(String string, Charset fromCharset, Charset toCharset) {
try {
// Round-trip the string through fromCharset: String -> bytes -> CharBuffer.
ByteBuffer inputBuffer = ByteBuffer.wrap(string.getBytes(fromCharset));
CharBuffer data = fromCharset.decode(inputBuffer);
// NOTE(review): Charset.encode is documented to replace unmappable characters instead of
// failing, so the output presumably no longer contains the unconvertible characters - confirm.
ByteBuffer outputBuffer = toCharset.encode(data);
// NOTE(review): array() exposes the whole backing array, which may be longer than the
// encoded content - confirm before relying on its length.
byte[] outputData = outputBuffer.array();
// Decoded with the platform default charset; not referenced again in this method.
String originalLine = new String(outputData);
String convertedLine = new String(outputData, toCharset);
StringBuilder notEncoded = new StringBuilder();
char[] lineCharacters = convertedLine.toCharArray();
for (char ch : lineCharacters) {
// Placeholder: the test deciding that ch could not be converted is still missing.
if () {
// Placeholder: the value to append is still missing.
notEncoded.append();
}
}
// Return the collected characters when there are any, otherwise the converted line.
if (!notEncoded.toString().equals("")) {
return notEncoded.toString();
} else {
return convertedLine;
}
} catch (Exception e) {
// Any failure is rethrown unchecked with the original exception as the cause.
throw new IllegalStateException(e);
}
}
您可以通过调用 CharsetEncoder 的 canEncode 函数(编码器由 Charset.newEncoder() 获得)
来检查某个字符能否被编码。试试这个:
/**
 * Reports the characters of {@code string} that cannot be represented in {@code toCharset};
 * when every character can be represented, returns the line after re-encoding it.
 *
 * <p>Characters are examined per Unicode code point, so a supplementary character (a
 * surrogate pair) is accepted or rejected as a whole instead of being rejected half by half.
 *
 * @param string      the line to inspect and convert
 * @param fromCharset the charset the line is first round-tripped through
 * @param toCharset   the charset the line must be encodable in
 * @return the unencodable characters in their original order if there are any; otherwise the
 *         line encoded with {@code toCharset} and decoded back into a {@code String}
 * @throws IllegalStateException wrapping any exception raised inside the method
 *         (including a {@code NullPointerException} for null arguments)
 */
private String transcodeLineFromTo(String string, Charset fromCharset, Charset toCharset) {
    try {
        // One encoder is enough for all checks; creating one per character is wasted work.
        java.nio.charset.CharsetEncoder encoder = toCharset.newEncoder();
        StringBuilder notEncoded = new StringBuilder();
        int index = 0;
        while (index < string.length()) {
            // Step by code point: canEncode(char) always rejects a lone surrogate, which
            // would misreport supplementary characters even for charsets such as UTF-8.
            int next = index + Character.charCount(string.codePointAt(index));
            if (!encoder.canEncode(string.subSequence(index, next))) {
                notEncoded.append(string, index, next);
            }
            index = next;
        }
        if (notEncoded.length() > 0) {
            return notEncoded.toString();
        }
        ByteBuffer sourceBytes = ByteBuffer.wrap(string.getBytes(fromCharset));
        ByteBuffer encoded = encoder.encode(fromCharset.decode(sourceBytes));
        // Decode only the bytes between position and limit: the backing array returned by
        // array() can be larger than the encoded content and would add trailing NUL chars.
        return toCharset.decode(encoded).toString();
    } catch (Exception e) {
        // Kept broad on purpose so callers keep seeing IllegalStateException for any failure.
        throw new IllegalStateException(e);
    }
}