UTF 中的字符串 indexOf 问题
String indexOf issues in UTF
如果第一个包含第二个,我会尝试突出显示字符串中的字符串。问题是,当父字符串包含重音字符时,indexOf
returns me 第一个匹配项的索引出错。 filterText 是要突出显示的字符串。例如。
@Override
public View getView(final int position, View convertView, ViewGroup parent) {
View rowView = convertView;
ViewHolder viewHolder;
if (rowView == null) {
LayoutInflater inflater = getLayoutInflater();
rowView = inflater.inflate(R.layout.list_row_search, parent, false);
// configure view holder
viewHolder = new ViewHolder();
viewHolder.tvName = (TextView) rowView.findViewById(R.id.tv_name);
viewHolder.tvDescription = (TextView) rowView.findViewById(R.id.tv_description);
rowView.setTag(viewHolder);
} else {
viewHolder = (ViewHolder) convertView.getTag();
}
viewHolder.tvName.setTypeface(tfBold);
viewHolder.tvDescription.setTypeface(tfRegular);
viewHolder.tvName.setText(getSpannedFromHtml(parkingList.get(position).getPostTitle()));
viewHolder.tvDescription.setText(getSpannedFromHtml(parkingList.get(position).getPostSubTitle()));
if(getUTF8Length(filterText) > 2){
if(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).contains(filterText)) {
Log.d("AAA", "length 1: " + parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).length());
Log.d("AAA", "length 2: " + getUTF8Length(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).toString()));
// Find all occurrences for the fist letter of filterText in Name
for (int cont = -1; (cont = parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1; ) {
String str1 = "";
String str2 = "";
String str3 = "";
Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));
Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));
if (cont > 0)
str1 = viewHolder.tvName.getText().subSequence(0, cont).toString();
str2 = viewHolder.tvName.getText().subSequence(cont, cont + getUTF8Length(filterText)).toString();
if ((cont + getUTF8Length(filterText)) < getUTF8Length(viewHolder.tvName.getText().toString()))
str3 = viewHolder.tvName.getText().subSequence(cont + getUTF8Length(filterText), getUTF8Length(viewHolder.tvName.getText().toString())).toString();
String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
viewHolder.tvName.setText(getSpannedFromHtml(highlilightedString));
}
}
if(parkingList.get(position).getPostSubTitle().toLowerCase(Locale.getDefault()).contains(filterText))
// Find all occurrences for the fist letter of filterText in Description
for(int cont = -1; (cont = parkingList.get(position).getPostSubTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1;){
String str1="";
String str2="";
String str3="";
if(cont > 0)
str1 = viewHolder.tvDescription.getText().subSequence(0, cont).toString();
str2 = viewHolder.tvDescription.getText().subSequence(cont, cont + filterText.length()).toString();
if((cont + filterText.length()) < viewHolder.tvDescription.getText().length() )
str3 = viewHolder.tvDescription.getText().subSequence(cont + filterText.length(), viewHolder.tvDescription.getText().length()).toString();
String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
viewHolder.tvDescription.setText(getSpannedFromHtml(highlilightedString));
}
}
return rowView;
}
/**
*
*/
private int getUTF8Length (String str) {
int count = 0;
for(int cont = 0, len = str.length(); cont < len; cont++){
char ch = str.charAt(cont);
if(ch <= 0x7F){
count++;
} else if(ch <= 0x7FF){
cont+=2;
} else if(Character.isHighSurrogate(ch)){
count+=4;
cont++;
} else {
count+=3;
}
}
return count;
}
问题是如果 parkingList(位置)包含例如字符串 "Pokémon Shop" 并且 filterText 包含 "mon",indexOf 方法 returns 9 而不是 4。以下日志是例如,如果评估的链是 "Pokémon Shop" 并且要在其中搜索的字符串是 "mon":
D/AAA: length 1: 17
D/AAA: length 2: 17
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), 0): 9
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): 9
是因为你的getUTF8Length()。它给你错误的长度值。试试下面的代码。 (您可能需要调整正则表达式以满足您的字符串要求。)
private int getUTF8Length (String str) {
int count = 0;
List<String> characters=new ArrayList<String>();
Pattern pat = Pattern.compile("[\s*\p{L}*]\p{M}*");
Matcher matcher = pat.matcher(str);
while (matcher.find()) {
characters.add(matcher.group());
}
count = characters.size();
String s = characters.toString();
Log.d("LogDebug", s);
return count;
}
示例输出:
D/LogDebug: [P, o, k, é, m, o, n, , S, h, o, p]
D/LogDebug: getUTF8Length() = 12
PS。此代码基于 Java Unicode String length 并将正则表达式调整为字符串输入 "Pokémon Shop".
如果第一个包含第二个,我会尝试突出显示字符串中的字符串。问题是,当父字符串包含重音字符时,indexOf
returns me 第一个匹配项的索引出错。 filterText 是要突出显示的字符串。例如。
@Override
public View getView(final int position, View convertView, ViewGroup parent) {
View rowView = convertView;
ViewHolder viewHolder;
if (rowView == null) {
LayoutInflater inflater = getLayoutInflater();
rowView = inflater.inflate(R.layout.list_row_search, parent, false);
// configure view holder
viewHolder = new ViewHolder();
viewHolder.tvName = (TextView) rowView.findViewById(R.id.tv_name);
viewHolder.tvDescription = (TextView) rowView.findViewById(R.id.tv_description);
rowView.setTag(viewHolder);
} else {
viewHolder = (ViewHolder) convertView.getTag();
}
viewHolder.tvName.setTypeface(tfBold);
viewHolder.tvDescription.setTypeface(tfRegular);
viewHolder.tvName.setText(getSpannedFromHtml(parkingList.get(position).getPostTitle()));
viewHolder.tvDescription.setText(getSpannedFromHtml(parkingList.get(position).getPostSubTitle()));
if(getUTF8Length(filterText) > 2){
if(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).contains(filterText)) {
Log.d("AAA", "length 1: " + parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).length());
Log.d("AAA", "length 2: " + getUTF8Length(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).toString()));
// Find all occurrences for the fist letter of filterText in Name
for (int cont = -1; (cont = parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1; ) {
String str1 = "";
String str2 = "";
String str3 = "";
Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));
Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));
if (cont > 0)
str1 = viewHolder.tvName.getText().subSequence(0, cont).toString();
str2 = viewHolder.tvName.getText().subSequence(cont, cont + getUTF8Length(filterText)).toString();
if ((cont + getUTF8Length(filterText)) < getUTF8Length(viewHolder.tvName.getText().toString()))
str3 = viewHolder.tvName.getText().subSequence(cont + getUTF8Length(filterText), getUTF8Length(viewHolder.tvName.getText().toString())).toString();
String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
viewHolder.tvName.setText(getSpannedFromHtml(highlilightedString));
}
}
if(parkingList.get(position).getPostSubTitle().toLowerCase(Locale.getDefault()).contains(filterText))
// Find all occurrences for the fist letter of filterText in Description
for(int cont = -1; (cont = parkingList.get(position).getPostSubTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1;){
String str1="";
String str2="";
String str3="";
if(cont > 0)
str1 = viewHolder.tvDescription.getText().subSequence(0, cont).toString();
str2 = viewHolder.tvDescription.getText().subSequence(cont, cont + filterText.length()).toString();
if((cont + filterText.length()) < viewHolder.tvDescription.getText().length() )
str3 = viewHolder.tvDescription.getText().subSequence(cont + filterText.length(), viewHolder.tvDescription.getText().length()).toString();
String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
viewHolder.tvDescription.setText(getSpannedFromHtml(highlilightedString));
}
}
return rowView;
}
/**
*
*/
private int getUTF8Length (String str) {
int count = 0;
for(int cont = 0, len = str.length(); cont < len; cont++){
char ch = str.charAt(cont);
if(ch <= 0x7F){
count++;
} else if(ch <= 0x7FF){
cont+=2;
} else if(Character.isHighSurrogate(ch)){
count+=4;
cont++;
} else {
count+=3;
}
}
return count;
}
问题是如果 parkingList(位置)包含例如字符串 "Pokémon Shop" 并且 filterText 包含 "mon",indexOf 方法 returns 9 而不是 4。以下日志是例如,如果评估的链是 "Pokémon Shop" 并且要在其中搜索的字符串是 "mon":
D/AAA: length 1: 17
D/AAA: length 2: 17
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), 0): 9
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): 9
是因为你的getUTF8Length()。它给你错误的长度值。试试下面的代码。 (您可能需要调整正则表达式以满足您的字符串要求。)
private int getUTF8Length (String str) {
int count = 0;
List<String> characters=new ArrayList<String>();
Pattern pat = Pattern.compile("[\s*\p{L}*]\p{M}*");
Matcher matcher = pat.matcher(str);
while (matcher.find()) {
characters.add(matcher.group());
}
count = characters.size();
String s = characters.toString();
Log.d("LogDebug", s);
return count;
}
示例输出:
D/LogDebug: [P, o, k, é, m, o, n, , S, h, o, p]
D/LogDebug: getUTF8Length() = 12
PS。此代码基于 Java Unicode String length 并将正则表达式调整为字符串输入 "Pokémon Shop".