使用 JAVA 将字符串拆分成多个部分并存储到 CSV
String splitting into parts and store to CSV using JAVA
我有不同格式的数据。我希望首先从输入字符串中提取 Quantity,然后提取 Unit,其余文本应视为 项目。
Code.java
public String itemsProcesing(List<String> items) throws IOException {
List<String> list = items;
List<String> unitList = Arrays.asList("g", "tbsp", "cm", "kg"");
List<String> quantityList = Arrays.asList("Full", "Quarter", "Half", "3 Quarter", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
for (String s : list) {
String[] strArr = s.split(" ");
if (strArr.length == 2) {
String newStr = rewriteString(strArr, quantityList);
strArr = newStr.split(" ");
}
String[] itemLine = new String[3];
for (int i = 0; i < strArr.length; i++) {
String str = strArr[i];
int index = findValueLocation(str, unitList);
itemLine[index] = str;
}
String line = createLineForCSV(itemLine);
writeToFile(line);
}
return "done";
}
private static int findValueLocation(String str, List<String> unitList) {
boolean b = Pattern.matches("\d{1,3}|\d/\d|\d/\d[*]\d|\d{1,3}[*]\d{1,3}|\d{1,3}[*]\d{1,3}|\d{1,3}[-]\d{1,3}|\d{1,3} [-] \d{1,3}|\d.\d|\\d{1,3} - \\d{1,3}| \\d{1,3} - \\d{1,3}| \\d{1,3}-\\d{1,3}", str);
if (b) {
return 0;
}
for (String unit : unitList) {
if (unit.equals(str)) {
return 1;
}
}
return 2;
}
/**
 * Builds one CSV row from the three columns quantity, unit and item.
 *
 * <p>A column that was never filled ({@code null}) is written as an empty
 * field rather than the text {@code null}.
 *
 * @param itemLine array holding quantity, unit and item at indexes 0, 1 and 2
 * @return the three fields joined by commas, without a line terminator
 */
private static String createLineForCSV(String[] itemLine) {
    StringBuilder sb = new StringBuilder();
    for (int column = 0; column < 3; column++) {
        if (column > 0) {
            sb.append(',');
        }
        sb.append(escapeCsvField(itemLine[column]));
    }
    return sb.toString();
}

/**
 * Returns the field in a form that is safe to place between CSV separators:
 * {@code null} becomes the empty string, and a field containing a comma, a
 * double quote or a line break is wrapped in double quotes with embedded
 * quotes doubled.
 */
private static String escapeCsvField(String field) {
    if (field == null) {
        return "";
    }
    boolean needsQuoting = field.indexOf(',') >= 0
            || field.indexOf('"') >= 0
            || field.indexOf('\n') >= 0
            || field.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return field;
    }
    return "\"" + field.replace("\"", "\"\"") + "\"";
}
/**
 * Appends one line, followed by a line separator, to {@code csv_file.csv}.
 *
 * @param line the CSV row to append
 * @throws IOException if the file cannot be opened or written
 */
private static void writeToFile(String line) throws IOException {
    // try-with-resources closes the writer even when write() throws.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("csv_file.csv", true))) {
        writer.write(line);
        writer.newLine();
    }
}
/**
 * Rewrites a two-token ingredient line so that a quantity glued to the front
 * of a token becomes a token of its own, e.g. {@code "8g carrots"} becomes
 * {@code "8 g carrots"}.
 *
 * <p>The first token is examined first, then the second; the quantity is
 * always moved to the front of the result. When neither token starts with a
 * quantity the two tokens are returned unchanged, joined by a space.
 *
 * @param arr          the two tokens of the line
 * @param quantityList the known quantity words and digits
 * @return the rewritten, space-separated line
 */
private static String rewriteString(String[] arr, List<String> quantityList) {
    String strOne = arr[0];
    String strTwo = arr[1];

    int length = quantityPrefixLength(strOne, quantityList);
    if (length > 0) {
        return joinSplitToken(strOne, length, strTwo);
    }
    length = quantityPrefixLength(strTwo, quantityList);
    if (length > 0) {
        return joinSplitToken(strTwo, length, strOne);
    }
    // No quantity found: keep the input instead of dropping it.
    return strOne + " " + strTwo;
}

/**
 * Returns how many leading characters of {@code token} form a quantity, or 0
 * if the token does not start with one. The longest matching list entry wins,
 * and a match ending in a digit is extended over the digits that follow so
 * that {@code "12g"} yields 2 rather than 1.
 */
private static int quantityPrefixLength(String token, List<String> quantityList) {
    int best = 0;
    for (String quantity : quantityList) {
        if (quantity.length() > best && token.startsWith(quantity)) {
            best = quantity.length();
        }
    }
    if (best > 0 && Character.isDigit(token.charAt(best - 1))) {
        while (best < token.length() && Character.isDigit(token.charAt(best))) {
            best++;
        }
    }
    return best;
}

/**
 * Splits {@code token} after {@code quantityLength} characters and returns
 * "quantity remainder other", leaving the remainder out when it is empty.
 */
private static String joinSplitToken(String token, int quantityLength, String other) {
    String quantity = token.substring(0, quantityLength);
    String remainder = token.substring(quantityLength);
    if (remainder.isEmpty()) {
        return quantity + " " + other;
    }
    return quantity + " " + remainder + " " + other;
}
Input data
1-2 tbsp soya sauce
Output: 1-2,tbsp,sauce
Required Output: 1-2,tbsp,soya sauce
如何获得所需的结果。
问题出在项目处理的这一行:
int index = findValueLocation(str, unitList);
itemLine[index] = str;
'findValueLocation' 函数对字符串 'soya' 和 'sauce' 都返回 2,因此 'sauce' 会覆盖已经写入索引 2 的 'soya'。当返回值为 2 时,应该拼接字符串,而不是覆盖。
`
int index = findValueLocation(str, unitList);
if (index == 2 && itemLine[index] != null) {
    // The item column already holds a word: append the next one, separated by
    // a space, instead of overwriting it ("soya" + "sauce" -> "soya sauce").
    itemLine[index] += " " + str;
} else {
    // First value for this column. Assigning here also avoids "null" + str,
    // because the slots of a freshly created String[] are null.
    itemLine[index] = str;
}
`
你可以把它全部放在一个可怕的正则表达式中
(可能需要一些微调)
"((\d+([/\\*]\d+)?)\s*(-\s*\d+([/\\*]\d+)?)?\s*(Full|Quarter|Half|3 Quarter)?)\s*(g|tbsp|cm|kg)?\s*(\w*.*)"
然后选择第 1 组、第 7 组和第 8 组。
我不太确定 Quarter 等词应该归入数量、单位,还是两者兼有;据我所见,您的源代码中是把它们当作数量来处理的……
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits an ingredient line such as {@code "1-2 tbsp soya sauce"} into
 * quantity, unit and item with a single regular expression.
 */
public class RecepieSplitter {
    /**
     * Group 1: the quantity - a number or fraction/product, an optional
     * "- number" range part and an optional quantity word, including any
     * whitespace between those parts. Group 7: the optional unit.
     * Group 8: the remaining text (the item).
     */
    private static final Pattern INGREDIENT_PATTERN = Pattern.compile(
            "((\\d+([/*]\\d+)?)\\s*(-\\s*\\d+([/*]\\d+)?)?\\s*(Full|Quarter|Half|3 Quarter)?)"
            + "\\s*(g|tbsp|cm|kg)?\\s*(\\w*.*)");

    /**
     * Splits one ingredient line.
     *
     * @param ingredient the line to split; it must start with a digit to match
     * @return a three-element list of quantity, unit and item, where the unit
     *         is {@code null} if none was recognised and the quantity may keep
     *         trailing whitespace; {@code null} if the line does not match
     */
    public static List<String> getParts(String ingredient) {
        Matcher matcher = INGREDIENT_PATTERN.matcher(ingredient);
        if (matcher.matches()) {
            return Arrays.asList(matcher.group(1), matcher.group(7), matcher.group(8));
        }
        return null;
    }

    /** Prints the split result for a few sample lines. */
    public static void main(String[] args) {
        System.out.println(RecepieSplitter.getParts("1-2 tbsp soya sauce"));
        System.out.println(RecepieSplitter.getParts("8g carorts"));
        System.out.println(RecepieSplitter.getParts("1/4kg carorts"));
        System.out.println(RecepieSplitter.getParts("1/4 - 1/2g carorts"));
        System.out.println(RecepieSplitter.getParts("1 - 2Quarter carorts"));
    }
}
输出:
[1-2 , tbsp, soya sauce]
[8, g, carorts]
[1/4, kg, carorts]
[1/4 - 1/2, g, carorts]
[1 - 2Quarter, null, carorts]
我认为只需建立一个详尽的计量单位列表,并以单位为界拆分字符串就足够了,这样就不必解析数量了。
像这样:
// Group 1: everything before the unit (the quantity), matched lazily so that
// the first unit found splits the line. Group 2: the unit. Group 3: the item.
Pattern p = Pattern.compile("(.*?)\\s*(tbsp|k?g|cup|c?m)\\s*(.*)");
List<String> tests = Arrays.asList(
        "16g salt",
        "1 kg apple",
        "1 1/2 tbsp sugar");
for (String s : tests) {
    Matcher m = p.matcher(s);
    // Lines that contain no known unit do not match and are skipped silently.
    if (m.matches()) {
        System.out.println(Arrays.asList(m.group(1), m.group(2), m.group(3)));
    }
}
输出(来自https://ideone.com/9kbozK):
[16, g, salt]
[1, kg, apple]
[1 1/2, tbsp, sugar]
在实际应用中还应该加上一个 else 分支,用来捕获输入中可能出现的错误(或无法识别的单位)。
我有不同格式的数据。我希望首先从输入字符串中提取 Quantity,然后提取 Unit,其余文本应视为 项目。
Code.java
public String itemsProcesing(List<String> items) throws IOException {
List<String> list = items;
List<String> unitList = Arrays.asList("g", "tbsp", "cm", "kg"");
List<String> quantityList = Arrays.asList("Full", "Quarter", "Half", "3 Quarter", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
for (String s : list) {
String[] strArr = s.split(" ");
if (strArr.length == 2) {
String newStr = rewriteString(strArr, quantityList);
strArr = newStr.split(" ");
}
String[] itemLine = new String[3];
for (int i = 0; i < strArr.length; i++) {
String str = strArr[i];
int index = findValueLocation(str, unitList);
itemLine[index] = str;
}
String line = createLineForCSV(itemLine);
writeToFile(line);
}
return "done";
}
private static int findValueLocation(String str, List<String> unitList) {
boolean b = Pattern.matches("\d{1,3}|\d/\d|\d/\d[*]\d|\d{1,3}[*]\d{1,3}|\d{1,3}[*]\d{1,3}|\d{1,3}[-]\d{1,3}|\d{1,3} [-] \d{1,3}|\d.\d|\\d{1,3} - \\d{1,3}| \\d{1,3} - \\d{1,3}| \\d{1,3}-\\d{1,3}", str);
if (b) {
return 0;
}
for (String unit : unitList) {
if (unit.equals(str)) {
return 1;
}
}
return 2;
}
/**
 * Builds one CSV row from the three columns quantity, unit and item.
 *
 * <p>A column that was never filled ({@code null}) is written as an empty
 * field rather than the text {@code null}.
 *
 * @param itemLine array holding quantity, unit and item at indexes 0, 1 and 2
 * @return the three fields joined by commas, without a line terminator
 */
private static String createLineForCSV(String[] itemLine) {
    StringBuilder sb = new StringBuilder();
    for (int column = 0; column < 3; column++) {
        if (column > 0) {
            sb.append(',');
        }
        sb.append(escapeCsvField(itemLine[column]));
    }
    return sb.toString();
}

/**
 * Returns the field in a form that is safe to place between CSV separators:
 * {@code null} becomes the empty string, and a field containing a comma, a
 * double quote or a line break is wrapped in double quotes with embedded
 * quotes doubled.
 */
private static String escapeCsvField(String field) {
    if (field == null) {
        return "";
    }
    boolean needsQuoting = field.indexOf(',') >= 0
            || field.indexOf('"') >= 0
            || field.indexOf('\n') >= 0
            || field.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return field;
    }
    return "\"" + field.replace("\"", "\"\"") + "\"";
}
/**
 * Appends one line, followed by a line separator, to {@code csv_file.csv}.
 *
 * @param line the CSV row to append
 * @throws IOException if the file cannot be opened or written
 */
private static void writeToFile(String line) throws IOException {
    // try-with-resources closes the writer even when write() throws.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("csv_file.csv", true))) {
        writer.write(line);
        writer.newLine();
    }
}
/**
 * Rewrites a two-token ingredient line so that a quantity glued to the front
 * of a token becomes a token of its own, e.g. {@code "8g carrots"} becomes
 * {@code "8 g carrots"}.
 *
 * <p>The first token is examined first, then the second; the quantity is
 * always moved to the front of the result. When neither token starts with a
 * quantity the two tokens are returned unchanged, joined by a space.
 *
 * @param arr          the two tokens of the line
 * @param quantityList the known quantity words and digits
 * @return the rewritten, space-separated line
 */
private static String rewriteString(String[] arr, List<String> quantityList) {
    String strOne = arr[0];
    String strTwo = arr[1];

    int length = quantityPrefixLength(strOne, quantityList);
    if (length > 0) {
        return joinSplitToken(strOne, length, strTwo);
    }
    length = quantityPrefixLength(strTwo, quantityList);
    if (length > 0) {
        return joinSplitToken(strTwo, length, strOne);
    }
    // No quantity found: keep the input instead of dropping it.
    return strOne + " " + strTwo;
}

/**
 * Returns how many leading characters of {@code token} form a quantity, or 0
 * if the token does not start with one. The longest matching list entry wins,
 * and a match ending in a digit is extended over the digits that follow so
 * that {@code "12g"} yields 2 rather than 1.
 */
private static int quantityPrefixLength(String token, List<String> quantityList) {
    int best = 0;
    for (String quantity : quantityList) {
        if (quantity.length() > best && token.startsWith(quantity)) {
            best = quantity.length();
        }
    }
    if (best > 0 && Character.isDigit(token.charAt(best - 1))) {
        while (best < token.length() && Character.isDigit(token.charAt(best))) {
            best++;
        }
    }
    return best;
}

/**
 * Splits {@code token} after {@code quantityLength} characters and returns
 * "quantity remainder other", leaving the remainder out when it is empty.
 */
private static String joinSplitToken(String token, int quantityLength, String other) {
    String quantity = token.substring(0, quantityLength);
    String remainder = token.substring(quantityLength);
    if (remainder.isEmpty()) {
        return quantity + " " + other;
    }
    return quantity + " " + remainder + " " + other;
}
Input data: 1-2 tbsp soya sauce
Output: 1-2,tbsp,sauce
Required Output: 1-2,tbsp,soya sauce
如何获得所需的结果。
问题出在项目处理的这一行:
int index = findValueLocation(str, unitList);
itemLine[index] = str;
'findValueLocation' 函数对字符串 'soya' 和 'sauce' 都返回 2,因此 'sauce' 会覆盖已经写入索引 2 的 'soya'。当返回值为 2 时,应该拼接字符串,而不是覆盖。
`
int index = findValueLocation(str, unitList);
if (index == 2 && itemLine[index] != null) {
    // The item column already holds a word: append the next one, separated by
    // a space, instead of overwriting it ("soya" + "sauce" -> "soya sauce").
    itemLine[index] += " " + str;
} else {
    // First value for this column. Assigning here also avoids "null" + str,
    // because the slots of a freshly created String[] are null.
    itemLine[index] = str;
}
`
你可以把它全部放在一个可怕的正则表达式中 (可能需要一些微调)
"((\d+([/\\*]\d+)?)\s*(-\s*\d+([/\\*]\d+)?)?\s*(Full|Quarter|Half|3 Quarter)?)\s*(g|tbsp|cm|kg)?\s*(\w*.*)"
然后选择第 1 组、第 7 组和第 8 组。
我不太确定 Quarter 等词应该归入数量、单位,还是两者兼有;据我所见,您的源代码中是把它们当作数量来处理的……
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits an ingredient line such as {@code "1-2 tbsp soya sauce"} into
 * quantity, unit and item with a single regular expression.
 */
public class RecepieSplitter {
    /**
     * Group 1: the quantity - a number or fraction/product, an optional
     * "- number" range part and an optional quantity word, including any
     * whitespace between those parts. Group 7: the optional unit.
     * Group 8: the remaining text (the item).
     */
    private static final Pattern INGREDIENT_PATTERN = Pattern.compile(
            "((\\d+([/*]\\d+)?)\\s*(-\\s*\\d+([/*]\\d+)?)?\\s*(Full|Quarter|Half|3 Quarter)?)"
            + "\\s*(g|tbsp|cm|kg)?\\s*(\\w*.*)");

    /**
     * Splits one ingredient line.
     *
     * @param ingredient the line to split; it must start with a digit to match
     * @return a three-element list of quantity, unit and item, where the unit
     *         is {@code null} if none was recognised and the quantity may keep
     *         trailing whitespace; {@code null} if the line does not match
     */
    public static List<String> getParts(String ingredient) {
        Matcher matcher = INGREDIENT_PATTERN.matcher(ingredient);
        if (matcher.matches()) {
            return Arrays.asList(matcher.group(1), matcher.group(7), matcher.group(8));
        }
        return null;
    }

    /** Prints the split result for a few sample lines. */
    public static void main(String[] args) {
        System.out.println(RecepieSplitter.getParts("1-2 tbsp soya sauce"));
        System.out.println(RecepieSplitter.getParts("8g carorts"));
        System.out.println(RecepieSplitter.getParts("1/4kg carorts"));
        System.out.println(RecepieSplitter.getParts("1/4 - 1/2g carorts"));
        System.out.println(RecepieSplitter.getParts("1 - 2Quarter carorts"));
    }
}
输出:
[1-2 , tbsp, soya sauce]
[8, g, carorts]
[1/4, kg, carorts]
[1/4 - 1/2, g, carorts]
[1 - 2Quarter, null, carorts]
我认为只需建立一个详尽的计量单位列表,并以单位为界拆分字符串就足够了,这样就不必解析数量了。
像这样:
// Group 1: everything before the unit (the quantity), matched lazily so that
// the first unit found splits the line. Group 2: the unit. Group 3: the item.
Pattern p = Pattern.compile("(.*?)\\s*(tbsp|k?g|cup|c?m)\\s*(.*)");
List<String> tests = Arrays.asList(
        "16g salt",
        "1 kg apple",
        "1 1/2 tbsp sugar");
for (String s : tests) {
    Matcher m = p.matcher(s);
    // Lines that contain no known unit do not match and are skipped silently.
    if (m.matches()) {
        System.out.println(Arrays.asList(m.group(1), m.group(2), m.group(3)));
    }
}
输出(来自https://ideone.com/9kbozK):
[16, g, salt]
[1, kg, apple]
[1 1/2, tbsp, sugar]
在实际应用中还应该加上一个 else 分支,用来捕获输入中可能出现的错误(或无法识别的单位)。