Java 8 中的文件泛化函数
Generalizing a Function on Files in Java 8
我正在尝试使用 JDK 8(重新学习语言)在 Java 中编写索引程序。到目前为止,我有以下内容(省略包名):
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
/**
 * Command-line word counter: prints every word of a text file together with
 * the number of times it occurs.
 */
public class Main {
    /**
     * Builds a concordance (word to occurrence count) for the named file.
     *
     * <p>Each line is split on runs of punctuation and/or whitespace; the
     * pieces between separators are the words. Empty pieces are ignored.
     *
     * @param fileName path of the file to read
     * @return a mutable map from each distinct word to its number of occurrences
     * @throws IOException if the file cannot be read
     */
    public static Map<String,Integer> generateConcordance(String fileName) throws IOException {
        Map<String, Integer> concordance = new HashMap<>();
        for (String line : Files.readAllLines(Paths.get(fileName))) {
            // Backslashes must be doubled inside a Java string literal so the
            // regex engine receives \p{Punct} and \s.
            for (String word : line.split("[\\p{Punct}\\s]+")) {
                // split() returns "" for a blank line and a leading "" when the
                // line starts with a separator; those are not words.
                if (!word.isEmpty()) {
                    concordance.merge(word, 1, Integer::sum);
                }
            }
        }
        return concordance;
    }

    /**
     * Entry point. Expects the input file name as the first argument and
     * prints one {@code word<TAB>count} line per distinct word.
     *
     * @param args command-line arguments; {@code args[0]} is the input file
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("invalid operation - you must specify an input file");
        } else {
            try {
                Map<String, Integer> concordance = generateConcordance(args[0]);
                concordance.forEach((key, value) -> System.out.println(key + "\t" + value));
            } catch (IOException error) {
                System.err.println("I/O error - unable to read the file: " + args[0]);
            }
        }
    }
}
这可行,但我想泛化该实现,以便在未传递任何参数时能够从 System.in 读取输入。是否可以修改函数 generateConcordance 的输入,使其既可以用于命名文件,也可以用于 System.in?
稍加重构并使用 Stream<String>,这应该非常简单。您可以尝试以下代码(未经测试):
/**
 * Word counter whose core works on any {@code Stream<String>} of lines, with
 * convenience overloads for a named file and for an {@link InputStream}.
 */
public class Main {
    // Separator between words: one or more punctuation or whitespace characters.
    // Backslashes are doubled so the regex engine receives \p{Punct} and \s.
    private static final Pattern WORD_BREAK = Pattern.compile("[\\p{Punct}\\s]+");

    /**
     * Counts the occurrences of each word in the given lines.
     *
     * @param lines the lines of text to analyse; the stream is consumed
     * @return a map from each distinct word to its number of occurrences
     */
    public static Map<String, Long> generateConcordance(Stream<String> lines) {
        return lines
                .flatMap(WORD_BREAK::splitAsStream)
                // Splitting can produce empty tokens (e.g. a line that starts
                // with a separator); those are not words.
                .filter(word -> !word.isEmpty())
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    }

    /**
     * Counts the occurrences of each word in the named file.
     *
     * @param fileName path of the file to read
     * @return a map from each distinct word to its number of occurrences
     * @throws IOException if the file cannot be opened
     */
    public static Map<String, Long> generateConcordance(String fileName) throws IOException {
        // try-with-resources: the stream returned by Files.lines holds the file open.
        try (Stream<String> lines = Files.lines(Paths.get(fileName))) {
            return generateConcordance(lines);
        }
    }

    /**
     * Counts the occurrences of each word read from the given stream, decoded
     * as UTF-8. The stream is not closed by this method.
     *
     * @param in the byte stream to read, e.g. {@code System.in}
     * @return a map from each distinct word to its number of occurrences
     */
    public static Map<String, Long> generateConcordance(InputStream in) {
        InputStreamReader reader = new InputStreamReader(in, StandardCharsets.UTF_8);
        return generateConcordance(new BufferedReader(reader).lines());
    }

    /**
     * Entry point. Reads the file named by the first argument, or standard
     * input when no argument is given, and prints one {@code word<TAB>count}
     * line per distinct word.
     *
     * @param args command-line arguments; optional {@code args[0]} is the input file
     */
    public static void main(String[] args) {
        try {
            Map<String, Long> concordance = args.length == 0
                    ? generateConcordance(System.in)
                    : generateConcordance(args[0]);
            concordance.forEach((word, count) -> System.out.println(word + "\t" + count));
        } catch (IOException e) {
            e.printStackTrace(System.err);
        }
    }
}
我正在尝试使用 JDK 8(重新学习语言)在 Java 中编写索引程序。到目前为止,我有以下内容(省略包名):
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
/**
 * Command-line word counter: prints every word of a text file together with
 * the number of times it occurs.
 */
public class Main {
    /**
     * Builds a concordance (word to occurrence count) for the named file.
     *
     * <p>Each line is split on runs of punctuation and/or whitespace; the
     * pieces between separators are the words. Empty pieces are ignored.
     *
     * @param fileName path of the file to read
     * @return a mutable map from each distinct word to its number of occurrences
     * @throws IOException if the file cannot be read
     */
    public static Map<String,Integer> generateConcordance(String fileName) throws IOException {
        Map<String, Integer> concordance = new HashMap<>();
        for (String line : Files.readAllLines(Paths.get(fileName))) {
            // Backslashes must be doubled inside a Java string literal so the
            // regex engine receives \p{Punct} and \s.
            for (String word : line.split("[\\p{Punct}\\s]+")) {
                // split() returns "" for a blank line and a leading "" when the
                // line starts with a separator; those are not words.
                if (!word.isEmpty()) {
                    concordance.merge(word, 1, Integer::sum);
                }
            }
        }
        return concordance;
    }

    /**
     * Entry point. Expects the input file name as the first argument and
     * prints one {@code word<TAB>count} line per distinct word.
     *
     * @param args command-line arguments; {@code args[0]} is the input file
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("invalid operation - you must specify an input file");
        } else {
            try {
                Map<String, Integer> concordance = generateConcordance(args[0]);
                concordance.forEach((key, value) -> System.out.println(key + "\t" + value));
            } catch (IOException error) {
                System.err.println("I/O error - unable to read the file: " + args[0]);
            }
        }
    }
}
这可行,但我想泛化该实现,以便在未传递任何参数时能够从 System.in 读取输入。是否可以修改函数 generateConcordance 的输入,使其既可以用于命名文件,也可以用于 System.in?
稍加重构并使用 Stream<String>,这应该非常简单。您可以尝试以下代码(未经测试):
/**
 * Word counter whose core works on any {@code Stream<String>} of lines, with
 * convenience overloads for a named file and for an {@link InputStream}.
 */
public class Main {
    // Separator between words: one or more punctuation or whitespace characters.
    // Backslashes are doubled so the regex engine receives \p{Punct} and \s.
    private static final Pattern WORD_BREAK = Pattern.compile("[\\p{Punct}\\s]+");

    /**
     * Counts the occurrences of each word in the given lines.
     *
     * @param lines the lines of text to analyse; the stream is consumed
     * @return a map from each distinct word to its number of occurrences
     */
    public static Map<String, Long> generateConcordance(Stream<String> lines) {
        return lines
                .flatMap(WORD_BREAK::splitAsStream)
                // Splitting can produce empty tokens (e.g. a line that starts
                // with a separator); those are not words.
                .filter(word -> !word.isEmpty())
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    }

    /**
     * Counts the occurrences of each word in the named file.
     *
     * @param fileName path of the file to read
     * @return a map from each distinct word to its number of occurrences
     * @throws IOException if the file cannot be opened
     */
    public static Map<String, Long> generateConcordance(String fileName) throws IOException {
        // try-with-resources: the stream returned by Files.lines holds the file open.
        try (Stream<String> lines = Files.lines(Paths.get(fileName))) {
            return generateConcordance(lines);
        }
    }

    /**
     * Counts the occurrences of each word read from the given stream, decoded
     * as UTF-8. The stream is not closed by this method.
     *
     * @param in the byte stream to read, e.g. {@code System.in}
     * @return a map from each distinct word to its number of occurrences
     */
    public static Map<String, Long> generateConcordance(InputStream in) {
        InputStreamReader reader = new InputStreamReader(in, StandardCharsets.UTF_8);
        return generateConcordance(new BufferedReader(reader).lines());
    }

    /**
     * Entry point. Reads the file named by the first argument, or standard
     * input when no argument is given, and prints one {@code word<TAB>count}
     * line per distinct word.
     *
     * @param args command-line arguments; optional {@code args[0]} is the input file
     */
    public static void main(String[] args) {
        try {
            Map<String, Long> concordance = args.length == 0
                    ? generateConcordance(System.in)
                    : generateConcordance(args[0]);
            concordance.forEach((word, count) -> System.out.println(word + "\t" + count));
        } catch (IOException e) {
            e.printStackTrace(System.err);
        }
    }
}