过多的迭代使数据结构混乱
superabundance of iterations cluttering data structures
我希望我的输出看起来像这样:
/home/flavius/data/train/politics/p_0.txt, [L'Etat,, c'est, moi.]
/home/flavius/data/train/science/s_0.txt, [If, I, have, seen, further, it, is, by, standing, on, the, shoulders, of, giants.]
/home/flavius/data/train/atheism/a_0.txt, [Gott, ist, tot.]
/home/flavius/data/train/sports/s_1.txt, [You, miss, 100%, of, the, shots, you, don't, take.]
但目前的输出却是下面这样,前面多了四行:
/home/flavius/data/train/atheism/a_0.txt
/home/flavius/data/train/politics/p_0.txt
/home/flavius/data/train/science/s_0.txt
/home/flavius/data/train/sports/s_1.txt
/home/flavius/data/train/politics/p_0.txt, [L'Etat,, c'est, moi.]
/home/flavius/data/train/science/s_0.txt, [If, I, have, seen, further, it, is, by, standing, on, the, shoulders, of, giants.]
/home/flavius/data/train/atheism/a_0.txt, [Gott, ist, tot.]
/home/flavius/data/train/sports/s_1.txt, [You, miss, 100%, of, the, shots, you, don't, take.]
我的问题是,为什么会多出前四行?
程序读取四个目录下的不同文件,然后为每个文件在hashmap中创建一个条目,以文件名作为键,并将该文件中包含的所有单词存储为数组列表。
这是代码,非常简单。也许有人可以发现我哪里出错了。
/**
 * Builds an in-memory dictionary that maps every file found under the
 * category directories of {@link #PATH} to the list of words it contains.
 */
public class FileDictCreateur
{
    /** Root directory; each category is a sub-directory directly below it. */
    static String PATH = "/home/flavius/data/train";

    /** Maps each file that was read to the words collected from it, in reading order. */
    static Map<File, ArrayList<String> > fileDict = new HashMap<>();

    /**
     * Reads every category directory below {@link #PATH}, then prints one
     * {@code file, [words]} line per entry of {@link #fileDict}.
     *
     * @param args unused
     * @throws IOException if a directory cannot be listed or a file cannot be read
     */
    public static void main(String[] args) throws IOException
    {
        // Each of the different categories, written as a path suffix of PATH.
        String[] categories = { "/atheism", "/politics", "/science", "/sports"};

        // Cycle through all categories once to populate the global dict.
        for (String category : categories)
        {
            iterateDirectory(new File(PATH + category));
        }

        for (Map.Entry<File, ArrayList<String>> entry : fileDict.entrySet())
        {
            System.out.println(entry.getKey() + ", " + entry.getValue());
        }
    }

    /**
     * Walks {@code directory} recursively and feeds every line of every regular
     * file, split on single spaces, to {@link #create_file_dict(File, String[])}.
     *
     * @param directory the directory to traverse
     * @throws IOException if {@code directory} cannot be listed or a file cannot be read
     */
    private static void iterateDirectory(File directory) throws IOException
    {
        // listFiles() yields null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a clear message instead of an NPE.
        File[] children = directory.listFiles();
        if (children == null)
        {
            throw new IOException("Cannot list directory: " + directory);
        }

        for (File file : children)
        {
            if (file.isDirectory())
            {
                // Descend into the child; recursing on `directory` itself would never terminate.
                iterateDirectory(file);
            }
            else
            {
                // Trace output: prints the path of each file as it is read, so these
                // path-only lines appear before the dictionary dump done in main().
                System.out.println(file);

                // try-with-resources closes the reader even if reading fails.
                try (BufferedReader br = new BufferedReader(new FileReader( file )))
                {
                    String line;
                    while ((line = br.readLine()) != null)
                    {
                        String[] words = line.split(" ");//those are your words
                        create_file_dict( file, words );
                    }
                }
            }
        }
    }

    /**
     * Appends {@code words} to the word list stored for {@code file} in
     * {@link #fileDict}, creating the entry on first use. Calling it once per
     * line therefore accumulates the words of the whole file.
     *
     * @param file  the file the words were read from; used as the map key
     * @param words the words to append, in order
     * @throws IOException never thrown by this implementation; kept in the
     *                     signature for compatibility with existing callers
     */
    public static void create_file_dict( File file, String[] words ) throws IOException
    {
        ArrayList<String> documentWords = fileDict.get(file);
        if (documentWords == null)
        {
            documentWords = new ArrayList<String>();
            fileDict.put(file, documentWords);
        }
        for (String word : words)
        {
            documentWords.add(word);
        }
    }
}
是的,在 iterateDirectory() 的 else 分支里,你有 System.out.println(file)。
这意味着每次到达文件而不是目录时,都会打印其名称。
来自这个 for 循环:
// Visit the four category directories one after another.
for (int index = 0; index < 4; index++) {
    File directory = new File(PATH + categories[index]);
    // Echo the directory path before walking its contents.
    System.out.println(directory);
    iterateDirectory(directory);
}
删除以下语句:System.out.println(directory);它会打印目录名称。
我希望我的输出看起来像这样:
/home/flavius/data/train/politics/p_0.txt, [L'Etat,, c'est, moi.]
/home/flavius/data/train/science/s_0.txt, [If, I, have, seen, further, it, is, by, standing, on, the, shoulders, of, giants.]
/home/flavius/data/train/atheism/a_0.txt, [Gott, ist, tot.]
/home/flavius/data/train/sports/s_1.txt, [You, miss, 100%, of, the, shots, you, don't, take.]
但目前的输出却是下面这样,前面多了四行:
/home/flavius/data/train/atheism/a_0.txt
/home/flavius/data/train/politics/p_0.txt
/home/flavius/data/train/science/s_0.txt
/home/flavius/data/train/sports/s_1.txt
/home/flavius/data/train/politics/p_0.txt, [L'Etat,, c'est, moi.]
/home/flavius/data/train/science/s_0.txt, [If, I, have, seen, further, it, is, by, standing, on, the, shoulders, of, giants.]
/home/flavius/data/train/atheism/a_0.txt, [Gott, ist, tot.]
/home/flavius/data/train/sports/s_1.txt, [You, miss, 100%, of, the, shots, you, don't, take.]
我的问题是,为什么会多出前四行?
程序读取四个目录下的不同文件,然后为每个文件在hashmap中创建一个条目,以文件名作为键,并将该文件中包含的所有单词存储为数组列表。
这是代码,非常简单。也许有人可以发现我哪里出错了。
/**
 * Builds an in-memory dictionary that maps every file found under the
 * category directories of {@link #PATH} to the list of words it contains.
 */
public class FileDictCreateur
{
    /** Root directory; each category is a sub-directory directly below it. */
    static String PATH = "/home/flavius/data/train";

    /** Maps each file that was read to the words collected from it, in reading order. */
    static Map<File, ArrayList<String> > fileDict = new HashMap<>();

    /**
     * Reads every category directory below {@link #PATH}, then prints one
     * {@code file, [words]} line per entry of {@link #fileDict}.
     *
     * @param args unused
     * @throws IOException if a directory cannot be listed or a file cannot be read
     */
    public static void main(String[] args) throws IOException
    {
        // Each of the different categories, written as a path suffix of PATH.
        String[] categories = { "/atheism", "/politics", "/science", "/sports"};

        // Cycle through all categories once to populate the global dict.
        for (String category : categories)
        {
            iterateDirectory(new File(PATH + category));
        }

        for (Map.Entry<File, ArrayList<String>> entry : fileDict.entrySet())
        {
            System.out.println(entry.getKey() + ", " + entry.getValue());
        }
    }

    /**
     * Walks {@code directory} recursively and feeds every line of every regular
     * file, split on single spaces, to {@link #create_file_dict(File, String[])}.
     *
     * @param directory the directory to traverse
     * @throws IOException if {@code directory} cannot be listed or a file cannot be read
     */
    private static void iterateDirectory(File directory) throws IOException
    {
        // listFiles() yields null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a clear message instead of an NPE.
        File[] children = directory.listFiles();
        if (children == null)
        {
            throw new IOException("Cannot list directory: " + directory);
        }

        for (File file : children)
        {
            if (file.isDirectory())
            {
                // Descend into the child; recursing on `directory` itself would never terminate.
                iterateDirectory(file);
            }
            else
            {
                // Trace output: prints the path of each file as it is read, so these
                // path-only lines appear before the dictionary dump done in main().
                System.out.println(file);

                // try-with-resources closes the reader even if reading fails.
                try (BufferedReader br = new BufferedReader(new FileReader( file )))
                {
                    String line;
                    while ((line = br.readLine()) != null)
                    {
                        String[] words = line.split(" ");//those are your words
                        create_file_dict( file, words );
                    }
                }
            }
        }
    }

    /**
     * Appends {@code words} to the word list stored for {@code file} in
     * {@link #fileDict}, creating the entry on first use. Calling it once per
     * line therefore accumulates the words of the whole file.
     *
     * @param file  the file the words were read from; used as the map key
     * @param words the words to append, in order
     * @throws IOException never thrown by this implementation; kept in the
     *                     signature for compatibility with existing callers
     */
    public static void create_file_dict( File file, String[] words ) throws IOException
    {
        ArrayList<String> documentWords = fileDict.get(file);
        if (documentWords == null)
        {
            documentWords = new ArrayList<String>();
            fileDict.put(file, documentWords);
        }
        for (String word : words)
        {
            documentWords.add(word);
        }
    }
}
是的,在 iterateDirectory() 的 else 分支里,你有 System.out.println(file)。
这意味着每次到达文件而不是目录时,都会打印其名称。
来自这个 for 循环:
// Visit the four category directories one after another.
for (int index = 0; index < 4; index++) {
    File directory = new File(PATH + categories[index]);
    // Echo the directory path before walking its contents.
    System.out.println(directory);
    iterateDirectory(directory);
}
删除以下语句:System.out.println(directory);它会打印目录名称。