在 Java 中创建 .CSV 文件时日期格式受到干扰

Date Format getting disturb when creating .CSV file in Java

我正在编写一个网络抓取工具,并将抓取到的数据存储到 .CSV 文件中。程序可以正常运行,但有一个问题:我抓取数据的网站使用 (Month Day, Year) 格式的日期,其中包含逗号。因此,当我把数据保存到 .CSV 文件时,Year 会被当作单独的一列,导致后面所有数据都错位。我实际上希望以 (MM-MON-YYYY) 格式存储该数据,并让有效日期只占一列。我把代码贴在下面,请帮帮我。谢谢!

P.S:很抱歉没有在最初的帖子中写明我想要的格式。

package com.mufapscraping;

//import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Collections;
import java.util.Iterator;
//import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes an HTML table from the MUFAP returns page with jsoup and appends the
 * text of its cells to a CSV file.
 *
 * <p>This is the listing the question is about: cell text is written verbatim,
 * so a cell such as "Apr 18, 2016" contributes an extra comma to the row.
 */
public class ComMufapScraping {

    // Configuration flags; none of them is read anywhere in this class.
    boolean writeCSVToConsole = true;
    boolean writeCSVToFile = true;
    //String destinationCSVFile = "C:\convertedCSV.csv";
    boolean sortTheList = true;
    boolean writeToConsole;
    boolean writeToFile;
    // Parsed page; assigned by createConnection() and read by parsingHTML().
    public static Document doc = null;
    // Working state that parsingHTML() overwrites while it walks the table.
    public static Elements tbodyElements = null;
    public static Elements elements = null;
    public static Elements tdElements = null;
    public static Elements trElement2 = null;
    // Not referenced anywhere in this class.
    public static String Dcomma = ", 2";
    // Receives the <td> collection of every row visited by parsingHTML().
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>();

    /**
     * Sets the JVM-wide HTTP proxy properties, downloads the page and stores the
     * parsed document in {@link #doc}.
     *
     * @throws IOException if fetching the page fails
     */
    public static void createConnection() throws IOException {
        System.setProperty("http.proxyHost", "191.1.1.123");
        System.setProperty("http.proxyPort", "8080");
        String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
        doc = Jsoup.connect(tempUrl).get();
    }

    /**
     * Walks the rows of the first {@code <tbody>} in {@link #doc} and appends the
     * text of each row's {@code <td>} cells to the CSV file.
     *
     * @throws Exception if the document contains no {@code <tbody>} element, or if
     *                   writing the file fails
     */
    public static void parsingHTML() throws Exception {
        // This loop executes exactly once (i runs from 1 to 1).
        for (int i = 1; i <= 1; i++) {

            tbodyElements = doc.getElementsByTag("tbody");
            //Element table = doc.getElementById("dataTable");

            if (tbodyElements.isEmpty()) {
                throw new Exception("Table is not found");
            }
            elements = tbodyElements.get(0).getElementsByTag("tr");

            for (Element trElement : elements) {
                trElement2 = trElement.getElementsByTag("tr");
                tdElements = trElement.getElementsByTag("td");
                // A new writer is opened in append mode for every row.
                // NOTE(review): "\c" is not a valid escape sequence in a Java string
                // literal; the backslash in this path needs to be doubled.
                FileWriter sb = new FileWriter("C:\convertedCSV2.csv", true);
                // The body of this loop runs at most once per row: the inner loop
                // below consumes every remaining element of `it`.
                // NOTE(review): when the row has no <td> cells the body never runs,
                // so the writer opened above is never closed.
                for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
                    // Always true here (the loop condition has just checked it), so
                    // each written row is preceded by a padded line break.
                    if (it.hasNext()) {
                        sb.append("  \n  ");
                    }
                    // The condition tests `it`, not `it2`; `it2` is never advanced.
                    for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
                        Element tdElement = it.next();
                        // Raw cell text: a comma inside the text ends up unescaped in the row.
                        sb.append(tdElement.text());
                        // `it2` is never advanced, so this only reflects whether
                        // trElement2 is non-empty; it does not detect the last cell.
                        if (it2.hasNext()) {
                            sb.append("   ,   ");
                        }

                    }

                    // Prints the writer object's toString(), not the text written to the file.
                    System.out.println(sb.toString());
                    sb.flush();
                    sb.close();
                }

                // Prints the boolean returned by add(), not the cells themselves.
                System.out.println(sampleList.add(tdElements));
                /* for (Elements elements2 : zakazky) {
                System.out.println(elements2);
            }*/

            }
        }
    }

    /**
     * Entry point: downloads the page, then writes its table to the CSV file.
     */
    public static void main(String[] args) throws IOException, Exception {
        createConnection();
        parsingHTML();

    }

}

这可以通过简单地用双引号括起您的数据来实现,因此 month day, year 将变为 "month day, year"。这是为您完成这项工作的修改后的代码:

package com.mufapscraping;

//import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Collections;
import java.util.Iterator;
//import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {

    boolean writeCSVToConsole = true;
    boolean writeCSVToFile = true;
    //String destinationCSVFile = "C:\convertedCSV.csv";
    boolean sortTheList = true;
    boolean writeToConsole;
    boolean writeToFile;
    public static Document doc = null;
    public static Elements tbodyElements = null;
    public static Elements elements = null;
    public static Elements tdElements = null;
    public static Elements trElement2 = null;
    public static String Dcomma = ", 2";
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>();

    public static void createConnection() throws IOException {
        System.setProperty("http.proxyHost", "191.1.1.123");
        System.setProperty("http.proxyPort", "8080");
        String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
        doc = Jsoup.connect(tempUrl).get();
    }

    public static void parsingHTML() throws Exception {
        for (int i = 1; i <= 1; i++) {

            tbodyElements = doc.getElementsByTag("tbody");
            //Element table = doc.getElementById("dataTable");

            if (tbodyElements.isEmpty()) {
                throw new Exception("Table is not found");
            }
            elements = tbodyElements.get(0).getElementsByTag("tr");

            for (Element trElement : elements) {
                trElement2 = trElement.getElementsByTag("tr");
                tdElements = trElement.getElementsByTag("td");
                FileWriter sb = new FileWriter("C:\convertedCSV2.csv", true);
                for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
                    if (it.hasNext()) {
                        sb.append("  \n  ");
                    }
                    for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
                        Element tdElement = it.next();
                        sb.append('\"'); // surround your data
                        sb.append(tdElement.text());
                        sb.append('\"'); // with double quotes
                        if (it2.hasNext()) {

                            sb.append("   ,   ");
                        }

                    }

                    System.out.println(sb.toString());
                    sb.flush();
                    sb.close();
                }

                System.out.println(sampleList.add(tdElements));
                /* for (Elements elements2 : zakazky) {
                System.out.println(elements2);
            }*/

            }
        }
    }

    public static void main(String[] args) throws IOException, Exception {
        createConnection();
        parsingHTML();

    }

}

那么你确实是想把它拆分开。好的,那就修改第一行(表头行),增加一个 "year," 列:

// Fragment for the inner cell loop: `it`, `it2` and `sb` come from the surrounding method.
Element tdElement = it.next();
final String content = tdElement.text();
sb.append(content);
if (it2.hasNext()) {
    sb.append("   ,   ");
}
// The comma inside each date value produces an extra column; give that column
// its own header right after the "Validity Date" header cell.
if (content.equals("Validity Date")) {
    sb.append("Validity Year,");
}

您可能需要在 for 循环之后加上 break?否则您将覆盖文件 elements.size()-1 次……

// The backslash in the Windows path has to be escaped inside a Java string literal.
FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) { ... }
// Exit the enclosing loop as soon as the cell loop has finished.
break;

您可以使用 cssSelector 而不是多次使用 getElementsByTag 方法,这会更容易,并且使您能够在几行代码中获得相同的输出

public static void main (String []args) throws IOException{
    String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
    Document doc = Jsoup.connect(tempUrl).get();

    Elements trElements = doc.select("#dataTable tbody tr");
    FileWriter sb = new FileWriter("C:\convertedCSV2.csv", true);
    for(Element tr : trElements){
        Elements tdElements = tr.select("td");
        for (Element td : tdElements){
        sb.append(td.text());
        sb.append(";");
        }
        sb.append("\n");
    }
}

不要直接添加 FileWriter 中的元素文本,而是先对其进行格式化,然后再添加。

因此,替换以下行:

sb.append(tdElement.text());

替换为:

sb.append(formatData(tdElement.text()));

// Parses dates written like "Apr 18, 2016".
// SimpleDateFormat is not thread-safe; these shared instances are meant for single-threaded use.
private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US);
// Formats dates like "18-Apr-2016". Lower-case "yyyy" is the calendar year; upper-case
// "YYYY" is the week year, which differs from the calendar year around New Year.
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-yyyy", Locale.US);

/**
 * Rewrites a date given as "MMM d, yyyy" (e.g. "Apr 18, 2016") into
 * "dd-MMM-yyyy" (e.g. "18-Apr-2016"), which contains no comma.
 *
 * @param text the raw cell text
 * @return the reformatted date, or {@code text} unchanged when it cannot be
 *         parsed as a date in the expected layout
 */
public static String formatData(String text) {
    try {
        Date d = FORMATTER_MMM_d_yyyy.parse(text);
        return FORMATTER_dd_MMM_yyyy.format(d);
    } catch (ParseException pe) {
        // Not a date in the expected layout: pass the value through untouched.
        return text;
    }
}

样本

/**
 * Demo driver: prints each sample value, then the result of passing it through
 * formatData, then a blank line.
 */
public static void main(String[] args) {
    final String[] samples = {
            "ABL Cash Fund",
            "AA(f)",
            "Apr 18, 2016",
            "10.4729"
    };

    for (int index = 0; index < samples.length; index++) {
        final String raw = samples[index];
        final String formatted = formatData(raw);
        System.out.format("%s\n%s\n\n", raw, formatted);
    }
}

输出

ABL Cash Fund
ABL Cash Fund

AA(f)
AA(f)

Apr 18, 2016
18-Apr-2016

10.4729
10.4729