Data Extraction from CSV file and loading the data into database table
Question: - What is opencsv?
Answer: - It’s a simple csv parser library for java. It is available from Sourceforge. It supports all the basic features related to CSV files for extracting the data. opencsv was developed by Glen Smith. Kyle Miller contributed the bean binding work, Scott Conway has done bug fixing, and Sean Sullivan is the current maintainer of the project. He has added a feature to CSVWriter so that we can dump CSV file data into the database tables using writeAll() method.
Below dependency can be used to pull out opencsv.jar from maven repo
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.0</version>
</dependency>
Reference taken from: - http://opencsv.sourceforge.net/#what-is-opencsv
Opencsv is providing simple java utility that can be used to extract data from csv files and load into the database tables. The CSV files are parsed line by line and depends on that SQL query is generated. The SQL query is bonded with values and next the SQL query is added in the SQL batch. Each SQL batch is executed when the batch limit reached to 50.
Example of Data Extraction from csv (comma-separated values) file and inserting them into database table in Java.
System Requirements:-
Eclipse Editor or any other.
JDK 1.5 or higher(I am using jdk 1.7.0_03)
External jars.
Required Jars are:-
opencsv-1.8.jar
commons-lang.jar
mysql-connector-java-3.1.12.jar
Steps for creating Eclipse java project for CSV data extraction and inserting the data as records in the database table:-
- Create a java project in eclipse.
- Create a package in the src folder with the name as com.gaurav.csvload.
- Add the above required jars in the build path of the project.
- Create the DBConnection.java, DateExtracterAndParser.java, CSVReaderWriter.java and DumpCsvDataIntoDatabase.java files in this package and place the corresponding below available code in those files.
- Execute the DumpCsvDataIntoDatabase.java by selecting the option Run as Java Application.
CSV file Details:-
CSV Data:-
STUDENT_ID,FIRSTNAME,LASTNAME,BIRTHDATE,TOTAL_MARKS,DIVISION
1,Kumar,Gaurav,12.05.1978,920,FIRST
2,Gaurav,Kumar,19.11.1980,880,FIRST
3,Shivam,Kumar,11.07.1989,670,SECOND
Note: - Save above data as Student.csv file
Student Table Structure:-
/* DataBase Table description
CREATE TABLE Student (
STUDENT_ID NUMERIC(20),
FIRSTNAME VARCHAR(50) NOT NULL,
LASTNAME VARCHAR(50) NOT NULL,
BIRTHDATE DATE,
TOTAL_MARKS NUMERIC(20),
DIVISION VARCHAR(50) NOT NULL
)
*
*/
DBConnection.java
packagecom.gaurav.csvload;
importjava.sql.Connection;
importjava.sql.DriverManager;
importjava.sql.SQLException;
importjava.util.logging.Logger;
public classDBConnection {
privatestatic String JDBC_CONNECTION_URL = "jdbc:mysql://localhost:3306/test";
privatestatic String JDBC_USERNAME = "root";
privatestatic String JDBC_PASSWORD = "root";
privatefinal static Logger logger = Logger.getLogger(DBConnection.class
.getName());
/** This method will provide the database connection for mysql database */
publicstatic Connection getConnection() {
Connection con = null;
try{
Class.forName("com.mysql.jdbc.Driver");
con = DriverManager.getConnection(JDBC_CONNECTION_URL,
JDBC_USERNAME, JDBC_PASSWORD);
} catch (ClassNotFoundException e) {
logger.severe(e.getMessage());
} catch (SQLException e) {
logger.severe(e.getMessage());
}
returncon;
}
}
DateExtracterAndParser.java
packagecom.gaurav.csvload;
importjava.text.ParseException;
importjava.text.SimpleDateFormat;
importjava.util.ArrayList;
import java.util.Date;
import java.util.List;
importjava.util.logging.Logger;
public classDateExtracterAndParser {
privatefinal static Logger logger = Logger.getLogger(DateExtracterAndParser.class
.getName());
// This is the List of all date formats that we want to parse.
// Here, We can also add our own format for parsing purpose.
@SuppressWarnings("serial")
staticList<SimpleDateFormat> dateFormatsLst = newArrayList<SimpleDateFormat>() {
{
add(newSimpleDateFormat("dd.M.yyyy"));
add(newSimpleDateFormat("M/dd/yyyy"));
add(newSimpleDateFormat("dd.M.yyyy hh:mm:ss a"));
add(newSimpleDateFormat("M/dd/yyyy hh:mm:ss a"));
add(newSimpleDateFormat("dd.MMM.yyyy"));
add(newSimpleDateFormat("dd-MMM-yyyy"));
}
};
/**
* Convert String with various formats into java.util.Date
*
* @param strInput
* Date as a string
* @return java.util.Date object if strInput string which is data can be
* parsed successfully otherwise it will returns null
*/
publicstatic Date dateConversion(String strInput) {
Date date = null;
if(null == strInput) {
returnnull;
}
if(strInput.contains(".") || strInput.contains("/")
|| strInput.contains("-")) {
for(SimpleDateFormat format : dateFormatsLst) {
try{
format.setLenient(false);
date = format.parse(strInput);
} catch (ParseException e) {
logger.info(strInput
+ " is not matched with the Expected format of "
+ format.toPattern());
}
if(date != null) {
break;
}
}
}
returndate;
}
}
CSVReaderWriter.java
packagecom.gaurav.csvload;
importjava.io.FileNotFoundException;
importjava.io.FileReader;
importjava.sql.Connection;
importjava.sql.PreparedStatement;
import java.util.Date;
importjava.util.logging.Logger;
importorg.apache.commons.lang.StringUtils;
importau.com.bytecode.opencsv.CSVReader;
/**
* @author Gaurav
*
*/
public classCSVReaderWriter {
privatestatic final String PARSE_FIELD_NAME = "\\$\\{keys\\}";
privatestatic final String PARSE_FIELD_VALUE = "\\$\\{values\\}";
privatestatic final String SQL_INSERT_QUERY = "INSERT INTO ${table}(${keys}) VALUES(${values})";
privatestatic final String PARSE_TABLE_NAME = "\\$\\{table\\}";
privatefinal static Logger logger = Logger.getLogger(CSVReaderWriter.class
.getName());
privatefinal Connection con;
privatechar seprator;
/**
* This Constructor will help to build this class object with DB connection.
*
*/
publicCSVReaderWriter(Connection connection) {
this.con = connection;
this.seprator = ','; // It's a default separator
}
publicchar getSeprator() {
returnseprator;
}
publicvoid setSeprator(char seprator) {
this.seprator = seprator;
}
/**
* This method will help to Parse CSV file using OpenCSV library and load
* the CSV data in given database table.
*
* @param String
* csvFilename:- Name of the CSV file
* @param String
* dbTableName:- Name of the Database table where we want to
* insert data.
* @param boolean deleteTableDataBeforeLoad:- Delete the table data before inserting
* new records.
* @throws Exception
*/
publicvoid readWriteCSV(String csvFilename, String dbTableName,
booleandeleteTableDataBeforeLoad) throws Exception {
CSVReader csvReader = null;
if(null == this.con) {
thrownew Exception("Not a valid database connection.");
}
try{
csvReader = new CSVReader(new FileReader(csvFilename),
this.seprator);
} catch (Exception e) {
logger.severe(e.getMessage());
thrownew Exception("Error occured while executing file. "
+ e.getMessage());
}
String[] headerRow = csvReader.readNext();
if(null == headerRow) {
thrownew FileNotFoundException(
"No header column found in given CSV file."
+ "Please modify properly the CSV file format and provide the Header Column.");
}
String questionmarks = StringUtils.repeat("?,", headerRow.length);
questionmarks = (String) questionmarks.subSequence(0,
questionmarks.length() - 1);
String query = SQL_INSERT_QUERY.replaceFirst(PARSE_TABLE_NAME,
dbTableName);
query = query.replaceFirst(PARSE_FIELD_NAME,
StringUtils.join(headerRow, ","));
query = query.replaceFirst(PARSE_FIELD_VALUE, questionmarks);
logger.info("Query: " + query);
String[] nextLine;
Connection con = null;
PreparedStatement pstmt = null;
try{
con = this.con;
con.setAutoCommit(false);
pstmt = con.prepareStatement(query);
/**
* deleting the data from the existing table before loading csv
* file, if this boolean value is passed as true.
*/
if(deleteTableDataBeforeLoad) {
con.createStatement().execute("DELETE FROM " + dbTableName);
}
finalint batchSize = 50;
intcount = 0;
Date date = null;
while((nextLine = csvReader.readNext()) != null) {
if(null != nextLine) {
intindex = 1;
for(String string : nextLine) {
date = DateExtracterAndParser.dateConversion(string);
if(null != date) {
pstmt.setDate(index++,
newjava.sql.Date(date.getTime()));
} else {
pstmt.setString(index++, string);
}
}
pstmt.addBatch();
}
if(++count % batchSize == 0) {
pstmt.executeBatch();
}
}
// For insertion of remaining records
pstmt.executeBatch();
con.commit();
} catch (Exception e) {
con.rollback();
logger.severe(e.getMessage());
thrownew Exception(
"Error during loading data from CSV file into database table."
+ e.getMessage());
} finally {
if(null != pstmt)
pstmt.close();
if(null != con)
con.close();
csvReader.close();
}
}
}
DumpCsvDataIntoDatabase.java
packagecom.gaurav.csvload;
import java.util.logging.Logger;
public classDumpCsvDataIntoDatabase {
privatefinal static Loggerlogger = Logger
.getLogger(DumpCsvDataIntoDatabase.class.getName());
publicstatic void main(String args[]) {
try{
CSVReaderWriter csvReaderWriter = new CSVReaderWriter(
DBConnection.getConnection());
csvReaderWriter.readWriteCSV("C:/Student.csv", "Student", true);
logger.info("Data inserted successfully into database from Student.csv file");
} catch (Exception e) {
logger.severe(e.getMessage());
}
}
}
Result:-
Jul 06, 2013 6:01:10 AM com.gaurav.csvload.CSVReaderWriter readWriteCSV
INFO: Query: INSERT INTO Student(STUDENT_ID,FIRSTNAME,LASTNAME,BIRTHDATE,TOTAL_MARKS,DIVISION) VALUES(?,?,?,?,?,?)
Jul 06, 2013 6:01:10 AM com.gaurav.csvload.DumpCsvDataIntoDatabase main
INFO: Data inserted successfully into database from Student.csv file
Student table status in database after program execution:-
very useful stuff
ReplyDelete