Akash Akash - 26 days ago 9
Apache Configuration Question

Unable to write new excel using Apache POI after removing duplicate rows

I am new to Apache POI.

I have written a small code for removing duplicate records from a excel file. I am successfully able to identify the duplicate records across sheets but when writing to a new file after removing records, no output is being generated.

Please help where I am goin wrong?

Am I writing properly ?? Or am missing something?

public static void main(String args[]) {
DataFormatter formatter = new DataFormatter();
HSSFWorkbook input_workbook;
HSSFWorkbook workbook_Output_Final;

HSSFSheet input_workbook_sheet;

HSSFRow row_Output;
HSSFRow row_1_index;
HSSFRow row_2_index;

String value1 = "";
String value2 = "";
int count;


//main try catch block starts
try {

FileInputStream input_file = new FileInputStream("E:\\TEST\\Output.xls"); //reading from input file
input_workbook = new HSSFWorkbook(new POIFSFileSystem(input_file));

for (int sheetnum = 0; sheetnum < input_workbook.getNumberOfSheets(); sheetnum++) { //traversing sheets

input_workbook_sheet = input_workbook.getSheetAt(sheetnum);

int input_workbook_sheet_total_row = input_workbook_sheet.getLastRowNum(); //fetching last row nmber

for (int input_workbook_sheet_row_1 = 0; input_workbook_sheet_row_1 <= input_workbook_sheet_total_row; input_workbook_sheet_row_1++) { //traversing row 1

for (int input_workbook_sheet_row_2 = 0; input_workbook_sheet_row_2 <= input_workbook_sheet_total_row; input_workbook_sheet_row_2++) {

row_1_index = input_workbook_sheet.getRow(input_workbook_sheet_row_1); //fetching one iteration row index
row_2_index = input_workbook_sheet.getRow(input_workbook_sheet_row_2); //fetching sec iteration row index

if (row_1_index != row_2_index) {
count = 0;
value1 = "";
value2 = "";
for (int row_1_index_cell = 0; row_1_index_cell < row_1_index.getLastCellNum(); row_1_index_cell++) { //traversing cell for each row
try {
value1 = value1 + formatter.formatCellValue(row_1_index.getCell(row_1_index_cell)); //fetching row cells value
value2 = value2 + formatter.formatCellValue(row_2_index.getCell(row_1_index_cell)); //fetching row cells value

} catch (NullPointerException e) {
}
count++;
if (count == row_1_index.getLastCellNum()) {

if (value1.hashCode() == value2.hashCode()) { //remove the duplicate logic
System.out.println("deleted : " + row_2_index);
System.out.println("------------------");
input_workbook_sheet.removeRow(row_2_index);
}

}
}

}
}
}

}
FileOutputStream fileOut = new FileOutputStream("E:\\TEST\\workbook.xls");
input_workbook.write(fileOut);
fileOut.close();
input_file.close();
} catch (Exception e) {
//e.printStackTrace();
}
//main try catch block ends

}

Answer Source

A couple of things to note:

  1. you swallow any kind of Exception; Igotsome nullpointers with my test data, and that would prevent the workbook from being written

  2. when removing rows, it is an old trick to move backwards through the row numbers because then you don't have to adjust for the row number you have just removed

  3. the code empties the row, but it doesn't move all rows upwards (=there is a gap after the delete). If you want to remove that gap, you can work with shiftRows

  4. you compare things by hashcode, which is possible (in some use cases), but I feel like .equals() is what you want to do. See also Relationship between hashCode and equals method in Java

Here's some code that worked for my test data, feel free to comment if something doesn't work with your data:

public static void main(String args[]) throws IOException {
    DataFormatter formatter = new DataFormatter();
    HSSFWorkbook input_workbook;
    HSSFWorkbook workbook_Output_Final;

    HSSFSheet input_workbook_sheet;

    HSSFRow row_Output;
    HSSFRow row_1_index;
    HSSFRow row_2_index;

    String value1 = "";
    String value2 = "";
    int count;

    FileInputStream input_file = new FileInputStream("c:\\temp\\test.xls");
    input_workbook = new HSSFWorkbook(new POIFSFileSystem(input_file));

    for (int sheetnum = 0; sheetnum < input_workbook.getNumberOfSheets(); sheetnum++) {

        input_workbook_sheet = input_workbook.getSheetAt(sheetnum);

        int input_workbook_sheet_total_row = input_workbook_sheet.getLastRowNum(); 

        for (int input_workbook_sheet_row_1 = input_workbook_sheet_total_row; input_workbook_sheet_row_1 >=0; input_workbook_sheet_row_1--) { // traversing

            for (int input_workbook_sheet_row_2 = input_workbook_sheet_total_row; input_workbook_sheet_row_2 >= 0 ; input_workbook_sheet_row_2--) {

                row_1_index = input_workbook_sheet.getRow(input_workbook_sheet_row_1);
                row_2_index = input_workbook_sheet.getRow(input_workbook_sheet_row_2); 

                if (row_1_index != null && row_2_index != null && row_1_index != row_2_index) {
                    count = 0;
                    value1 = "";
                    value2 = "";

                    int row_1_max = row_1_index.getLastCellNum() - 1;
                    for (int row_1_index_cell = 0; row_1_index_cell < row_1_max; row_1_index_cell++) {
                        try {
                            value1 = value1 + formatter.formatCellValue(row_1_index.getCell(row_1_index_cell)); 

                            value2 = value2 + formatter.formatCellValue(row_2_index.getCell(row_1_index_cell)); 

                        } catch (NullPointerException e) {
                            e.printStackTrace();
                        }
                        count++;

                        if (value1.equals(value2)) {
                            System.out.println("deleted : " + row_2_index.getRowNum());
                            System.out.println("------------------");
                            input_workbook_sheet.removeRow(row_2_index);


                            input_workbook_sheet.shiftRows(
                                    row_2_index.getRowNum() + 1, 
                                    input_workbook_sheet_total_row, 
                                    -1, 
                                    true, 
                                    true);
                        }


                    }

                }
            }
        }

    }
    FileOutputStream fileOut = new FileOutputStream("c:\\temp\\workbook.xls");
    input_workbook.write(fileOut);
    fileOut.close();
    input_file.close();
    input_workbook.close();
}