gudii9
asked on
reading xlsx
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 */
public class XLSX2CSV {
  /**
   * The type of the data value is indicated by an attribute on the cell.
   * The value is usually in a "v" element within the cell.
   * <p>
   * The sheet handler picks one of these constants from the cell's "t"
   * attribute when the cell element starts and uses it to decide how to
   * render the value when the "v" element ends.
   */
  enum xssfDataType {
    // Selected for t="b"; rendered as TRUE or FALSE.
    BOOL,
    // Selected for t="e"; rendered as a quoted "ERROR:..." string.
    ERROR,
    // Selected for t="str"; rendered as a quoted string.
    FORMULA,
    // Selected for t="inlineStr"; rendered as a quoted string.
    INLINESTR,
    // Selected for t="s"; the value is an index into the shared strings table.
    SSTINDEX,
    // Default when no other type matches; formatted with the cell style's
    // number format when one is available.
    NUMBER,
  }
  // Running count of completed rows. The sheet handler prints the current
  // value on a line of its own after every row and then increments it; it is
  // never reset, so the count continues across sheets.
  // NOTE(review): this extra line is interleaved with the CSV rows and looks
  // like a debugging aid — confirm it is wanted in the final output.
  int countrows = 0;
  /**
   * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
   * <p/>
   * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
   * http://www.ecma-international.org/publications/standards/Ecma-376.htm
   * <p/>
   * A web-friendly version is http://openiso.org/Ecma/376/Part4
   */
  class MyXSSFSheetHandler extends DefaultHandler {
    /**
     * Table with styles
     */
    private StylesTable stylesTable;
    /**
     * Table with unique strings
     */
    private ReadOnlySharedStringsTablesharedStringsTable;
    /**
     * Destination for data
     */
    private final PrintStream output;
    /**
     * Number of columns to read starting with leftmost
     */
    private final int minColumnCount;
    // Set when V start element is seen
    private boolean vIsOpen;
    // Set when cell start element is seen;
    // used when cell close element is seen.
    private xssfDataType nextDataType;
    // Used to format numeric cell values.
    private short formatIndex;
    private String formatString;
    private final DataFormatter formatter;
    private int thisColumn = -1;
    // The last column printed to the output stream
    private int lastColumnNumber = -1;
    // Gathers characters as they are seen.
    private StringBuffer value;
    /**
     * Accepts objects needed while parsing.
     *
     * @param styles  Table of styles
     * @param strings Table of shared strings
     * @param cols   Minimum number of columns to show
     * @param target  Sink for output
     */
    public MyXSSFSheetHandler(
        StylesTable styles,
        ReadOnlySharedStringsTablestrings,
        int cols,
        PrintStream target) {
      this.stylesTable = styles;
      this.sharedStringsTable = strings;
      this.minColumnCount = cols;
      this.output = target;
      this.value = new StringBuffer();
      this.nextDataType = xssfDataType.NUMBER;
      this.formatter = new DataFormatter();
    }
    /*
    * (non-Javadoc)
    * @see org.xml.sax.helpers.DefaultHandler#s tartElemen t(java.lan g.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
    */
    public void startElement(String uri, String localName, String name,
                 Attributes attributes) throws SAXException {
      if ("inlineStr".equals(name) || "v".equals(name)) {
        vIsOpen = true;
        // Clear contents cache
        value.setLength(0);
      }
      // c => cell
      else if ("c".equals(name)) {
        // Get the cell reference
        String r = attributes.getValue("r");
        int firstDigit = -1;
        for (int c = 0; c < r.length(); ++c) {
          if (Character.isDigit(r.charAt(c))) {
            firstDigit = c;
            break;
          }
        }
        thisColumn = nameToColumn(r.substring(0, firstDigit));
        // Set up defaults.
        this.nextDataType = xssfDataType.NUMBER;
        this.formatIndex = -1;
        this.formatString = null;
        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");
        if ("b".equals(cellType))
          nextDataType = xssfDataType.BOOL;
        else if ("e".equals(cellType))
          nextDataType = xssfDataType.ERROR;
        else if ("inlineStr".equals(cellType))
          nextDataType = xssfDataType.INLINESTR;
        else if ("s".equals(cellType))
          nextDataType = xssfDataType.SSTINDEX;
        else if ("str".equals(cellType))
          nextDataType = xssfDataType.FORMULA;
        else if (cellStyleStr != null) {
          // It's a number, but almost certainly one
          //  with a special style or format
          int styleIndex = Integer.parseInt(cellStyleStr);
          XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
          this.formatIndex = style.getDataFormat();
          this.formatString = style.getDataFormatString();
          if (this.formatString == null)
            this.formatString = BuiltinFormats.getBuiltinFormat(this .formatInd ex);
        }
      }
    }
    /*
    * (non-Javadoc)
    * @see org.xml.sax.helpers.DefaultHandler#e ndElement( java.lang. String, java.lang.String, java.lang.String)
    */
    public void endElement(String uri, String localName, String name)
        throws SAXException {
      String thisStr = null;
      // v => contents of a cell
      if ("v".equals(name)) {
        // Process the value contents as required.
        // Do now, as characters() may be called more than once
        switch (nextDataType) {
          case BOOL:
            char first = value.charAt(0);
            thisStr = first == '0' ? "FALSE" : "TRUE";
            break;
          case ERROR:
            thisStr = "\"ERROR:" + value.toString() + '"';
            break;
          case FORMULA:
            // A formula could result in a string value,
            // so always add double-quote characters.
            thisStr = '"' + value.toString() + '"';
            break;
          case INLINESTR:
            // TODO: have seen an example of this, so it's untested.
            XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()) ;
            thisStr = '"' + rtsi.toString() + '"';
            break;
          case SSTINDEX:
            String sstIndex = value.toString();
            try {
              int idx = Integer.parseInt(sstIndex);
              XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTabl e.getEntry At(idx));
              thisStr = '"' + rtss.toString() + '"';
            } catch (NumberFormatException ex) {
              output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
            }
            break;
          case NUMBER:
            String n = value.toString();
            if (this.formatString != null)
              thisStr = formatter.formatRawCellContents(Doub le.parseDo uble(n), this.formatIndex, this.formatString);
            else
              thisStr = n;
            break;
          default:
            thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
            break;
        }
        // Output after we've seen the string contents
        // Emit commas for any fields that were missing on this row
        if (lastColumnNumber == -1) {
          lastColumnNumber = 0;
        }
        for (int i = lastColumnNumber; i < thisColumn; ++i)
          output.print(',');
        // Might be the empty string.
        output.print(thisStr);
        // Update column
        if (thisColumn > -1)
          lastColumnNumber = thisColumn;
      } else if ("row".equals(name)) {
        // Print out any missing commas if needed
        if (minColumns > 0) {
          // Columns are 0 based
          if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
          }
          for (int i = lastColumnNumber; i < (this.minColumnCount); i++) {
            output.print(',');
          }
        }
        // We're onto a new row
        output.println();
        output.println(countrows++);
        lastColumnNumber = -1;
      }
    }
    /**
     * Captures characters only if a suitable element is open.
     * Originally was just "v"; extended for inlineStr also.
     */
    public void characters(char[] ch, int start, int length)
        throws SAXException {
      if (vIsOpen)
        value.append(ch, start, length);
    }
    /**
     * Converts an Excel column name like "C" to a zero-based index.
     *
     * @param name
     * @return Index corresponding to the specified name
     */
    private int nameToColumn(String name) {
      int column = -1;
      for (int i = 0; i < name.length(); ++i) {
        int c = name.charAt(i);
        column = (column + 1) * 26 + c - 'A';
      }
      return column;
    }
  }
  //////////////////////////////////// ///
  private OPCPackage xlsxPackage;
  private int minColumns;
  private PrintStream output;
  /**
   * Creates a new XLSX -> CSV converter
   *
   * @param pkg     The XLSX package to process
   * @param output   The PrintStream to output the CSV to
   * @param minColumns The minimum number of columns to output, or -1 for no minimum
   */
  public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
    this.xlsxPackage = pkg;
    this.output = output;
    this.minColumns = minColumns;
  }
  /**
   * Parses and shows the content of one sheet
   * using the specified styles and shared-strings tables.
   *
   * @param styles
   * @param strings
   * @param sheetInputStream
   */
  public void processSheet(
      StylesTable styles,
      ReadOnlySharedStringsTablestrings,
      InputStream sheetInputStream)
      throws IOException, ParserConfigurationException, SAXException {
    InputSource sheetSource = new InputSource(sheetInputStream);
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    SAXParser saxParser = saxFactory.newSAXParser();
    XMLReader sheetParser = saxParser.getXMLReader();
    ContentHandler handler = new MyXSSFSheetHandler(styles,strings, this.minColumns, this.output);
    sheetParser.setContentHandler(handle r);
    sheetParser.parse(sheetSource);
  }
  /**
   * Initiates the processing of the XLS workbook file to CSV.
   *
   * @throws IOException
   * @throws OpenXML4JException
   * @throws ParserConfigurationException
   * @throws SAXException
   */
  public void process()
      throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTablestrings = new ReadOnlySharedStringsTable (this.xlsx Package);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator)xssfReader.getSheetsData() ;
    int index = 0;
    while (iter.hasNext()) {
      InputStream stream = iter.next();
      String sheetName = iter.getSheetName();
      this.output.println();
      this.output.println(sheetName + " [index=" + index + "]:");
      processSheet(styles, strings, stream);
      stream.close();
      ++index;
    }
  }
  public static void main(String[] args) throws Exception {
    /* if (args.length < 1) {
      System.err.println("Use:");
      System.err.println("  XLSX2CSV <xlsx file> [min columns]");
      return;
    }*/
    //File xlsxFile = new File(args[0]);
    File xlsxFile = new File("cccc.xlsx");
    if (!xlsxFile.exists()) {
      System.err.println("Not found or not a file: " + xlsxFile.getPath());
      return;
    }
    int minColumns = -1;
    //if (args.length >= 2)
    //minColumns = Integer.parseInt(args[1]);
    minColumns = 2;
    // The package open is instantaneous, as it should be.
    OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
    XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
    xlsx2csv.process();
  }
}
I am using the code above to read the contents of the attached cccc.xlsx, which produces the output below:
22,33,44,"aa",6/6/20 15:49,"77-SS_001","DONE"
23,34,45,"aa",6/6/20 15:49,"77-SS_002","NoDONE"
I would like to read only the first two columns, as shown below, into some object
22 33
23 34
so that I can query the database in later steps.
How can I achieve this?
Any ideas, suggestions, sample code, links, or source code would be highly appreciated. Thanks in advance.
cccc.xlsx
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Here, I'm not eligible for a single point.
If you have identified the fix, then you should ask to close the question. Or you can consult the moderator for the suggestions.
If you have identified the fix, then you should ask to close the question. Or you can consult the moderator for the suggestions.
ASKER
I have identified the row indexes (0, 1) as well as the column indexes (0, 1):
while(cells.hasNext())
{
XSSFCell cell = (XSSFCell) cells.next();
String Value=null;
if(cell.getCellType() == cell.CELL_TYPE_STRING) {
Value=cell.getStringCellVa
System.out.println("string
}
else if(cell.getCellType() == cell.CELL_TYPE_NUMERIC) {
//int Value2=(Integer) null;
double Value2=cell.getNumericCell
System.out.println("number
int Value3=cell.getColumnIndex
System.out.println("col index--->"+Value3);
int Value4=cell.getRowIndex();
System.out.println("Row index--->"+Value4);
}
number of rows---->1
numbercal values--->22.0
col index--->0
Row index--->0
numbercal values--->33.0
col index--->1
Row index--->0
numbercal values--->44.0
col index--->2
Row index--->0
string values-->aa
numbercal values--->43988.6590856481
col index--->4
Row index--->0
string values-->77-SS_001
string values-->DONE
numbercal values--->23.0
col index--->0
Row index--->1
numbercal values--->34.0
col index--->1
Row index--->1
numbercal values--->45.0
col index--->2
Row index--->1
string values-->aa
numbercal values--->43988.6590856481
col index--->4
Row index--->1
string values-->77-SS_002
string values-->NoDONE
I need to print
22 33 (row index 0, column indexes 0 and 1)
and
23 34 (row index 1, column indexes 0 and 1)
I am not sure how to print this kind of two-dimensional cell value. I have not yet found a corresponding method in the API. Please advise.
XLSX2CSV.java