一、前言
对于csv文件,Apache常用的解析依赖类是org.apache.commons.csv.CSVParser和org.apache.commons.csv.CSVStrategy。这边基于apache-any23-csvutils-1.1.jar包(源码部分如下所示),介绍对csv的常用操作:判断是否为csv(isCSV)、构建CSVParser解析器、获取Apache的解析策略CSVStrategy等。
二、源码说明
package org.apache.any23.extractor.csv;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVStrategy;

/**
 * Builds {@link CSVParser} instances for an input stream by probing a fixed
 * list of candidate {@link CSVStrategy} objects and picking the first one
 * under which the leading rows of the stream look tabular.
 *
 * <p>Every probe marks the stream, reads a few rows and resets the stream in
 * a {@code finally} block, so the stream handed to {@link #build(InputStream)}
 * or {@link #isCSV(InputStream)} must support {@code mark}/{@code reset}.
 */
public class CSVReaderBuilder {

    /** Field delimiter used when the configuration does not override it. */
    private static final String DEFAULT_FIELD_DELIMITER = ",";

    /** Comment marker used when the configuration does not override it. */
    private static final String DEFAULT_COMMENT_DELIMITER = "#";

    /** Character (a space, code 32) passed as the comment marker of the probing strategies. */
    public static final char NULL_CHAR = ' ';

    /** Minimum number of columns a row must have to be accepted by a probe. */
    private static final int MIN_COLUMNS = 2;

    /** Maximum number of rows inspected by a single probe. */
    private static final int LINES_TO_CHECK = 5;

    /** Delimiters tried, in this order, after the library default strategy. */
    private static final char[] popularDelimiters = { '\t', '|', ',', ';' };

    private static final DefaultConfiguration defaultConfiguration = DefaultConfiguration.singleton();

    /** Candidate strategies: the library default first, then one per popular delimiter. */
    private static final CSVStrategy[] strategies = new CSVStrategy[popularDelimiters.length + 1];

    static {
        strategies[0] = CSVStrategy.DEFAULT_STRATEGY;
        int index = 1;
        for (char delimiter : popularDelimiters) {
            strategies[index++] = getCsvStrategy(delimiter, NULL_CHAR);
        }
    }

    /**
     * Creates a parser for the given stream using the first candidate strategy
     * that passes the probe, or the strategy described by the configuration
     * when no candidate passes.
     *
     * @param is stream to parse; it is marked and reset while probing
     * @return a parser reading from {@code is}
     * @throws IOException if probing the stream fails
     * @throws RuntimeException if the configuration fallback is needed and a
     *         configured delimiter is not exactly one character long
     */
    public static CSVParser build(InputStream is) throws IOException {
        CSVStrategy bestStrategy = getBestStrategy(is);
        if (bestStrategy == null) {
            bestStrategy = getCSVStrategyFromConfiguration();
        }
        // NOTE(review): InputStreamReader without a charset uses the platform default;
        // kept as-is to preserve behaviour - confirm whether an explicit charset is wanted.
        return new CSVParser(new InputStreamReader(is), bestStrategy);
    }

    /**
     * Tells whether at least one candidate strategy passes the probe on the stream.
     *
     * @param is stream to inspect; it is marked and reset while probing
     * @return {@code true} if some candidate strategy accepts the leading rows
     * @throws IOException if probing the stream fails
     */
    public static boolean isCSV(InputStream is) throws IOException {
        return getBestStrategy(is) != null;
    }

    /**
     * Returns the first candidate strategy accepted by {@link #testStrategy},
     * or {@code null} when none is accepted.
     */
    private static CSVStrategy getBestStrategy(InputStream is) throws IOException {
        for (CSVStrategy strategy : strategies) {
            if (testStrategy(is, strategy)) {
                return strategy;
            }
        }
        return null;
    }

    /** Creates a strategy with the given delimiter and comment marker and {@code '} as encapsulator. */
    private static CSVStrategy getCsvStrategy(char delimiter, char comment) {
        return new CSVStrategy(delimiter, '\'', comment);
    }

    /**
     * Builds the fallback strategy from the {@code any23.extraction.csv.field}
     * and {@code any23.extraction.csv.comment} configuration properties.
     */
    private static CSVStrategy getCSVStrategyFromConfiguration() {
        char fieldDelimiter = getCharValueFromConfiguration(
                "any23.extraction.csv.field", DEFAULT_FIELD_DELIMITER);
        char commentDelimiter = getCharValueFromConfiguration(
                "any23.extraction.csv.comment", DEFAULT_COMMENT_DELIMITER);
        return new CSVStrategy(fieldDelimiter, '\'', commentDelimiter);
    }

    /**
     * Reads a configuration property that must hold exactly one character.
     *
     * @param property name of the configuration property
     * @param defaultValue value passed to the configuration lookup as default
     * @return the single character of the property value
     * @throws RuntimeException if the value is not exactly one character long
     */
    private static char getCharValueFromConfiguration(String property, String defaultValue) {
        String delimiter = defaultConfiguration.getProperty(property, defaultValue);
        // length() != 1 already rejects the empty string, so no separate emptiness check is needed.
        if (delimiter.length() != 1) {
            throw new RuntimeException(property + " value must be a single character");
        }
        return delimiter.charAt(0);
    }

    /**
     * Probes the stream with one strategy. Up to {@link #LINES_TO_CHECK} rows
     * are read; the probe fails as soon as a row has fewer than
     * {@link #MIN_COLUMNS} columns, fewer columns than the first row, or more
     * than one column beyond the first row's count. The stream is always
     * reset to the marked position before returning.
     *
     * <p>If the parser yields no row at all the loop exits immediately and the
     * probe succeeds.
     *
     * @param is stream to probe
     * @param strategy strategy used to split the rows
     * @return {@code true} if no inspected row violated the rules above
     * @throws IOException if reading, marking or resetting the stream fails
     */
    private static boolean testStrategy(InputStream is, CSVStrategy strategy) throws IOException {
        is.mark(Integer.MAX_VALUE);
        try {
            CSVParser parser = new CSVParser(new InputStreamReader(is), strategy);
            int headerColumnCount = -1;
            for (int remaining = LINES_TO_CHECK; remaining > 0; remaining--) {
                String[] row = parser.getLine();
                if (row == null) {
                    break;
                }
                if (row.length < MIN_COLUMNS) {
                    return false;
                }
                if (headerColumnCount == -1) {
                    // The first row fixes the reference column count.
                    headerColumnCount = row.length;
                } else if (row.length < headerColumnCount || row.length - 1 > headerColumnCount) {
                    // Later rows may match the first row or exceed it by exactly one column.
                    return false;
                }
            }
            return true;
        } finally {
            is.reset();
        }
    }
}