./ 0000755 0001750 0001750 00000000000 11061274763 011674 5 ustar janpascal janpascal ./pom.xml 0000644 0001750 0001750 00000005724 11061274763 013221 0 ustar janpascal janpascal
Other options are : {@link #FILLLEFT} and {@link #FILLRIGHT}
* @param fill the fill pattern.
*/
public void setFill(int fill) {
// Stores the fill pattern as given; no validation of the value happens here.
this.fill = fill;
}
/**
 * Returns the character used to pad values.
 *
 * @return the fill char. Defaults to a space.
 */
public char getFillChar() {
return fillChar;
}
/**
 * Sets the character used to pad values.
 *
 * @param fillChar the fill char
 */
public void setFillChar(char fillChar) {
this.fillChar = fillChar;
}
/**
 * Returns the delimiter character.
 *
 * @return the delimiter used.
 */
public char getDelimiter() {
return delimiter;
}
/**
 * Sets the delimiter to use.
 *
 * @param delimiter the delimiter character.
 */
public void setDelimiter(char delimiter) {
this.delimiter = delimiter;
}
/**
 * Tells whether the delimiter is left out of the output.
 *
 * @return true if the writer should ignore the delimiter character.
 */
public boolean isDelimiterIgnored() {
return ignoreDelimiter;
}
/**
 * Specifies whether the writer should ignore the delimiter.
 *
 * @param ignoreDelimiter true to ignore the delimiter; defaults to false.
 */
public void setIgnoreDelimiter(boolean ignoreDelimiter) {
this.ignoreDelimiter = ignoreDelimiter;
}
/**
 * Returns the character placed around a value.
 *
 * @return the value delimiter used. Defaults to a double quote (<code>"</code>).
 */
public char getValueDelimiter() {
return valueDelimiter;
}
/**
 * Sets the value delimiter to use.
 *
 * @param valueDelimiter the value delimiter character.
 */
public void setValueDelimiter(char valueDelimiter) {
this.valueDelimiter = valueDelimiter;
}
/**
 * Tells whether the value delimiter is left out of the output.
 *
 * @return true if the writer should ignore the value delimiter character.
 * Defaults to true.
 */
public boolean isValueDelimiterIgnored() {
return ignoreValueDelimiter;
}
/**
 * Specifies whether the writer should ignore the value delimiter.
 *
 * NOTE(review): this javadoc used to say the default is false, while the
 * documentation of isValueDelimiterIgnored() says the default is true. The
 * field initializer is not visible from here, so confirm which is correct.
 *
 * @param ignoreValueDelimiter true to ignore the value delimiter.
 */
public void setIgnoreValueDelimiter(boolean ignoreValueDelimiter) {
this.ignoreValueDelimiter = ignoreValueDelimiter;
}
/**
 * Tells whether a field header is used.
 *
 * @return true if a field header is used. Defaults to false.
 */
public boolean isFieldHeader() {
return fieldHeader;
}
/**
 * Specifies whether a field header should be used.
 *
 * @param fieldHeader true or false.
 */
public void setFieldHeader(boolean fieldHeader) {
this.fieldHeader = fieldHeader;
}
/**
 * Compares this config with another object.
 *
 * Value-based comparison of the configuration is not implemented yet
 * (see the TODO below). After the type guard the result is whatever the
 * superclass implementation of equals returns.
 *
 * @param obj the object to compare with, may be null
 * @return false if obj is null or not a CSVConfig, otherwise the result of
 *         super.equals(obj)
 * @see java.lang.Object#equals(java.lang.Object)
 */
public boolean equals(Object obj) {
    // instanceof is false for null, so this one test rejects both null and
    // foreign types. The former guard combined the two checks with && and
    // therefore only ever fired for null.
    if (!(obj instanceof CSVConfig)) {
        return false;
    }
    return super.equals(obj);
    // TODO compare the configuration values:
    // CSVConfig config = (CSVConfig) obj;
    // getFill() == config.getFill()
    // getFields().equals(config.getFields())
}
/**
 * Intended to create a config by guessing it from the content of a stream.
 *
 * NOT IMPLEMENTED: the method currently returns null unconditionally and
 * neither reads nor closes the given stream. The intended contract (per the
 * original note) is that the stream will be closed once guessing is
 * implemented.
 *
 * @param inputStream the inputstream (currently unused).
 * @return the guessed config; currently always null.
 */
public static CSVConfig guessConfig(InputStream inputStream) {
return null;
}
/**
 * Tells whether the end of each line is trimmed.
 *
 * @return true if the end of the line should be trimmed. Default is false.
 */
public boolean isEndTrimmed() {
return endTrimmed;
}
/**
 * Specifies whether the end of the line needs to be trimmed. Defaults to false.
 *
 * @param endTrimmed true to trim the end of each line.
 */
public void setEndTrimmed(boolean endTrimmed) {
this.endTrimmed = endTrimmed;
}
}
./src/java/org/apache/commons/csv/writer/CSVWriter.java 0000644 0001750 0001750 00000010315 11061274762 023670 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.csv.writer;
import java.io.Writer;
import java.util.Arrays;
import java.util.Map;
/**
* CSVWriter
*
* @author Martin van den Bemt
* @version $Id: $
*/
public class CSVWriter {

    /** The CSV config. */
    private CSVConfig config;

    /** The writer the records are written to. */
    private Writer writer;

    /**
     * Creates a writer without a config. A config has to be supplied through
     * {@link #setConfig(CSVConfig)} before records can be written.
     */
    public CSVWriter() {
    }

    /**
     * Creates a writer that uses the given config.
     *
     * @param config the CSVConfig
     */
    public CSVWriter(CSVConfig config) {
        setConfig(config);
    }

    /**
     * Writes one record as a single line terminated by "\n".
     *
     * The fields of the config determine which map entries are written and in
     * which order: for every field the map is queried with the field name.
     * Entries that are missing (or null) contribute no text, but the delimiter
     * between fields is still written unless the config ignores delimiters.
     *
     * Any exception raised while building or writing the line is caught and
     * its stack trace is printed; it is not propagated to the caller.
     *
     * @param map the record, keyed by field name
     */
    public void writeRecord(Map map) {
        CSVField[] fields = config.getFields();
        try {
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < fields.length; i++) {
                Object o = map.get(fields[i].getName());
                if (o != null) {
                    String value = o.toString();
                    value = writeValue(fields[i], value);
                    sb.append(value);
                }
                // no delimiter after the last field
                if (!config.isDelimiterIgnored() && fields.length != (i + 1)) {
                    sb.append(config.getDelimiter());
                }
            }
            if (config.isEndTrimmed()) {
                // cut off trailing whitespace in one step
                int end = sb.length();
                while (end > 0 && Character.isWhitespace(sb.charAt(end - 1))) {
                    end--;
                }
                sb.setLength(end);
            }
            sb.append("\n");
            String line = sb.toString();
            writer.write(line);
        } catch (Exception e) {
            // existing best-effort behaviour: report the problem, drop the record
            e.printStackTrace();
        }
    }

    /**
     * Formats a single value for output.
     *
     * When the config is fixed width, the value is padded with the config's
     * fill char up to the field size (on the left for FILLLEFT, otherwise on
     * the right) or truncated to the field size when it is too long, and
     * afterwards wrapped in value delimiters unless the config ignores them.
     * When the config is not fixed width the value is returned unchanged.
     *
     * NOTE(review): value delimiters are only applied in fixed width mode;
     * confirm whether they should also be applied otherwise.
     *
     * @param field the field the value belongs to
     * @param value the value to format
     * @return the formatted value
     * @throws Exception declared for subclasses; nothing in this
     *         implementation throws a checked exception
     */
    protected String writeValue(CSVField field, String value) throws Exception {
        if (config.isFixedWidth()) {
            if (value.length() < field.getSize()) {
                // the field's own fill pattern wins over the config's pattern
                int fillPattern = config.getFill();
                if (field.overrideFill()) {
                    fillPattern = field.getFill();
                }
                StringBuffer sb = new StringBuffer();
                int fillSize = (field.getSize() - value.length());
                char[] fill = new char[fillSize];
                Arrays.fill(fill, config.getFillChar());
                if (fillPattern == CSVConfig.FILLLEFT) {
                    sb.append(fill);
                    sb.append(value);
                    value = sb.toString();
                } else {
                    // defaults to fillpattern FILLRIGHT when fixedwidth is used
                    sb.append(value);
                    sb.append(fill);
                    value = sb.toString();
                }
            } else if (value.length() > field.getSize()) {
                // value too big, cut it down to the field size
                value = value.substring(0, field.getSize());
            }
            if (!config.isValueDelimiterIgnored()) {
                // add the value delimiter..
                value = config.getValueDelimiter() + value + config.getValueDelimiter();
            }
        }
        return value;
    }

    /**
     * @return the CSVConfig or null if not present
     */
    public CSVConfig getConfig() {
        return config;
    }

    /**
     * Set the CSVConfig
     * @param config the CSVConfig
     */
    public void setConfig(CSVConfig config) {
        this.config = config;
    }

    /**
     * Set the writer to write the CSV file to.
     * @param writer the writer.
     */
    public void setWriter(Writer writer) {
        this.writer = writer;
    }
}
./src/java/org/apache/commons/csv/writer/CSVField.java 0000644 0001750 0001750 00000004655 11061274762 023451 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.csv.writer;
/**
*
* @author Martin van den Bemt
* @version $Id: $
*/
public class CSVField {

    /** Name of the field. */
    private String name;

    /** Size of the field. */
    private int size;

    /** Fill pattern of this field. */
    private int fill;

    /** Becomes true once a fill pattern has been set on this field. */
    private boolean overrideFill;

    /**
     * Creates a field without a name and with size 0.
     */
    public CSVField() {
    }

    /**
     * Creates a named field.
     *
     * @param name the name of the field
     */
    public CSVField(String name) {
        this.setName(name);
    }

    /**
     * Creates a named field of the given size.
     *
     * @param name the name of the field
     * @param size the size of the field
     */
    public CSVField(String name, int size) {
        this(name);
        this.setSize(size);
    }

    /**
     * @return the name of the field
     */
    public String getName() {
        return this.name;
    }

    /**
     * Set the name of the field
     *
     * @param name the name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the size of the field
     */
    public int getSize() {
        return this.size;
    }

    /**
     * Set the size of the field.
     * The size will be ignored when fixedwidth is set to false in the CSVConfig
     *
     * @param size the size of the field.
     */
    public void setSize(int size) {
        this.size = size;
    }

    /**
     * @return the fill pattern.
     */
    public int getFill() {
        return this.fill;
    }

    /**
     * Sets the fill pattern and marks this field as overriding the fill.
     *
     * @param fill the fill pattern
     */
    public void setFill(int fill) {
        this.fill = fill;
        this.overrideFill = true;
    }

    /**
     * Does this field override fill ?
     *
     * @return true once {@link #setFill(int)} has been called on this field
     */
    public boolean overrideFill() {
        return this.overrideFill;
    }
}
./src/java/org/apache/commons/csv/CharBuffer.java 0000644 0001750 0001750 00000014310 11061274763 022533 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.csv;
/**
* A simple StringBuffer replacement that aims to
* reduce copying as much as possible. The buffer
* grows as necessary.
* This class is not thread safe.
*
* @author Ortwin Gl�ck
*/
public class CharBuffer {

    /** Backing array; only the first <code>length</code> entries hold content. */
    private char[] c;

    /**
     * Actually used number of characters in the array.
     * It is also the index at which
     * a new character will be inserted into <code>c</code>.
     */
    private int length;

    /**
     * Creates a new CharBuffer with an initial capacity of 32 characters.
     */
    public CharBuffer() {
        this(32);
    }

    /**
     * Creates a new CharBuffer with an initial capacity
     * of <code>length</code> characters.
     *
     * @param length the initial capacity, must be greater than zero
     * @throws IllegalArgumentException if length is zero or negative
     */
    public CharBuffer(final int length) {
        if (length == 0) {
            throw new IllegalArgumentException("Can't create an empty CharBuffer");
        }
        if (length < 0) {
            // report bad input consistently instead of a NegativeArraySizeException
            throw new IllegalArgumentException("Negative initial capacity: " + length);
        }
        this.c = new char[length];
    }

    /**
     * Empties the buffer. The capacity still remains the same, so no memory is freed.
     */
    public void clear() {
        length = 0;
    }

    /**
     * Returns the number of characters in the buffer.
     * @return the number of characters
     */
    public int length() {
        return length;
    }

    /**
     * Returns the current capacity of the buffer.
     * @return the maximum number of characters that can be stored in this buffer without
     * resizing it.
     */
    public int capacity() {
        return c.length;
    }

    /**
     * Appends the contents of <code>cb</code> to the end of this CharBuffer.
     * @param cb the CharBuffer to append or null
     */
    public void append(final CharBuffer cb) {
        if (cb == null) {
            return;
        }
        provideCapacity(length + cb.length);
        System.arraycopy(cb.c, 0, c, length, cb.length);
        length += cb.length;
    }

    /**
     * Appends <code>s</code> to the end of this CharBuffer.
     * This method involves copying the new data once!
     * @param s the String to append or null
     */
    public void append(final String s) {
        if (s == null) {
            return;
        }
        append(s.toCharArray());
    }

    /**
     * Appends <code>sb</code> to the end of this CharBuffer.
     * This method involves copying the new data once!
     * @param sb the StringBuffer to append or null
     */
    public void append(final StringBuffer sb) {
        if (sb == null) {
            return;
        }
        provideCapacity(length + sb.length());
        sb.getChars(0, sb.length(), c, length);
        length += sb.length();
    }

    /**
     * Appends <code>data</code> to the end of this CharBuffer.
     * This method involves copying the new data once!
     * @param data the char[] to append or null
     */
    public void append(final char[] data) {
        if (data == null) {
            return;
        }
        provideCapacity(length + data.length);
        System.arraycopy(data, 0, c, length, data.length);
        length += data.length;
    }

    /**
     * Appends a single character to the end of this CharBuffer.
     * This method involves copying the new data once!
     * @param data the char to append
     */
    public void append(final char data) {
        provideCapacity(length + 1);
        c[length] = data;
        length++;
    }

    /**
     * Shrinks the capacity of the buffer to the current length if necessary.
     * This method involves copying the data once!
     */
    public void shrink() {
        if (c.length == length) {
            return;
        }
        char[] newc = new char[length];
        System.arraycopy(c, 0, newc, 0, length);
        c = newc;
    }

    /**
     * Removes trailing whitespace.
     */
    public void trimTrailingWhitespace() {
        while (length > 0 && Character.isWhitespace(c[length - 1])) {
            length--;
        }
    }

    /**
     * Returns the contents of the buffer as a char[]. The returned array may
     * be the internal array of the buffer, so the caller must take care when
     * modifying it.
     * This method allows to avoid copying if the caller knows the exact capacity
     * before.
     * @return the internal array if it is completely filled, otherwise a copy
     *         of the used part
     */
    public char[] getCharacters() {
        if (c.length == length) {
            return c;
        }
        char[] chars = new char[length];
        System.arraycopy(c, 0, chars, 0, length);
        return chars;
    }

    /**
     * Returns the character at the specified position.
     * The position is only checked against the capacity of the backing array,
     * not against {@link #length()}.
     *
     * @param pos the index into the buffer
     * @return the character stored at that index
     */
    public char charAt(int pos) {
        return c[pos];
    }

    /**
     * Converts the contents of the buffer into a StringBuffer.
     * This method involves copying the new data once!
     * @return a new StringBuffer holding the buffer content
     */
    public StringBuffer toStringBuffer() {
        StringBuffer sb = new StringBuffer(length);
        sb.append(c, 0, length);
        return sb;
    }

    /**
     * Converts the contents of the buffer into a String.
     * This method involves copying the new data once!
     * @return a new String holding the buffer content
     */
    public String toString() {
        return new String(c, 0, length);
    }

    /**
     * Copies the data into a new array of at least <code>capacity</code> size.
     * Nothing happens if the buffer is already large enough.
     * @param capacity the minimum capacity required
     */
    public void provideCapacity(final int capacity) {
        if (c.length >= capacity) {
            return;
        }
        // Same growth as (capacity * 3) / 2 + 1, but without multiplying first,
        // which overflowed int for large requests.
        int newcapacity = capacity + (capacity >> 1) + 1;
        if (newcapacity < capacity) {
            // even the 50% head-room overflowed: fall back to the exact request
            newcapacity = capacity;
        }
        char[] newc = new char[newcapacity];
        System.arraycopy(c, 0, newc, 0, length);
        c = newc;
    }
}
./src/java/org/apache/commons/csv/ExtendedBufferedReader.java 0000644 0001750 0001750 00000020474 11061274763 025062 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
/**
* ExtendedBufferedReader
*
* A special reader decorater which supports more
* sophisticated access to the underlying reader object.
*
* In particular the reader supports a look-ahead option,
* which allows you to see the next char returned by
* next().
* Furthermore the skip-method supports skipping until
* (but excluding) a given char. Similar functionality
* is supported by the reader as well.
*
*/
class ExtendedBufferedReader extends BufferedReader {

    /** the end of stream symbol */
    public static final int END_OF_STREAM = -1;

    /** undefined state for the lookahead char */
    public static final int UNDEFINED = -2;

    /** the lookahead char */
    private int lookaheadChar = UNDEFINED;

    /** the last char returned */
    private int lastChar = UNDEFINED;

    /** the line counter */
    private int lineCounter = 0;

    /** reusable buffer for the text collected by readUntil and readLine */
    private CharBuffer line = new CharBuffer();

    /**
     * Creates an extended buffered reader using the default buffer-size.
     *
     * @param r the reader to decorate
     */
    public ExtendedBufferedReader(Reader r) {
        super(r);
        /* note uh: do not fetch the first char here,
         *          because this might block the method!
         */
    }

    /**
     * Creates an extended buffered reader using the given buffer-size.
     *
     * @param r the reader to decorate
     * @param bufSize the buffer size passed on to BufferedReader
     */
    public ExtendedBufferedReader(Reader r, int bufSize) {
        super(r, bufSize);
        /* note uh: do not fetch the first char here,
         *          because this might block the method!
         */
    }

    /**
     * Reads the next char from the input stream.
     * The lookahead is only refilled when the underlying reader reports that
     * it is ready; otherwise it is left undefined until the next access.
     *
     * @return the next char or END_OF_STREAM if end of stream has been reached.
     * @throws IOException if reading from the underlying reader fails
     */
    public int read() throws IOException {
        // initialize the lookahead
        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }
        lastChar = lookaheadChar;
        if (super.ready()) {
            lookaheadChar = super.read();
        } else {
            lookaheadChar = UNDEFINED;
        }
        if (lastChar == '\n') {
            lineCounter++;
        }
        return lastChar;
    }

    /**
     * Returns the last read character again.
     *
     * @return the last read char or UNDEFINED
     */
    public int readAgain() {
        return lastChar;
    }

    /**
     * Non-blocking reading of len chars into buffer buf starting
     * at bufferposition off.
     *
     * Performs an iterative read on the underlying stream
     * as long as the following conditions hold:
     *  - less than len chars have been read
     *  - end of stream has not been reached
     *  - next read is not blocking
     *
     * Note that -1 is also returned when no lookahead is available and the
     * reader is not ready, i.e. not only at the end of the stream.
     *
     * @param buf the destination buffer
     * @param off the index in buf at which to start storing chars
     * @param len the maximum number of chars to read
     * @return nof chars actually read or END_OF_STREAM
     * @throws IOException if reading from the underlying reader fails
     */
    public int read(char[] buf, int off, int len) throws IOException {
        // do not claim if len == 0
        if (len == 0) {
            return 0;
        }

        // init lookahead, but do not block !!
        if (lookaheadChar == UNDEFINED) {
            if (ready()) {
                lookaheadChar = super.read();
            } else {
                return -1;
            }
        }
        // 'first read of underlying stream'
        if (lookaheadChar == -1) {
            return -1;
        }
        // continue until the lookaheadChar would block
        int cOff = off;
        while (len > 0 && ready()) {
            if (lookaheadChar == -1) {
                // eof stream reached, do not continue
                return cOff - off;
            } else {
                buf[cOff++] = (char) lookaheadChar;
                if (lookaheadChar == '\n') {
                    lineCounter++;
                }
                lastChar = lookaheadChar;
                lookaheadChar = super.read();
                len--;
            }
        }
        return cOff - off;
    }

    /**
     * Reads all characters up to (but not including) the given character.
     * The given character stays in the stream as the next lookahead.
     *
     * @param c the character to read up to
     * @return the string up to the character <code>c</code>
     * @throws IOException if reading from the underlying reader fails
     */
    public String readUntil(char c) throws IOException {
        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }
        line.clear(); // reuse
        while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
            line.append((char) lookaheadChar);
            if (lookaheadChar == '\n') {
                lineCounter++;
            }
            lastChar = lookaheadChar;
            lookaheadChar = super.read();
        }
        return line.toString();
    }

    /**
     * Reads the rest of the current line.
     *
     * @return A String containing the contents of the line, not
     *         including any line-termination characters, or null
     *         if the end of the stream has been reached
     * @throws IOException if reading from the underlying reader fails
     */
    public String readLine() throws IOException {
        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }

        line.clear(); //reuse

        // return null if end of stream has been reached
        if (lookaheadChar == END_OF_STREAM) {
            return null;
        }
        // do we have a line termination already
        char laChar = (char) lookaheadChar;
        if (laChar == '\n' || laChar == '\r') {
            lastChar = lookaheadChar;
            lookaheadChar = super.read();
            // Treat "\r\n" as ONE terminator. A '\n' following a '\n' starts
            // the next (empty) line and must not be swallowed here.
            if (laChar == '\r' && lookaheadChar == '\n') {
                lastChar = lookaheadChar;
                lookaheadChar = super.read();
            }
            lineCounter++;
            return line.toString();
        }

        // create the rest-of-line return and update the lookahead
        line.append(laChar);
        String restOfLine = super.readLine(); // TODO involves copying
        lastChar = lookaheadChar;
        lookaheadChar = super.read();
        if (restOfLine != null) {
            line.append(restOfLine);
        }
        lineCounter++;
        return line.toString();
    }

    /**
     * Skips char in the stream
     *
     * ATTENTION: invalidates the line-counter !!!!!
     *
     * @param n the number of chars to skip, must not be negative
     * @return nof skipped chars
     * @throws IllegalArgumentException if n is negative
     * @throws IOException if reading from the underlying reader fails
     */
    public long skip(long n) throws IllegalArgumentException, IOException {

        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }

        // illegal argument
        if (n < 0) {
            throw new IllegalArgumentException("negative argument not supported");
        }

        // no skipping
        if (n == 0 || lookaheadChar == END_OF_STREAM) {
            return 0;
        }

        // skip and reread the lookahead-char
        // (the current lookahead accounts for one of the n skipped chars)
        long skiped = 0;
        if (n > 1) {
            skiped = super.skip(n - 1);
        }
        lookaheadChar = super.read();
        // fixme uh: we should check the skiped sequence for line-terminations...
        lineCounter = Integer.MIN_VALUE;
        return skiped + 1;
    }

    /**
     * Skips all chars in the input until (but excluding) the given char
     *
     * @param c the char to stop in front of
     * @return the number of chars skipped
     * @throws IllegalArgumentException declared for interface compatibility;
     *         not thrown by this implementation
     * @throws IOException if reading from the underlying reader fails
     */
    public long skipUntil(char c) throws IllegalArgumentException, IOException {
        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }
        long counter = 0;
        while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
            if (lookaheadChar == '\n') {
                lineCounter++;
            }
            lookaheadChar = super.read();
            counter++;
        }
        return counter;
    }

    /**
     * Returns the next char in the stream without consuming it.
     *
     * Remember the next char read by read(..) will always be
     * identical to lookAhead().
     *
     * @return the next char (without consuming it) or END_OF_STREAM
     * @throws IOException if reading from the underlying reader fails
     */
    public int lookAhead() throws IOException {
        if (lookaheadChar == UNDEFINED) {
            lookaheadChar = super.read();
        }
        return lookaheadChar;
    }

    /**
     * Returns the nof line read
     * ATTENTION: the skip-method does invalidate the line-number counter
     *
     * @return the current-line-number (or -1)
     */
    public int getLineNumber() {
        if (lineCounter > -1) {
            return lineCounter;
        } else {
            return -1;
        }
    }

    /**
     * Marking is not supported by this reader.
     *
     * @return always false
     */
    public boolean markSupported() {
        /* note uh: marking is not supported, cause we cannot
         *          see into the future...
         */
        return false;
    }
}
./src/java/org/apache/commons/csv/package.html 0000644 0001750 0001750 00000007160 11061274763 022147 0 ustar janpascal janpascal
CSV (or its dialects) are widely used as interfaces to legacy systems or manual data-imports. Basically CSV stands for "Comma Separated Values" but this simple abbreviation leads to more confusion than definitions.
Common to all file dialects is their basic structure: the CSV data format is record-oriented, and each record starts on a new textual line. A record is built from a list of values. Keep in mind that records need not all have the same number of values:
csv    := records*
record := values*
The following list contains the csv aspects the WAKE CSV parser supports:
In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv) can be set directly.
Example usage:
./src/java/org/apache/commons/csv/CSVPrinter.java 0000644 0001750 0001750 00000016570 11061274763 022535 0 ustar janpascal janpascal /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.csv; import java.io.OutputStream; import java.io.PrintWriter; import java.io.Writer; /** * Print values as a comma separated list. */ public class CSVPrinter { /** The place that the values get written. */ protected PrintWriter out; /** True if we just began a new line. */ protected boolean newLine = true; private CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; /** * Create a printer that will print values to the given * stream. Character to byte conversion is done using * the default character encoding. Comments will be * written using the default comment character '#'. * * @param out stream to which to print. */ public CSVPrinter(OutputStream out) { this.out = new PrintWriter(out); } /** * Create a printer that will print values to the given * stream. Comments will be * written using the default comment character '#'. * * @param out stream to which to print. 
*/ public CSVPrinter(Writer out) { if (out instanceof PrintWriter) { this.out = (PrintWriter) out; } else { this.out = new PrintWriter(out); } } // ====================================================== // strategies // ====================================================== /** * Sets the specified CSV Strategy * * @return current instance of CSVParser to allow chained method calls */ public CSVPrinter setStrategy(CSVStrategy strategy) { this.strategy = strategy; return this; } /** * Obtain the specified CSV Strategy * * @return strategy currently being used */ public CSVStrategy getStrategy() { return this.strategy; } // ====================================================== // printing implementation // ====================================================== /** * Print the string as the last value on the line. The value * will be quoted if needed. * * @param value value to be outputted. */ public void println(String value) { print(value); out.println(); out.flush(); newLine = true; } /** * Output a blank line */ public void println() { out.println(); out.flush(); newLine = true; } /** * Print a single line of comma separated values. * The values will be quoted if needed. Quotes and * newLine characters will be escaped. * * @param values values to be outputted. */ public void println(String[] values) { for (int i = 0; i < values.length; i++) { print(values[i]); } out.println(); out.flush(); newLine = true; } /** * Print several lines of comma separated values. * The values will be quoted if needed. Quotes and * newLine characters will be escaped. * * @param values values to be outputted. */ public void println(String[][] values) { for (int i = 0; i < values.length; i++) { println(values[i]); } if (values.length == 0) { out.println(); } out.flush(); newLine = true; } /** * Put a comment among the comma separated values. * Comments will always begin on a new line and occupy a * least one full line. 
The character specified to star * comments and a space will be inserted at the beginning of * each new line in the comment. * * @param comment the comment to output */ public void printlnComment(String comment) { if(this.strategy.isCommentingDisabled()) { return; } if (!newLine) { out.println(); } out.print(this.strategy.getCommentStart()); out.print(' '); for (int i = 0; i < comment.length(); i++) { char c = comment.charAt(i); switch (c) { case '\r' : if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') { i++; } // break intentionally excluded. case '\n' : out.println(); out.print(this.strategy.getCommentStart()); out.print(' '); break; default : out.print(c); break; } } out.println(); out.flush(); newLine = true; } /** * Print the string as the next value on the line. The value * will be quoted if needed. * * @param value value to be outputted. */ public void print(String value) { boolean quote = false; if (value.length() > 0) { char c = value.charAt(0); if (newLine && (c < '0' || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z'))) { quote = true; } if (c == ' ' || c == '\f' || c == '\t') { quote = true; } for (int i = 0; i < value.length(); i++) { c = value.charAt(i); if (c == '"' || c == this.strategy.getDelimiter() || c == '\n' || c == '\r') { quote = true; c = value.charAt( value.length() - 1 ); break; } } if (c == ' ' || c == '\f' || c == '\t') { quote = true; } } else if (newLine) { // always quote an empty token that is the first // on the line, as it may be the only thing on the // line. If it were not quoted in that case, // an empty line has no tokens. quote = true; } if (newLine) { newLine = false; } else { out.print(this.strategy.getDelimiter()); } if (quote) { out.print(escapeAndQuote(value)); } else { out.print(value); } out.flush(); } /** * Enclose the value in quotes and escape the quote * and comma characters that are inside. 
* * @param value needs to be escaped and quoted * @return the value, escaped and quoted */ private String escapeAndQuote(String value) { // the initial count is for the preceding and trailing quotes int count = 2; for (int i = 0; i < value.length(); i++) { switch (value.charAt(i)) { case '\"' : case '\n' : case '\r' : case '\\' : count++; break; default: break; } } StringBuffer sb = new StringBuffer(value.length() + count); sb.append(strategy.getEncapsulator()); for (int i = 0; i < value.length(); i++) { char c = value.charAt(i); if (c == strategy.getEncapsulator()) { sb.append('\\').append(c); continue; } switch (c) { case '\n' : sb.append("\\n"); break; case '\r' : sb.append("\\r"); break; case '\\' : sb.append("\\\\"); break; default : sb.append(c); } } sb.append(strategy.getEncapsulator()); return sb.toString(); } } ./src/java/org/apache/commons/csv/CSVUtils.java 0000644 0001750 0001750 00000010121 11061274763 022174 0 ustar janpascal janpascal /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package org.apache.commons.csv; import java.io.StringWriter; import java.io.StringReader; import java.io.IOException; /** * Utility methods for dealing with CSV files */ public class CSVUtils { private static final String[] EMPTY_STRING_ARRAY = new String[0]; private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = new String[0][0]; /** *String[] parsedLine = CSVParser.parseLine("a,b,c"); for (int i = 0; i < parsedLine.length; ++i) { System.out.println("value " + i + "=" + parsedLine[i]); }
CSVUtils
instances should NOT be constructed in
* standard programming.
*
*
This constructor is public to permit tools that require a JavaBean * instance to operate.
*/ public CSVUtils() { } /** * Converts an array of string values into a single CSV line. All *null
values are converted to the string "null"
,
* all strings equal to "null"
will additionally get quotes
* around.
*
* @param values the value array
* @return the CSV string, will be an empty string if the length of the
* value array is 0
*/
public static String printLine(String[] values) {
// set up a CSVUtils
StringWriter stringWriter = new StringWriter();
CSVPrinter csvPrinter = new CSVPrinter(stringWriter);
// check for null values an "null" as strings and convert them
// into the strings "null" and "\"null\""
for (int i = 0; i < values.length; i++) {
if (values[i] == null) {
values[i] = "null";
} else if (values[i].equals("null")) {
values[i] = "\"null\"";
}
}
// convert to CSV
csvPrinter.println(values);
// as the resulting string has \r\n at the end, we will trim that away
return stringWriter.toString().trim();
}
// ======================================================
// static parsers
// ======================================================
/**
* Parses the given String according to the default {@link CSVStrategy}.
*
* @param s CSV String to be parsed.
* @return parsed String matrix (which is never null)
* @throws IOException in case of error
*/
public static String[][] parse(String s) throws IOException {
if (s == null) {
throw new IllegalArgumentException("Null argument not allowed.");
}
String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
if (result == null) {
// since CSVStrategy ignores empty lines an empty array is returned
// (i.e. not "result = new String[][] {{""}};")
result = EMPTY_DOUBLE_STRING_ARRAY;
}
return result;
}
/**
* Parses the first line only according to the default {@link CSVStrategy}.
*
* Parsing empty string will be handled as valid records containing zero
* elements, so the following property holds: parseLine("").length == 0.
*
* @param s CSV String to be parsed.
* @return parsed String vector (which is never null)
* @throws IOException in case of error
*/
public static String[] parseLine(String s) throws IOException {
    if (s == null) {
        throw new IllegalArgumentException("Null argument not allowed.");
    }
    // uh,jh: make sure that parseLine("").length == 0
    if (s.length() == 0) {
        return EMPTY_STRING_ARRAY;
    }
    String[] line = (new CSVParser(new StringReader(s))).getLine();
    // getLine() reports "end of input reached before any value" as null
    // (for instance when the text consists of ignored empty lines only).
    // This method is documented to never return null, so that case is
    // reported as a record with zero elements, the same way parse() maps a
    // null matrix to an empty one.
    return (line == null) ? EMPTY_STRING_ARRAY : line;
}
}
./src/java/org/apache/commons/csv/CSVStrategy.java 0000644 0001750 0001750 00000013403 11061274763 022704 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.Serializable;
/**
* CSVStrategy
*
* Represents the strategy for a CSV.
*/
public class CSVStrategy implements Cloneable, Serializable {

    // NOTE(review): the class declares no serialVersionUID, so its default
    // serial id is derived from the class shape (including the modifiers of
    // the public static members below). Confirm nobody relies on reading
    // instances serialized by an older build.

    /**
     * Sentinel for "comments are switched off".
     *
     * (char) -2 is the code unit U+FFFE. It cannot be confused with the
     * end-of-stream signal (-1) returned by readers, and U+FFFE is a Unicode
     * noncharacter, so it should never collide with a real text char.
     */
    public static final char COMMENTS_DISABLED = (char) -2;

    /** Sentinel for "escaping is switched off"; same value as {@link #COMMENTS_DISABLED}. */
    public static final char ESCAPE_DISABLED = (char) -2;

    /**
     * Comma separated, '"' as encapsulator, no comments, no escape char;
     * leading and trailing whitespaces and empty lines are ignored, unicode
     * escapes are not interpreted.
     *
     * The reference is final, but the instance itself is still mutable
     * through its setters; {@link #clone()} it before customizing.
     */
    public static final CSVStrategy DEFAULT_STRATEGY =
            new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);

    /**
     * Like {@link #DEFAULT_STRATEGY}, but whitespaces and empty lines are
     * kept. The instance is mutable; clone it before customizing.
     */
    public static final CSVStrategy EXCEL_STRATEGY =
            new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);

    /**
     * Like {@link #DEFAULT_STRATEGY}, but with a tab as delimiter.
     * The instance is mutable; clone it before customizing.
     */
    public static final CSVStrategy TDF_STRATEGY =
            new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);

    private char delimiter;
    private char encapsulator;
    private char commentStart;
    private char escape;
    private boolean ignoreLeadingWhitespaces;
    private boolean ignoreTrailingWhitespaces;
    private boolean interpretUnicodeEscapes;
    private boolean ignoreEmptyLines;

    /**
     * Creates a strategy with the given special characters. Leading
     * whitespaces and empty lines are ignored, unicode escapes are not
     * interpreted; escaping is disabled and trailing whitespaces are ignored
     * (values supplied by the six-argument constructor this one delegates to).
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     */
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, true, false, true);
    }

    /**
     * Creates a fully customized CSV strategy.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification, or
     *                     {@link #COMMENTS_DISABLED}
     * @param escape a Char used as escape marker, or {@link #ESCAPE_DISABLED}
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *                                ignored
     * @param ignoreTrailingWhitespace TRUE when trailing whitespaces should be
     *                                ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *                                interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
            char delimiter,
            char encapsulator,
            char commentStart,
            char escape,
            boolean ignoreLeadingWhitespace,
            boolean ignoreTrailingWhitespace,
            boolean interpretUnicodeEscapes,
            boolean ignoreEmptyLines) {
        // The setters are used (rather than direct field writes) to keep the
        // original construction behaviour for subclasses that override them.
        setDelimiter(delimiter);
        setEncapsulator(encapsulator);
        setCommentStart(commentStart);
        setEscape(escape);
        setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
        setIgnoreTrailingWhitespaces(ignoreTrailingWhitespace);
        setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
        setIgnoreEmptyLines(ignoreEmptyLines);
    }

    /**
     * Creates a strategy without an escape char that ignores trailing
     * whitespaces.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *                                ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *                                interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     * @deprecated use the constructor that also takes the escape char and
     *             the trailing-whitespace flag
     */
    public CSVStrategy(
            char delimiter,
            char encapsulator,
            char commentStart,
            boolean ignoreLeadingWhitespace,
            boolean interpretUnicodeEscapes,
            boolean ignoreEmptyLines) {
        this(delimiter, encapsulator, commentStart, CSVStrategy.ESCAPE_DISABLED, ignoreLeadingWhitespace,
                true, interpretUnicodeEscapes, ignoreEmptyLines);
    }

    public void setDelimiter(char delimiter) { this.delimiter = delimiter; }
    public char getDelimiter() { return this.delimiter; }

    public void setEncapsulator(char encapsulator) { this.encapsulator = encapsulator; }
    public char getEncapsulator() { return this.encapsulator; }

    public void setCommentStart(char commentStart) { this.commentStart = commentStart; }
    public char getCommentStart() { return this.commentStart; }

    /** @return true when the comment char equals {@link #COMMENTS_DISABLED} */
    public boolean isCommentingDisabled() { return this.commentStart == COMMENTS_DISABLED; }

    public void setEscape(char escape) { this.escape = escape; }
    public char getEscape() { return this.escape; }

    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
    }
    public boolean getIgnoreLeadingWhitespaces() { return this.ignoreLeadingWhitespaces; }

    public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) {
        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
    }
    public boolean getIgnoreTrailingWhitespaces() { return this.ignoreTrailingWhitespaces; }

    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
    }
    public boolean getUnicodeEscapeInterpretation() { return this.interpretUnicodeEscapes; }

    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }
    public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }

    /**
     * @return a field-by-field copy of this strategy (all fields are
     *         primitives, so the copy is fully independent)
     */
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);  // impossible
        }
    }
}
./src/java/org/apache/commons/csv/CSVParser.java 0000644 0001750 0001750 00000046720 11061274763 022346 0 ustar janpascal janpascal /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.util.ArrayList;
/**
* Parses CSV files according to the specified configuration.
*
* Because CSV appears in many different dialects, the parser supports many
* configuration settings by allowing the specification of a {@link CSVStrategy}.
*
* <p>Parsing of a csv-string having tabs as separators,
* '"' as an optional value encapsulator, and comments starting with '#':</p>
* <pre>
*  String[][] data =
*   (new CSVParser(new StringReader("a\tb\nc\td"), new CSVStrategy('\t','"','#'))).getAllValues();
* </pre>
*
* <p>Parsing of a csv-string in Excel CSV format</p>
* <pre>
*  String[][] data =
*   (new CSVParser(new StringReader("a;b\nc;d"), CSVStrategy.EXCEL_STRATEGY)).getAllValues();
* </pre>
*
* <p>
* Internal parser state is completely covered by the strategy
* and the reader-state.</p>
*
* <p>See the package documentation for more details.</p>
*/ public class CSVParser { /** length of the initial token (content-)buffer */ private static final int INITIAL_TOKEN_LENGTH = 50; // the token types /** Token has no valid content, i.e. is in its initilized state. */ protected static final int TT_INVALID = -1; /** Token with content, at beginning or in the middle of a line. */ protected static final int TT_TOKEN = 0; /** Token (which can have content) when end of file is reached. */ protected static final int TT_EOF = 1; /** Token with content when end of a line is reached. */ protected static final int TT_EORECORD = 2; /** Immutable empty String array. */ private static final String[] EMPTY_STRING_ARRAY = new String[0]; // the input stream private final ExtendedBufferedReader in; // TODO: this can be made final if setStrategy is removed private CSVStrategy strategy; // the following objects are shared to reduce garbage /** A record buffer for getLine(). Grows as necessary and is reused. */ private final ArrayList record = new ArrayList(); private final Token reusableToken = new Token(); private final CharBuffer wsBuf = new CharBuffer(); private final CharBuffer code = new CharBuffer(4); /** * Token is an internal token representation. * * It is used as contract between the lexer and the parser. */ static class Token { /** Token type, see TT_xxx constants. */ int type = TT_INVALID; /** The content buffer. */ CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH); /** Token ready flag: indicates a valid token with content (ready for the parser). */ boolean isReady; Token reset() { content.clear(); type = TT_INVALID; isReady = false; return this; } } // ====================================================== // the constructor // ====================================================== /** * Default strategy for the parser follows the default {@link CSVStrategy}. * * @param input an InputStream containing "csv-formatted" stream * @deprecated use {@link #CSVParser(Reader)}. 
*/ public CSVParser(InputStream input) { this(new InputStreamReader(input)); } /** * CSV parser using the default {@link CSVStrategy}. * * @param input a Reader containing "csv-formatted" input */ public CSVParser(Reader input) { // note: must match default-CSV-strategy !! this(input, ','); } /** * Customized value delimiter parser. * * The parser follows the default {@link CSVStrategy} * except for the delimiter setting. * * @param input a Reader based on "csv-formatted" input * @param delimiter a Char used for value separation * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}. */ public CSVParser(Reader input, char delimiter) { this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED); } /** * Customized csv parser. * * The parser parses according to the given CSV dialect settings. * Leading whitespaces are truncated, unicode escapes are * not interpreted and empty lines are ignored. * * @param input a Reader based on "csv-formatted" input * @param delimiter a Char used for value separation * @param encapsulator a Char used as value encapsulation marker * @param commentStart a Char used for comment identification * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}. */ public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) { this(input, new CSVStrategy(delimiter, encapsulator, commentStart)); } /** * Customized CSV parser using the given {@link CSVStrategy} * * @param input a Reader containing "csv-formatted" input * @param strategy the CSVStrategy used for CSV parsing */ public CSVParser(Reader input, CSVStrategy strategy) { this.in = new ExtendedBufferedReader(input); this.strategy = strategy; } // ====================================================== // the parser // ====================================================== /** * Parses the CSV according to the given strategy * and returns the content as an array of records * (whereas records are arrays of single values). *
* The returned content starts at the current parse-position in
* the stream.
*
* @return matrix of records x values ('null' when end of file)
* @throws IOException on parse error or input read-failure
*/
public String[][] getAllValues() throws IOException {
    ArrayList rows = new ArrayList();
    // pull records until getLine() signals the end of the input with null
    for (String[] row = getLine(); row != null; row = getLine()) {
        rows.add(row);
    }
    // no record at all is reported as null, not as an empty matrix
    if (rows.isEmpty()) {
        return null;
    }
    return (String[][]) rows.toArray(new String[rows.size()][]);
}
/**
* Parses the CSV according to the given strategy
* and returns the next csv-value as string.
*
* @return next value in the input stream ('null' when end of file)
* @throws IOException on parse error or input read-failure
*/
public String nextValue() throws IOException {
    Token tkn = nextToken();
    switch (tkn.type) {
        case TT_TOKEN:
        case TT_EORECORD:
            return tkn.content.toString();
        case TT_EOF:
            // A TT_EOF token that is flagged ready still carries a value:
            // nextToken(Token) sets isReady for the (empty) value that
            // follows a trailing delimiter at the end of the input, and
            // getLine() adds such a token to the record. Returning null
            // unconditionally would silently drop that last value. A
            // TT_EOF token that is not ready means the input is exhausted.
            return tkn.isReady ? tkn.content.toString() : null;
        case TT_INVALID:
        default:
            // error no token available (or error)
            throw new IOException(
                "(line " + getLineNumber()
                + ") invalid parse sequence");
    }
}
/**
* Parses from the current point in the stream til
* the end of the current line.
*
* @return array of values til end of line
* ('null' when end of file has been reached)
* @throws IOException on parse error or input read-failure
*/
public String[] getLine() throws IOException {
    record.clear();
    // set when the input ended without delivering a value
    boolean endOfInputWithoutValue = false;
    do {
        reusableToken.reset();
        nextToken(reusableToken);
        final int type = reusableToken.type;
        if (type == TT_TOKEN || type == TT_EORECORD) {
            record.add(reusableToken.content.toString());
        } else if (type == TT_EOF) {
            // an EOF token only carries a value when it is flagged ready
            if (reusableToken.isReady) {
                record.add(reusableToken.content.toString());
            } else {
                endOfInputWithoutValue = true;
            }
        } else {
            // TT_INVALID or unknown type: throw IOException
            throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
        }
        // only a plain TT_TOKEN means that more values follow in this record
    } while (reusableToken.type == TT_TOKEN);

    if (!record.isEmpty()) {
        return (String[]) record.toArray(new String[record.size()]);
    }
    return endOfInputWithoutValue ? null : EMPTY_STRING_ARRAY;
}
/**
* Returns the current line number in the input stream.
*
* ATTENTION: in case your csv has multiline-values the returned
* number does not correspond to the record-number
*
* @return current line number
*/
public int getLineNumber() {
    // the line count is maintained by the underlying reader
    final int currentLine = this.in.getLineNumber();
    return currentLine;
}
// ======================================================
// the lexer(s)
// ======================================================
/**
* Convenience method for <code>nextToken(new Token())</code>.
*/
protected Token nextToken() throws IOException {
    // every call lexes into a brand-new token instead of a shared one
    Token freshToken = new Token();
    return nextToken(freshToken);
}
/**
* Returns the next token.
*
* A token corresponds to a term, a record change or an
* end-of-file indicator.
*
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the
* Token.
* @return the next token found
* @throws IOException on stream access error
*/
protected Token nextToken(Token tkn) throws IOException {
// Lexes one token into tkn: optionally skips empty lines, detects the end of
// the input, then classifies the token start (comment, delimiter, line end,
// encapsulator, end of file, or plain text) and delegates where needed.
wsBuf.clear(); // reuse the shared whitespace buffer
// get the last read char (required for empty line detection)
int lastChar = in.readAgain();
// read the next char and set eol
/* note: unfortunately isEndOfLine may consume a character silently.
* this has no effect outside of the method. so a simple workaround
* is to call 'readAgain' on the stream...
* uh: might using objects instead of base-types (jdk1.5 autoboxing!)
*/
int c = in.read();
boolean eol = isEndOfLine(c);
c = in.readAgain();
// empty line detection: eol AND (last char was EOL or beginning)
// (only active when the strategy asks for empty lines to be ignored)
while (strategy.getIgnoreEmptyLines() && eol
&& (lastChar == '\n'
|| lastChar == ExtendedBufferedReader.UNDEFINED)
&& !isEndOfFile(lastChar)) {
// go one char ahead ...
lastChar = c;
c = in.read();
eol = isEndOfLine(c);
c = in.readAgain();
// reached end of file without any content (empty line at the end)
// note: isReady stays untouched here, so the caller sees a TT_EOF token
// without a value
if (isEndOfFile(c)) {
tkn.type = TT_EOF;
return tkn;
}
}
// did we reach eof during the last iteration already ? TT_EOF
// (an EOF directly after a delimiter is NOT handled here: it falls through
// to the loop below, which reports it as a ready TT_EOF token)
if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
tkn.type = TT_EOF;
return tkn;
}
// important: make sure a new char gets consumed in each iteration
while (!tkn.isReady) {
// ignore whitespaces at beginning of a token
// (they are remembered in wsBuf so they can be restored further down)
while (isWhitespace(c) && !eol) {
wsBuf.append((char) c);
c = in.read();
eol = isEndOfLine(c);
}
// ok, start of token reached: comment, encapsulated, or token
if (c == strategy.getCommentStart()) {
// ignore everything till end of line and continue (incr linecount)
// NOTE(review): the recursive call can return a TT_EOF token that is not
// ready (see the two early returns above); the enclosing loop then runs
// again with the unchanged c - confirm this cannot spin on a comment in
// the last line of the input.
in.readLine();
tkn = nextToken(tkn.reset());
} else if (c == strategy.getDelimiter()) {
// empty token return TT_TOKEN("")
tkn.type = TT_TOKEN;
tkn.isReady = true;
} else if (eol) {
// empty token return TT_EORECORD("")
//noop: tkn.content.append("");
tkn.type = TT_EORECORD;
tkn.isReady = true;
} else if (c == strategy.getEncapsulator()) {
// consume encapsulated token
encapsulatedTokenLexer(tkn, c);
} else if (isEndOfFile(c)) {
// end of file return TT_EOF()
// (flagged ready: the token stands for an empty last value)
//noop: tkn.content.append("");
tkn.type = TT_EOF;
tkn.isReady = true;
} else {
// next token must be a simple token
// add removed blanks when not ignoring whitespace chars...
if (!strategy.getIgnoreLeadingWhitespaces()) {
tkn.content.append(wsBuf);
}
simpleTokenLexer(tkn, c);
}
}
return tkn;
}
/**
* A simple token lexer
*
* Simple token are tokens which are not surrounded by encapsulators.
* A simple token might contain escaped delimiters (as \, or \;). The
* token is finished when one of the following conditions become true:
*
There are currently no official downloads, and will not be until CSV moves out of the Sandbox.
Nightly Builds are built once a day from the current SVN HEAD. This is (nearly) the latest code and so should be treated with caution.
Apache Commons CSV is hosted on the Apache subversion repository.
The project URL is:
http://svn.apache.org/repos/asf/commons/sandbox/csv/trunk
The best way to view the repository is via the subversion viewer.
The alternative is to use the native subversion display.
For more information on subversion and creating patches see the Apache Contributors Guide.