001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Constants.BACKSLASH; 021import static org.apache.commons.csv.Constants.COMMA; 022import static org.apache.commons.csv.Constants.COMMENT; 023import static org.apache.commons.csv.Constants.EMPTY; 024import static org.apache.commons.csv.Constants.CR; 025import static org.apache.commons.csv.Constants.CRLF; 026import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; 027import static org.apache.commons.csv.Constants.LF; 028import static org.apache.commons.csv.Constants.PIPE; 029import static org.apache.commons.csv.Constants.SP; 030import static org.apache.commons.csv.Constants.TAB; 031 032import java.io.File; 033import java.io.FileOutputStream; 034import java.io.IOException; 035import java.io.OutputStreamWriter; 036import java.io.Reader; 037import java.io.Serializable; 038import java.io.StringWriter; 039import java.nio.charset.Charset; 040import java.nio.file.Files; 041import java.nio.file.Path; 042import java.sql.ResultSet; 043import java.sql.ResultSetMetaData; 044import java.sql.SQLException; 045import java.util.Arrays; 046import java.util.HashSet; 
047import java.util.Set; 048 049/** 050 * Specifies the format of a CSV file and parses input. 051 * 052 * <h2>Using predefined formats</h2> 053 * 054 * <p> 055 * You can use one of the predefined formats: 056 * </p> 057 * 058 * <ul> 059 * <li>{@link #DEFAULT}</li> 060 * <li>{@link #EXCEL}</li> 061 * <li>{@link #MYSQL}</li> 062 * <li>{@link #RFC4180}</li> 063 * <li>{@link #TDF}</li> 064 * </ul> 065 * 066 * <p> 067 * For example: 068 * </p> 069 * 070 * <pre> 071 * CSVParser parser = CSVFormat.EXCEL.parse(reader); 072 * </pre> 073 * 074 * <p> 075 * The {@link CSVParser} provides static methods to parse other input types, for example: 076 * </p> 077 * 078 * <pre> 079 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); 080 * </pre> 081 * 082 * <h2>Defining formats</h2> 083 * 084 * <p> 085 * You can extend a format by calling the {@code with} methods. For example: 086 * </p> 087 * 088 * <pre> 089 * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true); 090 * </pre> 091 * 092 * <h2>Defining column names</h2> 093 * 094 * <p> 095 * To define the column names you want to use to access records, write: 096 * </p> 097 * 098 * <pre> 099 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3"); 100 * </pre> 101 * 102 * <p> 103 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and 104 * assumes that your CSV source does not contain a first record that also defines column names. 105 * 106 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling 107 * {@link #withSkipHeaderRecord(boolean)} with {@code true}. 108 * </p> 109 * 110 * <h2>Parsing</h2> 111 * 112 * <p> 113 * You can use a format directly to parse a reader. 
For example, to parse an Excel file with columns header, write: 114 * </p> 115 * 116 * <pre> 117 * Reader in = ...; 118 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in); 119 * </pre> 120 * 121 * <p> 122 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. 123 * </p> 124 * 125 * <h2>Referencing columns safely</h2> 126 * 127 * <p> 128 * If your source contains a header record, you can simplify your code and safely reference columns, by using 129 * {@link #withHeader(String...)} with no arguments: 130 * </p> 131 * 132 * <pre> 133 * CSVFormat.EXCEL.withHeader(); 134 * </pre> 135 * 136 * <p> 137 * This causes the parser to read the first record and use its values as column names. 138 * 139 * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: 140 * </p> 141 * 142 * <pre> 143 * String value = record.get("Col1"); 144 * </pre> 145 * 146 * <p> 147 * This makes your code impervious to changes in column order in the CSV file. 148 * </p> 149 * 150 * <h2>Notes</h2> 151 * 152 * <p> 153 * This class is immutable. 154 * </p> 155 */ 156public final class CSVFormat implements Serializable { 157 158 /** 159 * Predefines formats. 
*
     * <p>
     * Each constant wraps the {@link CSVFormat} constant of the matching name and exposes it through
     * {@link #getFormat()}.
     * </p>
     *
     * @since 1.2
     */
    public enum Predefined {

        /**
         * The {@link CSVFormat#DEFAULT} format.
         *
         * @see CSVFormat#DEFAULT
         */
        Default(CSVFormat.DEFAULT),

        /**
         * The {@link CSVFormat#EXCEL} format.
         *
         * @see CSVFormat#EXCEL
         */
        Excel(CSVFormat.EXCEL),

        /**
         * The {@link CSVFormat#INFORMIX_UNLOAD} format.
         *
         * @see CSVFormat#INFORMIX_UNLOAD
         * @since 1.3
         */
        InformixUnload(CSVFormat.INFORMIX_UNLOAD),

        /**
         * The {@link CSVFormat#INFORMIX_UNLOAD_CSV} format.
         *
         * @see CSVFormat#INFORMIX_UNLOAD_CSV
         * @since 1.3
         */
        InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV),

        /**
         * The {@link CSVFormat#MYSQL} format.
         *
         * @see CSVFormat#MYSQL
         */
        MySQL(CSVFormat.MYSQL),

        /**
         * The {@link CSVFormat#POSTGRESQL_CSV} format.
         *
         * @see CSVFormat#POSTGRESQL_CSV
         * @since 1.5
         */
        PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV),

        /**
         * The {@link CSVFormat#POSTGRESQL_TEXT} format.
         *
         * @see CSVFormat#POSTGRESQL_TEXT
         */
        PostgreSQLText(CSVFormat.POSTGRESQL_TEXT),

        /**
         * The {@link CSVFormat#RFC4180} format.
         *
         * @see CSVFormat#RFC4180
         */
        RFC4180(CSVFormat.RFC4180),

        /**
         * The {@link CSVFormat#TDF} format.
         *
         * @see CSVFormat#TDF
         */
        TDF(CSVFormat.TDF);

        /** The format this constant stands for. */
        private final CSVFormat format;

        /**
         * Binds the constant to its format.
         *
         * @param format
         *            the format returned by {@link #getFormat()}
         */
        Predefined(final CSVFormat format) {
            this.format = format;
        }

        /**
         * Gets the format.
         *
         * @return the format.
         */
        public CSVFormat getFormat() {
            return format;
        }
    }

    /**
     * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreEmptyLines(true)</li>
     * </ul>
     *
     * @see Predefined#Default
     */
    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
            null, null, null, false, false, false, false, false);

    /**
     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
     * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
*
     * <p>
     * For example for parsing or generating a CSV file on a French system the following format will be used:
     * </p>
     *
     * <pre>
     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
     * </pre>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>{@link #withDelimiter(char) withDelimiter(',')}</li>
     * <li>{@link #withQuote(char) withQuote('"')}</li>
     * <li>{@link #withRecordSeparator(String) withRecordSeparator("\r\n")}</li>
     * <li>{@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}</li>
     * <li>{@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}</li>
     * </ul>
     * <p>
     * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
     * withAllowMissingColumnNames(true)}.
     * </p>
     *
     * @see Predefined#Excel
     */
    // @formatter:off
    public static final CSVFormat EXCEL = DEFAULT
            .withIgnoreEmptyLines(false)
            .withAllowMissingColumnNames();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
     *
     * <p>
     * This format uses the {@code PIPE} constant as the delimiter and a LF character as the line separator. The quote
     * character is {@code '"'} and special characters are escaped with {@code '\'}. No NULL string is set here.
* </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(PIPE)</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator('\n')</li>
     * <li>withEscape('\\')</li>
     * </ul>
     *
     * @see Predefined#InformixUnload
     * @see <a href=
     *      "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD = DEFAULT
            .withDelimiter(PIPE)
            .withEscape(BACKSLASH)
            .withQuote(DOUBLE_QUOTE_CHAR)
            .withRecordSeparator(LF);
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
     *
     * <p>
     * This is a comma-delimited format with a LF character as the line separator. The quote character is {@code '"'}.
     * Unlike {@link #INFORMIX_UNLOAD}, this definition sets no escape character; no NULL string is set either.
     * </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator('\n')</li>
     * </ul>
     *
     * @see Predefined#InformixUnloadCsv
     * @see <a href=
     *      "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT
            .withDelimiter(COMMA)
            .withQuote(DOUBLE_QUOTE_CHAR)
            .withRecordSeparator(LF);
    // @formatter:on

    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with a LF character as the line separator.
Values are not quoted and special
     * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
     * </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter('\t')</li>
     * <li>withQuote(null)</li>
     * <li>withRecordSeparator('\n')</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * <li>withEscape('\\')</li>
     * <li>withNullString("\\N")</li>
     * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
     *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // @formatter:off
    public static final CSVFormat MYSQL = DEFAULT
            .withDelimiter(TAB)
            .withEscape(BACKSLASH)
            .withIgnoreEmptyLines(false)
            .withQuote(null)
            .withRecordSeparator(LF)
            .withNullString("\\N")
            .withQuoteMode(QuoteMode.ALL_NON_NULL);
    // @formatter:on

    /**
     * Default PostgreSQL CSV format used by the {@code COPY} operation.
     *
     * <p>
     * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
     * characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
* </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator('\n')</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * <li>withEscape('"')</li>
     * <li>withNullString("")</li>
     * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
     * </ul>
     *
     * @see Predefined#PostgreSQLCsv
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *      documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_CSV = DEFAULT
            .withDelimiter(COMMA)
            .withEscape(DOUBLE_QUOTE_CHAR)
            .withIgnoreEmptyLines(false)
            .withQuote(DOUBLE_QUOTE_CHAR)
            .withRecordSeparator(LF)
            .withNullString(EMPTY)
            .withQuoteMode(QuoteMode.ALL_NON_NULL);
    // @formatter:on

    /**
     * Default PostgreSQL text format used by the {@code COPY} operation.
     *
     * <p>
     * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
     * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter('\t')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator('\n')</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * <li>withEscape('"')</li>
     * <li>withNullString("\\N")</li>
     * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
     * </ul>
     *
     * @see Predefined#PostgreSQLText
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *      documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_TEXT = DEFAULT
            .withDelimiter(TAB)
            .withEscape(DOUBLE_QUOTE_CHAR)
            .withIgnoreEmptyLines(false)
            .withQuote(DOUBLE_QUOTE_CHAR)
            .withRecordSeparator(LF)
            .withNullString("\\N")
            .withQuoteMode(QuoteMode.ALL_NON_NULL);
    // @formatter:on

    /**
     * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * </ul>
     *
     * @see Predefined#RFC4180
     */
    public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);

    /** Serialization version identifier for this {@link Serializable} class. */
    private static final long serialVersionUID = 1L;

    /**
     * Tab-delimited format.
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter('\t')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreSurroundingSpaces(true)</li>
     * </ul>
     *
     * @see Predefined#TDF
     */
    // @formatter:off
    public static final CSVFormat TDF = DEFAULT
            .withDelimiter(TAB)
            .withIgnoreSurroundingSpaces();
    // @formatter:on

    /**
     * Returns true if the given character is a line break character.
496 * 497 * @param c 498 * the character to check 499 * 500 * @return true if <code>c</code> is a line break character 501 */ 502 private static boolean isLineBreak(final char c) { 503 return c == LF || c == CR; 504 } 505 506 /** 507 * Returns true if the given character is a line break character. 508 * 509 * @param c 510 * the character to check, may be null 511 * 512 * @return true if <code>c</code> is a line break character (and not null) 513 */ 514 private static boolean isLineBreak(final Character c) { 515 return c != null && isLineBreak(c.charValue()); 516 } 517 518 /** 519 * Creates a new CSV format with the specified delimiter. 520 * 521 * <p> 522 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized 523 * with null/false. 524 * </p> 525 * 526 * @param delimiter 527 * the char used for value separation, must not be a line break character 528 * @return a new CSV format. 529 * @throws IllegalArgumentException 530 * if the delimiter is a line break character 531 * 532 * @see #DEFAULT 533 * @see #RFC4180 534 * @see #MYSQL 535 * @see #EXCEL 536 * @see #TDF 537 */ 538 public static CSVFormat newFormat(final char delimiter) { 539 return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, 540 false, false, false); 541 } 542 543 /** 544 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 
*
     * <p>
     * The lookup is delegated to {@code Predefined.valueOf(String)}, so the name must match an enum constant exactly.
     * </p>
     *
     * @param format
     *            the name of a {@link CSVFormat.Predefined} constant
     * @return one of the predefined formats
     * @throws IllegalArgumentException
     *             if {@link CSVFormat.Predefined} has no constant with the given name
     * @since 1.2
     */
    public static CSVFormat valueOf(final String format) {
        return CSVFormat.Predefined.valueOf(format).getFormat();
    }

    // whether missing column names are allowed when parsing the header line
    private final boolean allowMissingColumnNames;

    private final Character commentMarker; // null if commenting is disabled

    // the character separating values
    private final char delimiter;

    private final Character escapeCharacter; // null if escaping is disabled

    private final String[] header; // array of header column names

    private final String[] headerComments; // array of header comment lines

    // whether empty lines between records are ignored when parsing
    private final boolean ignoreEmptyLines;

    private final boolean ignoreHeaderCase; // should ignore header names case

    private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?

    private final String nullString; // the string to be used for null values

    private final Character quoteCharacter; // null if quoting is disabled

    // the quote policy for output fields; may be null
    private final QuoteMode quoteMode;

    private final String recordSeparator; // for outputs

    // whether to skip the header record
    private final boolean skipHeaderRecord;

    // whether to add a trailing delimiter
    private final boolean trailingDelimiter;

    // whether to trim leading and trailing blanks
    private final boolean trim;

    /**
     * Creates a customized CSV format.
589 * 590 * @param delimiter 591 * the char used for value separation, must not be a line break character 592 * @param quoteChar 593 * the Character used as value encapsulation marker, may be {@code null} to disable 594 * @param quoteMode 595 * the quote mode 596 * @param commentStart 597 * the Character used for comment identification, may be {@code null} to disable 598 * @param escape 599 * the Character used to escape special characters in values, may be {@code null} to disable 600 * @param ignoreSurroundingSpaces 601 * {@code true} when whitespaces enclosing values should be ignored 602 * @param ignoreEmptyLines 603 * {@code true} when the parser should skip empty lines 604 * @param recordSeparator 605 * the line separator to use for output 606 * @param nullString 607 * the line separator to use for output 608 * @param headerComments 609 * the comments to be printed by the Printer before the actual CSV data 610 * @param header 611 * the header 612 * @param skipHeaderRecord 613 * TODO 614 * @param allowMissingColumnNames 615 * TODO 616 * @param ignoreHeaderCase 617 * TODO 618 * @param trim 619 * TODO 620 * @param trailingDelimiter 621 * TODO 622 * @throws IllegalArgumentException 623 * if the delimiter is a line break character 624 */ 625 private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, 626 final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, 627 final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, 628 final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, 629 final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, 630 final boolean trailingDelimiter) { 631 this.delimiter = delimiter; 632 this.quoteCharacter = quoteChar; 633 this.quoteMode = quoteMode; 634 this.commentMarker = commentStart; 635 this.escapeCharacter = escape; 636 this.ignoreSurroundingSpaces = 
ignoreSurroundingSpaces; 637 this.allowMissingColumnNames = allowMissingColumnNames; 638 this.ignoreEmptyLines = ignoreEmptyLines; 639 this.recordSeparator = recordSeparator; 640 this.nullString = nullString; 641 this.headerComments = toStringArray(headerComments); 642 this.header = header == null ? null : header.clone(); 643 this.skipHeaderRecord = skipHeaderRecord; 644 this.ignoreHeaderCase = ignoreHeaderCase; 645 this.trailingDelimiter = trailingDelimiter; 646 this.trim = trim; 647 validate(); 648 } 649 650 @Override 651 public boolean equals(final Object obj) { 652 if (this == obj) { 653 return true; 654 } 655 if (obj == null) { 656 return false; 657 } 658 if (getClass() != obj.getClass()) { 659 return false; 660 } 661 662 final CSVFormat other = (CSVFormat) obj; 663 if (delimiter != other.delimiter) { 664 return false; 665 } 666 if (quoteMode != other.quoteMode) { 667 return false; 668 } 669 if (quoteCharacter == null) { 670 if (other.quoteCharacter != null) { 671 return false; 672 } 673 } else if (!quoteCharacter.equals(other.quoteCharacter)) { 674 return false; 675 } 676 if (commentMarker == null) { 677 if (other.commentMarker != null) { 678 return false; 679 } 680 } else if (!commentMarker.equals(other.commentMarker)) { 681 return false; 682 } 683 if (escapeCharacter == null) { 684 if (other.escapeCharacter != null) { 685 return false; 686 } 687 } else if (!escapeCharacter.equals(other.escapeCharacter)) { 688 return false; 689 } 690 if (nullString == null) { 691 if (other.nullString != null) { 692 return false; 693 } 694 } else if (!nullString.equals(other.nullString)) { 695 return false; 696 } 697 if (!Arrays.equals(header, other.header)) { 698 return false; 699 } 700 if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) { 701 return false; 702 } 703 if (ignoreEmptyLines != other.ignoreEmptyLines) { 704 return false; 705 } 706 if (skipHeaderRecord != other.skipHeaderRecord) { 707 return false; 708 } 709 if (recordSeparator == null) { 710 if 
(other.recordSeparator != null) { 711 return false; 712 } 713 } else if (!recordSeparator.equals(other.recordSeparator)) { 714 return false; 715 } 716 return true; 717 } 718 719 /** 720 * Formats the specified values. 721 * 722 * @param values 723 * the values to format 724 * @return the formatted values 725 */ 726 public String format(final Object... values) { 727 final StringWriter out = new StringWriter(); 728 try (final CSVPrinter csvPrinter = new CSVPrinter(out, this)) { 729 csvPrinter.printRecord(values); 730 return out.toString().trim(); 731 } catch (final IOException e) { 732 // should not happen because a StringWriter does not do IO. 733 throw new IllegalStateException(e); 734 } 735 } 736 737 /** 738 * Specifies whether missing column names are allowed when parsing the header line. 739 * 740 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an 741 * {@link IllegalArgumentException}. 742 */ 743 public boolean getAllowMissingColumnNames() { 744 return allowMissingColumnNames; 745 } 746 747 /** 748 * Returns the character marking the start of a line comment. 749 * 750 * @return the comment start marker, may be {@code null} 751 */ 752 public Character getCommentMarker() { 753 return commentMarker; 754 } 755 756 /** 757 * Returns the character delimiting the values (typically ';', ',' or '\t'). 758 * 759 * @return the delimiter character 760 */ 761 public char getDelimiter() { 762 return delimiter; 763 } 764 765 /** 766 * Returns the escape character. 767 * 768 * @return the escape character, may be {@code null} 769 */ 770 public Character getEscapeCharacter() { 771 return escapeCharacter; 772 } 773 774 /** 775 * Returns a copy of the header array. 776 * 777 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file 778 */ 779 public String[] getHeader() { 780 return header != null ? 
header.clone() : null; 781 } 782 783 /** 784 * Returns a copy of the header comment array. 785 * 786 * @return a copy of the header comment array; {@code null} if disabled. 787 */ 788 public String[] getHeaderComments() { 789 return headerComments != null ? headerComments.clone() : null; 790 } 791 792 /** 793 * Specifies whether empty lines between records are ignored when parsing input. 794 * 795 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty 796 * records. 797 */ 798 public boolean getIgnoreEmptyLines() { 799 return ignoreEmptyLines; 800 } 801 802 /** 803 * Specifies whether header names will be accessed ignoring case. 804 * 805 * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. 806 * @since 1.3 807 */ 808 public boolean getIgnoreHeaderCase() { 809 return ignoreHeaderCase; 810 } 811 812 /** 813 * Specifies whether spaces around values are ignored when parsing input. 814 * 815 * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. 816 */ 817 public boolean getIgnoreSurroundingSpaces() { 818 return ignoreSurroundingSpaces; 819 } 820 821 /** 822 * Gets the String to convert to and from {@code null}. 823 * <ul> 824 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 825 * records.</li> 826 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 827 * </ul> 828 * 829 * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} 830 */ 831 public String getNullString() { 832 return nullString; 833 } 834 835 /** 836 * Returns the character used to encapsulate values containing special characters. 
837 * 838 * @return the quoteChar character, may be {@code null} 839 */ 840 public Character getQuoteCharacter() { 841 return quoteCharacter; 842 } 843 844 /** 845 * Returns the quote policy output fields. 846 * 847 * @return the quote policy 848 */ 849 public QuoteMode getQuoteMode() { 850 return quoteMode; 851 } 852 853 /** 854 * Returns the record separator delimiting output records. 855 * 856 * @return the record separator 857 */ 858 public String getRecordSeparator() { 859 return recordSeparator; 860 } 861 862 /** 863 * Returns whether to skip the header record. 864 * 865 * @return whether to skip the header record. 866 */ 867 public boolean getSkipHeaderRecord() { 868 return skipHeaderRecord; 869 } 870 871 /** 872 * Returns whether to add a trailing delimiter. 873 * 874 * @return whether to add a trailing delimiter. 875 * @since 1.3 876 */ 877 public boolean getTrailingDelimiter() { 878 return trailingDelimiter; 879 } 880 881 /** 882 * Returns whether to trim leading and trailing blanks. 883 * 884 * @return whether to trim leading and trailing blanks. 885 */ 886 public boolean getTrim() { 887 return trim; 888 } 889 890 @Override 891 public int hashCode() { 892 final int prime = 31; 893 int result = 1; 894 895 result = prime * result + delimiter; 896 result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode()); 897 result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode()); 898 result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode()); 899 result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode()); 900 result = prime * result + ((nullString == null) ? 0 : nullString.hashCode()); 901 result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237); 902 result = prime * result + (ignoreHeaderCase ? 1231 : 1237); 903 result = prime * result + (ignoreEmptyLines ? 1231 : 1237); 904 result = prime * result + (skipHeaderRecord ? 
1231 : 1237); 905 result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode()); 906 result = prime * result + Arrays.hashCode(header); 907 return result; 908 } 909 910 /** 911 * Specifies whether comments are supported by this format. 912 * 913 * Note that the comment introducer character is only recognized at the start of a line. 914 * 915 * @return {@code true} is comments are supported, {@code false} otherwise 916 */ 917 public boolean isCommentMarkerSet() { 918 return commentMarker != null; 919 } 920 921 /** 922 * Returns whether escape are being processed. 923 * 924 * @return {@code true} if escapes are processed 925 */ 926 public boolean isEscapeCharacterSet() { 927 return escapeCharacter != null; 928 } 929 930 /** 931 * Returns whether a nullString has been defined. 932 * 933 * @return {@code true} if a nullString is defined 934 */ 935 public boolean isNullStringSet() { 936 return nullString != null; 937 } 938 939 /** 940 * Returns whether a quoteChar has been defined. 941 * 942 * @return {@code true} if a quoteChar is defined 943 */ 944 public boolean isQuoteCharacterSet() { 945 return quoteCharacter != null; 946 } 947 948 /** 949 * Parses the specified content. 950 * 951 * <p> 952 * See also the various static parse methods on {@link CSVParser}. 953 * </p> 954 * 955 * @param in 956 * the input stream 957 * @return a parser over a stream of {@link CSVRecord}s. 958 * @throws IOException 959 * If an I/O error occurs 960 */ 961 public CSVParser parse(final Reader in) throws IOException { 962 return new CSVParser(in, this); 963 } 964 965 /** 966 * Prints to the specified output. 967 * 968 * <p> 969 * See also {@link CSVPrinter}. 970 * </p> 971 * 972 * @param out 973 * the output. 974 * @return a printer to an output. 975 * @throws IOException 976 * thrown if the optional header cannot be printed. 
977 */ 978 public CSVPrinter print(final Appendable out) throws IOException { 979 return new CSVPrinter(out, this); 980 } 981 982 /** 983 * Prints to the {@link System#out}. 984 * 985 * <p> 986 * See also {@link CSVPrinter}. 987 * </p> 988 * 989 * @return a printer to {@link System#out}. 990 * @throws IOException 991 * thrown if the optional header cannot be printed. 992 * @since 1.5 993 */ 994 public CSVPrinter printer() throws IOException { 995 return new CSVPrinter(System.out, this); 996 } 997 998 /** 999 * Prints to the specified output. 1000 * 1001 * <p> 1002 * See also {@link CSVPrinter}. 1003 * </p> 1004 * 1005 * @param out 1006 * the output. 1007 * @param charset 1008 * A charset. 1009 * @return a printer to an output. 1010 * @throws IOException 1011 * thrown if the optional header cannot be printed. 1012 * @since 1.5 1013 */ 1014 @SuppressWarnings("resource") 1015 public CSVPrinter print(final File out, Charset charset) throws IOException { 1016 // The writer will be closed when close() is called. 1017 return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); 1018 } 1019 1020 /** 1021 * Prints to the specified output. 1022 * 1023 * <p> 1024 * See also {@link CSVPrinter}. 1025 * </p> 1026 * 1027 * @param out 1028 * the output. 1029 * @param charset 1030 * A charset. 1031 * @return a printer to an output. 1032 * @throws IOException 1033 * thrown if the optional header cannot be printed. 1034 * @since 1.5 1035 */ 1036 public CSVPrinter print(final Path out, Charset charset) throws IOException { 1037 return print(Files.newBufferedWriter(out, charset)); 1038 } 1039 1040 /** 1041 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated 1042 * as needed. Useful when one wants to avoid creating CSVPrinters. 1043 * 1044 * @param value 1045 * value to output. 1046 * @param out 1047 * where to print the value. 1048 * @param newRecord 1049 * if this a new record. 
* @throws IOException
     *             If an I/O error occurs.
     * @since 1.4
     */
    public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
        // null values are considered empty
        // Only call CharSequence.toString() if you have to, helps GC-free use cases.
        CharSequence charSequence;
        if (value == null) {
            // https://issues.apache.org/jira/browse/CSV-203
            if (null == nullString) {
                charSequence = EMPTY;
            } else {
                if (QuoteMode.ALL == quoteMode) {
                    // under QuoteMode.ALL the null string itself is wrapped in the quote character
                    charSequence = quoteCharacter + nullString + quoteCharacter;
                } else {
                    charSequence = nullString;
                }
            }
        } else {
            charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString();
        }
        // trimming, when enabled, is applied to whatever text was chosen above
        charSequence = getTrim() ? trim(charSequence) : charSequence;
        this.print(value, charSequence, 0, charSequence.length(), out, newRecord);
    }

    /*
     * Writes one value: a delimiter first unless this is the first value of a record, then the text itself.
     * A null original object is appended as-is; otherwise quoting takes precedence over escaping, and the raw
     * characters are appended when neither a quote nor an escape character is set.
     */
    private void print(final Object object, final CharSequence value, final int offset, final int len,
            final Appendable out, final boolean newRecord) throws IOException {
        if (!newRecord) {
            out.append(getDelimiter());
        }
        if (object == null) {
            out.append(value);
        } else if (isQuoteCharacterSet()) {
            // the original object is needed so can check for Number
            printAndQuote(object, value, offset, len, out, newRecord);
        } else if (isEscapeCharacterSet()) {
            printAndEscape(value, offset, len, out);
        } else {
            out.append(value, offset, offset + len);
        }
    }

    /*
     * Appends value[offset, offset + len) to out, prefixing each CR, LF, delimiter and escape character with the
     * escape character. CR and LF are written as the letters 'r' and 'n' after the escape character.
     *
     * Note: must only be called if escaping is enabled, otherwise will generate NPE
     */
    private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out)
            throws IOException {
        // [start, pos) is the pending run of characters that need no escaping
        int start = offset;
        int pos = offset;
        final int end = offset + len;

        final char delim = getDelimiter();
        final char escape = getEscapeCharacter().charValue();

        while (pos < end) {
            char c = value.charAt(pos);
            if (c == CR || c == LF || c == delim || c == escape) {
                // write out segment up until this char
                if (pos > start) {
                    out.append(value, start, pos);
                }
                if (c == LF) {
                    c = 'n';
                } else if (c == CR) {
                    c = 'r';
                }

                out.append(escape);
                out.append(c);

                start = pos + 1; // start on the current char after this one
            }

            pos++;
        }

        // write last segment
        if (pos > start) {
            out.append(value, start, pos);
        }
    }

    /*
     * Note: must only be called if quoting is enabled, otherwise will generate NPE
     */
    // the original object is needed so can check for Number
    private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
            final Appendable out, final boolean newRecord) throws IOException {
        boolean quote = false;
        int start = offset;
        int pos = offset;
        final int end = offset + len;

        final char delimChar = getDelimiter();
        final char quoteChar = getQuoteCharacter().charValue();

        // a format without an explicit quote mode is treated as MINIMAL
        QuoteMode quoteModePolicy = getQuoteMode();
        if (quoteModePolicy == null) {
            quoteModePolicy = QuoteMode.MINIMAL;
        }
        switch (quoteModePolicy) {
        case ALL:
        case ALL_NON_NULL:
            quote = true;
            break;
        case NON_NUMERIC:
            quote = !(object instanceof Number);
            break;
        case NONE:
            // Use the existing escaping code
            printAndEscape(value, offset, len, out);
            return;
        case MINIMAL:
            if (len <= 0) {
                // always quote an empty token that is the first
                // on the line, as it may be the only thing on the
                // line. If it were not quoted in that case,
                // an empty line has no tokens.
1169 if (newRecord) { 1170 quote = true; 1171 } 1172 } else { 1173 char c = value.charAt(pos); 1174 1175 // RFC4180 (https://tools.ietf.org/html/rfc4180) TEXTDATA = %x20-21 / %x23-2B / %x2D-7E 1176 if (newRecord && (c < 0x20 || c > 0x21 && c < 0x23 || c > 0x2B && c < 0x2D || c > 0x7E)) { 1177 quote = true; 1178 } else if (c <= COMMENT) { 1179 // Some other chars at the start of a value caused the parser to fail, so for now 1180 // encapsulate if we start in anything less than '#'. We are being conservative 1181 // by including the default comment char too. 1182 quote = true; 1183 } else { 1184 while (pos < end) { 1185 c = value.charAt(pos); 1186 if (c == LF || c == CR || c == quoteChar || c == delimChar) { 1187 quote = true; 1188 break; 1189 } 1190 pos++; 1191 } 1192 1193 if (!quote) { 1194 pos = end - 1; 1195 c = value.charAt(pos); 1196 // Some other chars at the end caused the parser to fail, so for now 1197 // encapsulate if we end in anything less than ' ' 1198 if (c <= SP) { 1199 quote = true; 1200 } 1201 } 1202 } 1203 } 1204 1205 if (!quote) { 1206 // no encapsulation needed - write out the original value 1207 out.append(value, start, end); 1208 return; 1209 } 1210 break; 1211 default: 1212 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 1213 } 1214 1215 if (!quote) { 1216 // no encapsulation needed - write out the original value 1217 out.append(value, start, end); 1218 return; 1219 } 1220 1221 // we hit something that needed encapsulation 1222 out.append(quoteChar); 1223 1224 // Pick up where we left off: pos should be positioned on the first character that caused 1225 // the need for encapsulation. 
1226 while (pos < end) { 1227 final char c = value.charAt(pos); 1228 if (c == quoteChar) { 1229 // write out the chunk up until this point 1230 1231 // add 1 to the length to write out the encapsulator also 1232 out.append(value, start, pos + 1); 1233 // put the next starting position on the encapsulator so we will 1234 // write it out again with the next string (effectively doubling it) 1235 start = pos; 1236 } 1237 pos++; 1238 } 1239 1240 // write the last segment 1241 out.append(value, start, pos); 1242 out.append(quoteChar); 1243 } 1244 1245 /** 1246 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 1247 * 1248 * @param out 1249 * where to write 1250 * @throws IOException 1251 * If an I/O error occurs 1252 * @since 1.4 1253 */ 1254 public void println(final Appendable out) throws IOException { 1255 if (getTrailingDelimiter()) { 1256 out.append(getDelimiter()); 1257 } 1258 if (recordSeparator != null) { 1259 out.append(recordSeparator); 1260 } 1261 } 1262 1263 /** 1264 * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the 1265 * record separator. 1266 * 1267 * <p> 1268 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record 1269 * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. 1270 * </p> 1271 * 1272 * @param out 1273 * where to write. 1274 * @param values 1275 * values to output. 1276 * @throws IOException 1277 * If an I/O error occurs. 1278 * @since 1.4 1279 */ 1280 public void printRecord(final Appendable out, final Object... 
values) throws IOException { 1281 for (int i = 0; i < values.length; i++) { 1282 print(values[i], out, i == 0); 1283 } 1284 println(out); 1285 } 1286 1287 @Override 1288 public String toString() { 1289 final StringBuilder sb = new StringBuilder(); 1290 sb.append("Delimiter=<").append(delimiter).append('>'); 1291 if (isEscapeCharacterSet()) { 1292 sb.append(' '); 1293 sb.append("Escape=<").append(escapeCharacter).append('>'); 1294 } 1295 if (isQuoteCharacterSet()) { 1296 sb.append(' '); 1297 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 1298 } 1299 if (isCommentMarkerSet()) { 1300 sb.append(' '); 1301 sb.append("CommentStart=<").append(commentMarker).append('>'); 1302 } 1303 if (isNullStringSet()) { 1304 sb.append(' '); 1305 sb.append("NullString=<").append(nullString).append('>'); 1306 } 1307 if (recordSeparator != null) { 1308 sb.append(' '); 1309 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 1310 } 1311 if (getIgnoreEmptyLines()) { 1312 sb.append(" EmptyLines:ignored"); 1313 } 1314 if (getIgnoreSurroundingSpaces()) { 1315 sb.append(" SurroundingSpaces:ignored"); 1316 } 1317 if (getIgnoreHeaderCase()) { 1318 sb.append(" IgnoreHeaderCase:ignored"); 1319 } 1320 sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 1321 if (headerComments != null) { 1322 sb.append(' '); 1323 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 1324 } 1325 if (header != null) { 1326 sb.append(' '); 1327 sb.append("Header:").append(Arrays.toString(header)); 1328 } 1329 return sb.toString(); 1330 } 1331 1332 private String[] toStringArray(final Object[] values) { 1333 if (values == null) { 1334 return null; 1335 } 1336 final String[] strings = new String[values.length]; 1337 for (int i = 0; i < values.length; i++) { 1338 final Object value = values[i]; 1339 strings[i] = value == null ? 
null : value.toString(); 1340 } 1341 return strings; 1342 } 1343 1344 private CharSequence trim(final CharSequence charSequence) { 1345 if (charSequence instanceof String) { 1346 return ((String) charSequence).trim(); 1347 } 1348 final int count = charSequence.length(); 1349 int len = count; 1350 int pos = 0; 1351 1352 while (pos < len && charSequence.charAt(pos) <= SP) { 1353 pos++; 1354 } 1355 while (pos < len && charSequence.charAt(len - 1) <= SP) { 1356 len--; 1357 } 1358 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1359 } 1360 1361 /** 1362 * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. 1363 * 1364 * @throws IllegalArgumentException 1365 */ 1366 private void validate() throws IllegalArgumentException { 1367 if (isLineBreak(delimiter)) { 1368 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1369 } 1370 1371 if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) { 1372 throw new IllegalArgumentException( 1373 "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 1374 } 1375 1376 if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) { 1377 throw new IllegalArgumentException( 1378 "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 1379 } 1380 1381 if (commentMarker != null && delimiter == commentMarker.charValue()) { 1382 throw new IllegalArgumentException( 1383 "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 1384 } 1385 1386 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 1387 throw new IllegalArgumentException( 1388 "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 1389 } 1390 1391 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 1392 throw new IllegalArgumentException( 1393 "The comment start and the 
escape character cannot be the same ('" + commentMarker + "')"); 1394 } 1395 1396 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 1397 throw new IllegalArgumentException("No quotes mode set but no escape character is set"); 1398 } 1399 1400 // validate header 1401 if (header != null) { 1402 final Set<String> dupCheck = new HashSet<>(); 1403 for (final String hdr : header) { 1404 if (!dupCheck.add(hdr)) { 1405 throw new IllegalArgumentException( 1406 "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); 1407 } 1408 } 1409 } 1410 } 1411 1412 /** 1413 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true} 1414 * 1415 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 1416 * @see #withAllowMissingColumnNames(boolean) 1417 * @since 1.1 1418 */ 1419 public CSVFormat withAllowMissingColumnNames() { 1420 return this.withAllowMissingColumnNames(true); 1421 } 1422 1423 /** 1424 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. 1425 * 1426 * @param allowMissingColumnNames 1427 * the missing column names behavior, {@code true} to allow missing column names in the header line, 1428 * {@code false} to cause an {@link IllegalArgumentException} to be thrown. 1429 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
1430 */ 1431 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { 1432 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1433 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1434 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1435 } 1436 1437 /** 1438 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1439 * 1440 * Note that the comment start character is only recognized at the start of a line. 1441 * 1442 * @param commentMarker 1443 * the comment start marker 1444 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1445 * @throws IllegalArgumentException 1446 * thrown if the specified character is a line break 1447 */ 1448 public CSVFormat withCommentMarker(final char commentMarker) { 1449 return withCommentMarker(Character.valueOf(commentMarker)); 1450 } 1451 1452 /** 1453 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1454 * 1455 * Note that the comment start character is only recognized at the start of a line. 
1456 * 1457 * @param commentMarker 1458 * the comment start marker, use {@code null} to disable 1459 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1460 * @throws IllegalArgumentException 1461 * thrown if the specified character is a line break 1462 */ 1463 public CSVFormat withCommentMarker(final Character commentMarker) { 1464 if (isLineBreak(commentMarker)) { 1465 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 1466 } 1467 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1468 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1469 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1470 } 1471 1472 /** 1473 * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. 1474 * 1475 * @param delimiter 1476 * the delimiter character 1477 * @return A new CSVFormat that is equal to this with the specified character as delimiter 1478 * @throws IllegalArgumentException 1479 * thrown if the specified character is a line break 1480 */ 1481 public CSVFormat withDelimiter(final char delimiter) { 1482 if (isLineBreak(delimiter)) { 1483 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1484 } 1485 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1486 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1487 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1488 } 1489 1490 /** 1491 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 
1492 * 1493 * @param escape 1494 * the escape character 1495 * @return A new CSVFormat that is equal to his but with the specified character as the escape character 1496 * @throws IllegalArgumentException 1497 * thrown if the specified character is a line break 1498 */ 1499 public CSVFormat withEscape(final char escape) { 1500 return withEscape(Character.valueOf(escape)); 1501 } 1502 1503 /** 1504 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 1505 * 1506 * @param escape 1507 * the escape character, use {@code null} to disable 1508 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 1509 * @throws IllegalArgumentException 1510 * thrown if the specified character is a line break 1511 */ 1512 public CSVFormat withEscape(final Character escape) { 1513 if (isLineBreak(escape)) { 1514 throw new IllegalArgumentException("The escape character cannot be a line break"); 1515 } 1516 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, 1517 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 1518 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1519 } 1520 1521 /** 1522 * Returns a new {@code CSVFormat} using the first record as header. 1523 * 1524 * <p> 1525 * Calling this method is equivalent to calling: 1526 * </p> 1527 * 1528 * <pre> 1529 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); 1530 * </pre> 1531 * 1532 * @return A new CSVFormat that is equal to this but using the first record as header. 1533 * @see #withSkipHeaderRecord(boolean) 1534 * @see #withHeader(String...) 1535 * @since 1.3 1536 */ 1537 public CSVFormat withFirstRecordAsHeader() { 1538 return withHeader().withSkipHeaderRecord(); 1539 } 1540 1541 /** 1542 * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. 
1543 * 1544 * <p> 1545 * Example: 1546 * </p> 1547 * <pre> 1548 * public enum Header { 1549 * Name, Email, Phone 1550 * } 1551 * 1552 * CSVFormat format = aformat.withHeader(Header.class); 1553 * </pre> 1554 * <p> 1555 * The header is also used by the {@link CSVPrinter}. 1556 * </p> 1557 * 1558 * @param headerEnum 1559 * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified 1560 * otherwise. 1561 * 1562 * @return A new CSVFormat that is equal to this but with the specified header 1563 * @see #withHeader(String...) 1564 * @see #withSkipHeaderRecord(boolean) 1565 * @since 1.3 1566 */ 1567 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) { 1568 String[] header = null; 1569 if (headerEnum != null) { 1570 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 1571 header = new String[enumValues.length]; 1572 for (int i = 0; i < enumValues.length; i++) { 1573 header[i] = enumValues[i].name(); 1574 } 1575 } 1576 return withHeader(header); 1577 } 1578 1579 /** 1580 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1581 * either be parsed automatically from the input file with: 1582 * 1583 * <pre> 1584 * CSVFormat format = aformat.withHeader(); 1585 * </pre> 1586 * 1587 * or specified manually with: 1588 * 1589 * <pre> 1590 * CSVFormat format = aformat.withHeader(resultSet); 1591 * </pre> 1592 * <p> 1593 * The header is also used by the {@link CSVPrinter}. 1594 * </p> 1595 * 1596 * @param resultSet 1597 * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified 1598 * otherwise. 1599 * 1600 * @return A new CSVFormat that is equal to this but with the specified header 1601 * @throws SQLException 1602 * SQLException if a database access error occurs or this method is called on a closed result set. 
1603 * @since 1.1 1604 */ 1605 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { 1606 return withHeader(resultSet != null ? resultSet.getMetaData() : null); 1607 } 1608 1609 /** 1610 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1611 * either be parsed automatically from the input file with: 1612 * 1613 * <pre> 1614 * CSVFormat format = aformat.withHeader(); 1615 * </pre> 1616 * 1617 * or specified manually with: 1618 * 1619 * <pre> 1620 * CSVFormat format = aformat.withHeader(metaData); 1621 * </pre> 1622 * <p> 1623 * The header is also used by the {@link CSVPrinter}. 1624 * </p> 1625 * 1626 * @param metaData 1627 * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified 1628 * otherwise. 1629 * 1630 * @return A new CSVFormat that is equal to this but with the specified header 1631 * @throws SQLException 1632 * SQLException if a database access error occurs or this method is called on a closed result set. 1633 * @since 1.1 1634 */ 1635 public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException { 1636 String[] labels = null; 1637 if (metaData != null) { 1638 final int columnCount = metaData.getColumnCount(); 1639 labels = new String[columnCount]; 1640 for (int i = 0; i < columnCount; i++) { 1641 labels[i] = metaData.getColumnLabel(i + 1); 1642 } 1643 } 1644 return withHeader(labels); 1645 } 1646 1647 /** 1648 * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be 1649 * parsed automatically from the input file with: 1650 * 1651 * <pre> 1652 * CSVFormat format = aformat.withHeader(); 1653 * </pre> 1654 * 1655 * or specified manually with: 1656 * 1657 * <pre> 1658 * CSVFormat format = aformat.withHeader("name", "email", "phone"); 1659 * </pre> 1660 * <p> 1661 * The header is also used by the {@link CSVPrinter}. 
1662 * </p> 1663 * 1664 * @param header 1665 * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 1666 * 1667 * @return A new CSVFormat that is equal to this but with the specified header 1668 * @see #withSkipHeaderRecord(boolean) 1669 */ 1670 public CSVFormat withHeader(final String... header) { 1671 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1672 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1673 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1674 } 1675 1676 /** 1677 * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will 1678 * be printed first, before the headers. This setting is ignored by the parser. 1679 * 1680 * <pre> 1681 * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date()); 1682 * </pre> 1683 * 1684 * @param headerComments 1685 * the headerComments which will be printed by the Printer before the actual CSV data. 1686 * 1687 * @return A new CSVFormat that is equal to this but with the specified header 1688 * @see #withSkipHeaderRecord(boolean) 1689 * @since 1.1 1690 */ 1691 public CSVFormat withHeaderComments(final Object... headerComments) { 1692 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1693 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1694 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1695 } 1696 1697 /** 1698 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. 1699 * 1700 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 
1701 * @since {@link #withIgnoreEmptyLines(boolean)} 1702 * @since 1.1 1703 */ 1704 public CSVFormat withIgnoreEmptyLines() { 1705 return this.withIgnoreEmptyLines(true); 1706 } 1707 1708 /** 1709 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. 1710 * 1711 * @param ignoreEmptyLines 1712 * the empty line skipping behavior, {@code true} to ignore the empty lines between the records, 1713 * {@code false} to translate empty lines to empty records. 1714 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 1715 */ 1716 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 1717 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1718 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1719 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1720 } 1721 1722 /** 1723 * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 1724 * 1725 * @return A new CSVFormat that will ignore case header name. 1726 * @see #withIgnoreHeaderCase(boolean) 1727 * @since 1.3 1728 */ 1729 public CSVFormat withIgnoreHeaderCase() { 1730 return this.withIgnoreHeaderCase(true); 1731 } 1732 1733 /** 1734 * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. 1735 * 1736 * @param ignoreHeaderCase 1737 * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as 1738 * is. 
1739 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 1740 * @since 1.3 1741 */ 1742 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 1743 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1744 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1745 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1746 } 1747 1748 /** 1749 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to {@code true}. 1750 * 1751 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 1752 * @see #withIgnoreSurroundingSpaces(boolean) 1753 * @since 1.1 1754 */ 1755 public CSVFormat withIgnoreSurroundingSpaces() { 1756 return this.withIgnoreSurroundingSpaces(true); 1757 } 1758 1759 /** 1760 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to the given value. 1761 * 1762 * @param ignoreSurroundingSpaces 1763 * the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the 1764 * spaces as is. 1765 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 1766 */ 1767 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 1768 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1769 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1770 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1771 } 1772 1773 /** 1774 * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. 
1775 * <ul> 1776 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 1777 * records.</li> 1778 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 1779 * </ul> 1780 * 1781 * @param nullString 1782 * the String to convert to and from {@code null}. No substitution occurs if {@code null} 1783 * 1784 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 1785 */ 1786 public CSVFormat withNullString(final String nullString) { 1787 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1788 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1789 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1790 } 1791 1792 /** 1793 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 1794 * 1795 * @param quoteChar 1796 * the quoteChar character 1797 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 1798 * @throws IllegalArgumentException 1799 * thrown if the specified character is a line break 1800 */ 1801 public CSVFormat withQuote(final char quoteChar) { 1802 return withQuote(Character.valueOf(quoteChar)); 1803 } 1804 1805 /** 1806 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
1807 * 1808 * @param quoteChar 1809 * the quoteChar character, use {@code null} to disable 1810 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 1811 * @throws IllegalArgumentException 1812 * thrown if the specified character is a line break 1813 */ 1814 public CSVFormat withQuote(final Character quoteChar) { 1815 if (isLineBreak(quoteChar)) { 1816 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 1817 } 1818 return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, 1819 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 1820 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1821 } 1822 1823 /** 1824 * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 1825 * 1826 * @param quoteModePolicy 1827 * the quote policy to use for output. 1828 * 1829 * @return A new CSVFormat that is equal to this but with the specified quote policy 1830 */ 1831 public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { 1832 return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, 1833 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1834 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1835 } 1836 1837 /** 1838 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. 1839 * 1840 * <p> 1841 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 1842 * only works for inputs with '\n', '\r' and "\r\n" 1843 * </p> 1844 * 1845 * @param recordSeparator 1846 * the record separator to use for output. 
1847 * 1848 * @return A new CSVFormat that is equal to this but with the the specified output record separator 1849 */ 1850 public CSVFormat withRecordSeparator(final char recordSeparator) { 1851 return withRecordSeparator(String.valueOf(recordSeparator)); 1852 } 1853 1854 /** 1855 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. 1856 * 1857 * <p> 1858 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 1859 * only works for inputs with '\n', '\r' and "\r\n" 1860 * </p> 1861 * 1862 * @param recordSeparator 1863 * the record separator to use for output. 1864 * 1865 * @return A new CSVFormat that is equal to this but with the the specified output record separator 1866 * @throws IllegalArgumentException 1867 * if recordSeparator is none of CR, LF or CRLF 1868 */ 1869 public CSVFormat withRecordSeparator(final String recordSeparator) { 1870 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1871 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1872 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1873 } 1874 1875 /** 1876 * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. 1877 * 1878 * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting. 1879 * @see #withSkipHeaderRecord(boolean) 1880 * @see #withHeader(String...) 1881 * @since 1.1 1882 */ 1883 public CSVFormat withSkipHeaderRecord() { 1884 return this.withSkipHeaderRecord(true); 1885 } 1886 1887 /** 1888 * Returns a new {@code CSVFormat} with whether to skip the header record. 1889 * 1890 * @param skipHeaderRecord 1891 * whether to skip the header record. 1892 * 1893 * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting. 1894 * @see #withHeader(String...) 
1895 */ 1896 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 1897 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1898 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1899 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1900 } 1901 1902 /** 1903 * Returns a new {@code CSVFormat} to add a trailing delimiter. 1904 * 1905 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 1906 * @since 1.3 1907 */ 1908 public CSVFormat withTrailingDelimiter() { 1909 return withTrailingDelimiter(true); 1910 } 1911 1912 /** 1913 * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. 1914 * 1915 * @param trailingDelimiter 1916 * whether to add a trailing delimiter. 1917 * 1918 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 1919 * @since 1.3 1920 */ 1921 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 1922 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1923 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1924 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1925 } 1926 1927 /** 1928 * Returns a new {@code CSVFormat} to trim leading and trailing blanks. 1929 * 1930 * @return A new CSVFormat that is equal to this but with the trim setting on. 1931 * @since 1.3 1932 */ 1933 public CSVFormat withTrim() { 1934 return withTrim(true); 1935 } 1936 1937 /** 1938 * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. 1939 * 1940 * @param trim 1941 * whether to trim leading and trailing blanks. 1942 * 1943 * @return A new CSVFormat that is equal to this but with the specified trim setting. 
1944 * @since 1.3 1945 */ 1946 public CSVFormat withTrim(final boolean trim) { 1947 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1948 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1949 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); 1950 } 1951}