001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Constants.BACKSLASH; 021import static org.apache.commons.csv.Constants.COMMA; 022import static org.apache.commons.csv.Constants.COMMENT; 023import static org.apache.commons.csv.Constants.CR; 024import static org.apache.commons.csv.Constants.CRLF; 025import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; 026import static org.apache.commons.csv.Constants.EMPTY; 027import static org.apache.commons.csv.Constants.LF; 028import static org.apache.commons.csv.Constants.PIPE; 029import static org.apache.commons.csv.Constants.SP; 030import static org.apache.commons.csv.Constants.TAB; 031 032import java.io.File; 033import java.io.FileOutputStream; 034import java.io.IOException; 035import java.io.OutputStreamWriter; 036import java.io.Reader; 037import java.io.Serializable; 038import java.io.StringWriter; 039import java.io.Writer; 040import java.nio.charset.Charset; 041import java.nio.file.Files; 042import java.nio.file.Path; 043import java.sql.ResultSet; 044import java.sql.ResultSetMetaData; 045import java.sql.SQLException; 046import java.util.Arrays; 
047import java.util.HashSet; 048import java.util.Set; 049 050/** 051 * Specifies the format of a CSV file and parses input. 052 * 053 * <h2>Using predefined formats</h2> 054 * 055 * <p> 056 * You can use one of the predefined formats: 057 * </p> 058 * 059 * <ul> 060 * <li>{@link #DEFAULT}</li> 061 * <li>{@link #EXCEL}</li> 062 * <li>{@link #INFORMIX_UNLOAD}</li> 063 * <li>{@link #INFORMIX_UNLOAD_CSV}</li> 064 * <li>{@link #MYSQL}</li> 065 * <li>{@link #RFC4180}</li> 066 * <li>{@link #ORACLE}</li> 067 * <li>{@link #POSTGRESQL_CSV}</li> 068 * <li>{@link #POSTGRESQL_TEXT}</li> 069 * <li>{@link #TDF}</li> 070 * </ul> 071 * 072 * <p> 073 * For example: 074 * </p> 075 * 076 * <pre> 077 * CSVParser parser = CSVFormat.EXCEL.parse(reader); 078 * </pre> 079 * 080 * <p> 081 * The {@link CSVParser} provides static methods to parse other input types, for example: 082 * </p> 083 * 084 * <pre> 085 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); 086 * </pre> 087 * 088 * <h2>Defining formats</h2> 089 * 090 * <p> 091 * You can extend a format by calling the {@code with} methods. For example: 092 * </p> 093 * 094 * <pre> 095 * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true); 096 * </pre> 097 * 098 * <h2>Defining column names</h2> 099 * 100 * <p> 101 * To define the column names you want to use to access records, write: 102 * </p> 103 * 104 * <pre> 105 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3"); 106 * </pre> 107 * 108 * <p> 109 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and 110 * assumes that your CSV source does not contain a first record that also defines column names. 111 * 112 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling 113 * {@link #withSkipHeaderRecord(boolean)} with {@code true}. 
114 * </p> 115 * 116 * <h2>Parsing</h2> 117 * 118 * <p> 119 * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write: 120 * </p> 121 * 122 * <pre> 123 * Reader in = ...; 124 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in); 125 * </pre> 126 * 127 * <p> 128 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. 129 * </p> 130 * 131 * <h2>Referencing columns safely</h2> 132 * 133 * <p> 134 * If your source contains a header record, you can simplify your code and safely reference columns, by using 135 * {@link #withHeader(String...)} with no arguments: 136 * </p> 137 * 138 * <pre> 139 * CSVFormat.EXCEL.withHeader(); 140 * </pre> 141 * 142 * <p> 143 * This causes the parser to read the first record and use its values as column names. 144 * 145 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument: 146 * </p> 147 * 148 * <pre> 149 * String value = record.get("Col1"); 150 * </pre> 151 * 152 * <p> 153 * This makes your code impervious to changes in column order in the CSV file. 154 * </p> 155 * 156 * <h2>Notes</h2> 157 * 158 * <p> 159 * This class is immutable. 160 * </p> 161 */ 162public final class CSVFormat implements Serializable { 163 164 /** 165 * Predefines formats. 
166 * 167 * @since 1.2 168 */ 169 public enum Predefined { 170 171 /** 172 * @see CSVFormat#DEFAULT 173 */ 174 Default(CSVFormat.DEFAULT), 175 176 /** 177 * @see CSVFormat#EXCEL 178 */ 179 Excel(CSVFormat.EXCEL), 180 181 /** 182 * @see CSVFormat#INFORMIX_UNLOAD 183 * @since 1.3 184 */ 185 InformixUnload(CSVFormat.INFORMIX_UNLOAD), 186 187 /** 188 * @see CSVFormat#INFORMIX_UNLOAD_CSV 189 * @since 1.3 190 */ 191 InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV), 192 193 /** 194 * @see CSVFormat#MONGODB_CSV 195 * @since 1.7 196 */ 197 MongoDBCsv(CSVFormat.MONGODB_CSV), 198 199 /** 200 * @see CSVFormat#MONGODB_TSV 201 * @since 1.7 202 */ 203 MongoDBTsv(CSVFormat.MONGODB_TSV), 204 205 /** 206 * @see CSVFormat#MYSQL 207 */ 208 MySQL(CSVFormat.MYSQL), 209 210 /** 211 * @see CSVFormat#ORACLE 212 */ 213 Oracle(CSVFormat.ORACLE), 214 215 /** 216 * @see CSVFormat#POSTGRESQL_CSV 217 * @since 1.5 218 */ 219 PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV), 220 221 /** 222 * @see CSVFormat#POSTGRESQL_CSV 223 */ 224 PostgreSQLText(CSVFormat.POSTGRESQL_TEXT), 225 226 /** 227 * @see CSVFormat#RFC4180 228 */ 229 RFC4180(CSVFormat.RFC4180), 230 231 /** 232 * @see CSVFormat#TDF 233 */ 234 TDF(CSVFormat.TDF); 235 236 private final CSVFormat format; 237 238 Predefined(final CSVFormat format) { 239 this.format = format; 240 } 241 242 /** 243 * Gets the format. 244 * 245 * @return the format. 246 */ 247 public CSVFormat getFormat() { 248 return format; 249 } 250 } 251 252 /** 253 * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. 
254 * 255 * <p> 256 * Settings are: 257 * </p> 258 * <ul> 259 * <li>{@code withDelimiter(',')}</li> 260 * <li>{@code withQuote('"')}</li> 261 * <li>{@code withRecordSeparator("\r\n")}</li> 262 * <li>{@code withIgnoreEmptyLines(true)}</li> 263 * <li>{@code withAllowDuplicateHeaderNames(true)}</li> 264 * </ul> 265 * 266 * @see Predefined#Default 267 */ 268 public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, 269 null, null, null, false, false, false, false, false, false, true); 270 271 /** 272 * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is 273 * locale dependent, it might be necessary to customize this format to accommodate to your regional settings. 274 * 275 * <p> 276 * For example for parsing or generating a CSV file on a French system the following format will be used: 277 * </p> 278 * 279 * <pre> 280 * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';'); 281 * </pre> 282 * 283 * <p> 284 * Settings are: 285 * </p> 286 * <ul> 287 * <li>{@code {@link #withDelimiter(char) withDelimiter(',')}}</li> 288 * <li>{@code {@link #withQuote(char) withQuote('"')}}</li> 289 * <li>{@code {@link #withRecordSeparator(String) withRecordSeparator("\r\n")}}</li> 290 * <li>{@code {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}}</li> 291 * <li>{@code {@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}}</li> 292 * <li>{@code {@link #withAllowDuplicateHeaderNames(boolean) withAllowDuplicateHeaderNames(true)}}</li> 293 * </ul> 294 * <p> 295 * Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean) 296 * withAllowMissingColumnNames(true)} and {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}. 
297 * </p> 298 * 299 * @see Predefined#Excel 300 */ 301 // @formatter:off 302 public static final CSVFormat EXCEL = DEFAULT 303 .withIgnoreEmptyLines(false) 304 .withAllowMissingColumnNames(); 305 // @formatter:on 306 307 /** 308 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. 309 * 310 * <p> 311 * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special 312 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 313 * </p> 314 * 315 * <p> 316 * Settings are: 317 * </p> 318 * <ul> 319 * <li>{@code withDelimiter(',')}</li> 320 * <li>{@code withEscape('\\')}</li> 321 * <li>{@code withQuote("\"")}</li> 322 * <li>{@code withRecordSeparator('\n')}</li> 323 * </ul> 324 * 325 * @see Predefined#MySQL 326 * @see <a href= 327 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm"> 328 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a> 329 * @since 1.3 330 */ 331 // @formatter:off 332 public static final CSVFormat INFORMIX_UNLOAD = DEFAULT 333 .withDelimiter(PIPE) 334 .withEscape(BACKSLASH) 335 .withQuote(DOUBLE_QUOTE_CHAR) 336 .withRecordSeparator(LF); 337 // @formatter:on 338 339 /** 340 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) 341 * 342 * <p> 343 * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special 344 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 
345 * </p> 346 * 347 * <p> 348 * Settings are: 349 * </p> 350 * <ul> 351 * <li>{@code withDelimiter(',')}</li> 352 * <li>{@code withQuote("\"")}</li> 353 * <li>{@code withRecordSeparator('\n')}</li> 354 * </ul> 355 * 356 * @see Predefined#MySQL 357 * @see <a href= 358 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm"> 359 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a> 360 * @since 1.3 361 */ 362 // @formatter:off 363 public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT 364 .withDelimiter(COMMA) 365 .withQuote(DOUBLE_QUOTE_CHAR) 366 .withRecordSeparator(LF); 367 // @formatter:on 368 369 /** 370 * Default MongoDB CSV format used by the {@code mongoexport} operation. 371 * <p> 372 * <b>Parsing is not supported yet.</b> 373 * </p> 374 * 375 * <p> 376 * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with 377 * {@code '"'}. A header line with field names is expected. 378 * </p> 379 * 380 * <p> 381 * Settings are: 382 * </p> 383 * <ul> 384 * <li>{@code withDelimiter(',')}</li> 385 * <li>{@code withEscape('"')}</li> 386 * <li>{@code withQuote('"')}</li> 387 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 388 * <li>{@code withSkipHeaderRecord(false)}</li> 389 * </ul> 390 * 391 * @see Predefined#MongoDBCsv 392 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command 393 * documentation</a> 394 * @since 1.7 395 */ 396 // @formatter:off 397 public static final CSVFormat MONGODB_CSV = DEFAULT 398 .withDelimiter(COMMA) 399 .withEscape(DOUBLE_QUOTE_CHAR) 400 .withQuote(DOUBLE_QUOTE_CHAR) 401 .withQuoteMode(QuoteMode.MINIMAL) 402 .withSkipHeaderRecord(false); 403 // @formatter:off 404 405 /** 406 * Default MongoDB TSV format used by the {@code mongoexport} operation. 
407 * <p> 408 * <b>Parsing is not supported yet.</b> 409 * </p> 410 * 411 * <p> 412 * This is a tab-delimited format. Values are double quoted only if needed and special 413 * characters are escaped with {@code '"'}. A header line with field names is expected. 414 * </p> 415 * 416 * <p> 417 * Settings are: 418 * </p> 419 * <ul> 420 * <li>{@code withDelimiter('\t')}</li> 421 * <li>{@code withEscape('"')}</li> 422 * <li>{@code withQuote('"')}</li> 423 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 424 * <li>{@code withSkipHeaderRecord(false)}</li> 425 * </ul> 426 * 427 * @see Predefined#MongoDBCsv 428 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command 429 * documentation</a> 430 * @since 1.7 431 */ 432 // @formatter:off 433 public static final CSVFormat MONGODB_TSV = DEFAULT 434 .withDelimiter(TAB) 435 .withEscape(DOUBLE_QUOTE_CHAR) 436 .withQuote(DOUBLE_QUOTE_CHAR) 437 .withQuoteMode(QuoteMode.MINIMAL) 438 .withSkipHeaderRecord(false); 439 // @formatter:off 440 441 /** 442 * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. 443 * 444 * <p> 445 * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special 446 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 
447 * </p> 448 * 449 * <p> 450 * Settings are: 451 * </p> 452 * <ul> 453 * <li>{@code withDelimiter('\t')}</li> 454 * <li>{@code withEscape('\\')}</li> 455 * <li>{@code withIgnoreEmptyLines(false)}</li> 456 * <li>{@code withQuote(null)}</li> 457 * <li>{@code withRecordSeparator('\n')}</li> 458 * <li>{@code withNullString("\\N")}</li> 459 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 460 * </ul> 461 * 462 * @see Predefined#MySQL 463 * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load 464 * -data.html</a> 465 */ 466 // @formatter:off 467 public static final CSVFormat MYSQL = DEFAULT 468 .withDelimiter(TAB) 469 .withEscape(BACKSLASH) 470 .withIgnoreEmptyLines(false) 471 .withQuote(null) 472 .withRecordSeparator(LF) 473 .withNullString("\\N") 474 .withQuoteMode(QuoteMode.ALL_NON_NULL); 475 // @formatter:off 476 477 /** 478 * Default Oracle format used by the SQL*Loader utility. 479 * 480 * <p> 481 * This is a comma-delimited format with the system line separator character as the record separator.Values are 482 * double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is 483 * {@code ""}. Values are trimmed. 
484 * </p> 485 * 486 * <p> 487 * Settings are: 488 * </p> 489 * <ul> 490 * <li>{@code withDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}</li> 491 * <li>{@code withEscape('\\')}</li> 492 * <li>{@code withIgnoreEmptyLines(false)}</li> 493 * <li>{@code withQuote('"') // default is {@code OPTIONALLY ENCLOSED BY '"'}}</li> 494 * <li>{@code withNullString("\\N")}</li> 495 * <li>{@code withTrim()}</li> 496 * <li>{@code withSystemRecordSeparator()}</li> 497 * <li>{@code withQuoteMode(QuoteMode.MINIMAL)}</li> 498 * </ul> 499 * 500 * @see Predefined#Oracle 501 * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a> 502 * @since 1.6 503 */ 504 // @formatter:off 505 public static final CSVFormat ORACLE = DEFAULT 506 .withDelimiter(COMMA) 507 .withEscape(BACKSLASH) 508 .withIgnoreEmptyLines(false) 509 .withQuote(DOUBLE_QUOTE_CHAR) 510 .withNullString("\\N") 511 .withTrim() 512 .withSystemRecordSeparator() 513 .withQuoteMode(QuoteMode.MINIMAL); 514 // @formatter:off 515 516 /** 517 * Default PostgreSQL CSV format used by the {@code COPY} operation. 518 * 519 * <p> 520 * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special 521 * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. 
522 * </p> 523 * 524 * <p> 525 * Settings are: 526 * </p> 527 * <ul> 528 * <li>{@code withDelimiter(',')}</li> 529 * <li>{@code withEscape('"')}</li> 530 * <li>{@code withIgnoreEmptyLines(false)}</li> 531 * <li>{@code withQuote('"')}</li> 532 * <li>{@code withRecordSeparator('\n')}</li> 533 * <li>{@code withNullString("")}</li> 534 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 535 * </ul> 536 * 537 * @see Predefined#MySQL 538 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command 539 * documentation</a> 540 * @since 1.5 541 */ 542 // @formatter:off 543 public static final CSVFormat POSTGRESQL_CSV = DEFAULT 544 .withDelimiter(COMMA) 545 .withEscape(DOUBLE_QUOTE_CHAR) 546 .withIgnoreEmptyLines(false) 547 .withQuote(DOUBLE_QUOTE_CHAR) 548 .withRecordSeparator(LF) 549 .withNullString(EMPTY) 550 .withQuoteMode(QuoteMode.ALL_NON_NULL); 551 // @formatter:off 552 553 /** 554 * Default PostgreSQL text format used by the {@code COPY} operation. 555 * 556 * <p> 557 * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special 558 * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}. 
559 * </p> 560 * 561 * <p> 562 * Settings are: 563 * </p> 564 * <ul> 565 * <li>{@code withDelimiter('\t')}</li> 566 * <li>{@code withEscape('\\')}</li> 567 * <li>{@code withIgnoreEmptyLines(false)}</li> 568 * <li>{@code withQuote('"')}</li> 569 * <li>{@code withRecordSeparator('\n')}</li> 570 * <li>{@code withNullString("\\N")}</li> 571 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 572 * </ul> 573 * 574 * @see Predefined#MySQL 575 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command 576 * documentation</a> 577 * @since 1.5 578 */ 579 // @formatter:off 580 public static final CSVFormat POSTGRESQL_TEXT = DEFAULT 581 .withDelimiter(TAB) 582 .withEscape(BACKSLASH) 583 .withIgnoreEmptyLines(false) 584 .withQuote(DOUBLE_QUOTE_CHAR) 585 .withRecordSeparator(LF) 586 .withNullString("\\N") 587 .withQuoteMode(QuoteMode.ALL_NON_NULL); 588 // @formatter:off 589 590 /** 591 * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 592 * 593 * <p> 594 * Settings are: 595 * </p> 596 * <ul> 597 * <li>{@code withDelimiter(',')}</li> 598 * <li>{@code withQuote('"')}</li> 599 * <li>{@code withRecordSeparator("\r\n")}</li> 600 * <li>{@code withIgnoreEmptyLines(false)}</li> 601 * </ul> 602 * 603 * @see Predefined#RFC4180 604 */ 605 public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false); 606 607 private static final long serialVersionUID = 1L; 608 609 /** 610 * Tab-delimited format. 
611 * 612 * <p> 613 * Settings are: 614 * </p> 615 * <ul> 616 * <li>{@code withDelimiter('\t')}</li> 617 * <li>{@code withQuote('"')}</li> 618 * <li>{@code withRecordSeparator("\r\n")}</li> 619 * <li>{@code withIgnoreSurroundingSpaces(true)}</li> 620 * </ul> 621 * 622 * @see Predefined#TDF 623 */ 624 // @formatter:off 625 public static final CSVFormat TDF = DEFAULT 626 .withDelimiter(TAB) 627 .withIgnoreSurroundingSpaces(); 628 // @formatter:on 629 630 /** 631 * Returns true if the given character is a line break character. 632 * 633 * @param c 634 * the character to check 635 * 636 * @return true if <code>c</code> is a line break character 637 */ 638 private static boolean isLineBreak(final char c) { 639 return c == LF || c == CR; 640 } 641 642 /** 643 * Returns true if the given character is a line break character. 644 * 645 * @param c 646 * the character to check, may be null 647 * 648 * @return true if <code>c</code> is a line break character (and not null) 649 */ 650 private static boolean isLineBreak(final Character c) { 651 return c != null && isLineBreak(c.charValue()); 652 } 653 654 /** 655 * Creates a new CSV format with the specified delimiter. 656 * 657 * <p> 658 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized 659 * with null/false. 660 * </p> 661 * 662 * @param delimiter 663 * the char used for value separation, must not be a line break character 664 * @return a new CSV format. 665 * @throws IllegalArgumentException 666 * if the delimiter is a line break character 667 * 668 * @see #DEFAULT 669 * @see #RFC4180 670 * @see #MYSQL 671 * @see #EXCEL 672 * @see #TDF 673 */ 674 public static CSVFormat newFormat(final char delimiter) { 675 return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, 676 false, false, false, false, true); 677 } 678 679 /** 680 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 
681 * 682 * @param format 683 * name 684 * @return one of the predefined formats 685 * @since 1.2 686 */ 687 public static CSVFormat valueOf(final String format) { 688 return CSVFormat.Predefined.valueOf(format).getFormat(); 689 } 690 691 private final boolean allowDuplicateHeaderNames; 692 693 private final boolean allowMissingColumnNames; 694 695 private final boolean autoFlush; 696 697 private final Character commentMarker; // null if commenting is disabled 698 699 private final char delimiter; 700 701 private final Character escapeCharacter; // null if escaping is disabled 702 703 private final String[] header; // array of header column names 704 705 private final String[] headerComments; // array of header comment lines 706 707 private final boolean ignoreEmptyLines; 708 709 private final boolean ignoreHeaderCase; // should ignore header names case 710 711 private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? 712 713 private final String nullString; // the string to be used for null values 714 715 private final Character quoteCharacter; // null if quoting is disabled 716 717 private final String quotedNullString; 718 719 private final QuoteMode quoteMode; 720 721 private final String recordSeparator; // for outputs 722 723 private final boolean skipHeaderRecord; 724 725 private final boolean trailingDelimiter; 726 727 private final boolean trim; 728 729 /** 730 * Creates a customized CSV format. 
731 * 732 * @param delimiter 733 * the char used for value separation, must not be a line break character 734 * @param quoteChar 735 * the Character used as value encapsulation marker, may be {@code null} to disable 736 * @param quoteMode 737 * the quote mode 738 * @param commentStart 739 * the Character used for comment identification, may be {@code null} to disable 740 * @param escape 741 * the Character used to escape special characters in values, may be {@code null} to disable 742 * @param ignoreSurroundingSpaces 743 * {@code true} when whitespaces enclosing values should be ignored 744 * @param ignoreEmptyLines 745 * {@code true} when the parser should skip empty lines 746 * @param recordSeparator 747 * the line separator to use for output 748 * @param nullString 749 * the line separator to use for output 750 * @param headerComments 751 * the comments to be printed by the Printer before the actual CSV data 752 * @param header 753 * the header 754 * @param skipHeaderRecord 755 * TODO 756 * @param allowMissingColumnNames 757 * TODO 758 * @param ignoreHeaderCase 759 * TODO 760 * @param trim 761 * TODO 762 * @param trailingDelimiter 763 * TODO 764 * @param autoFlush 765 * @throws IllegalArgumentException 766 * if the delimiter is a line break character 767 */ 768 private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, 769 final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, 770 final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, 771 final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, 772 final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, 773 final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) { 774 this.delimiter = delimiter; 775 this.quoteCharacter = quoteChar; 776 this.quoteMode = quoteMode; 777 this.commentMarker = commentStart; 
778 this.escapeCharacter = escape; 779 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 780 this.allowMissingColumnNames = allowMissingColumnNames; 781 this.ignoreEmptyLines = ignoreEmptyLines; 782 this.recordSeparator = recordSeparator; 783 this.nullString = nullString; 784 this.headerComments = toStringArray(headerComments); 785 this.header = header == null ? null : header.clone(); 786 this.skipHeaderRecord = skipHeaderRecord; 787 this.ignoreHeaderCase = ignoreHeaderCase; 788 this.trailingDelimiter = trailingDelimiter; 789 this.trim = trim; 790 this.autoFlush = autoFlush; 791 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 792 this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; 793 validate(); 794 } 795 796 @Override 797 public boolean equals(final Object obj) { 798 if (this == obj) { 799 return true; 800 } 801 if (obj == null) { 802 return false; 803 } 804 if (getClass() != obj.getClass()) { 805 return false; 806 } 807 808 final CSVFormat other = (CSVFormat) obj; 809 if (delimiter != other.delimiter) { 810 return false; 811 } 812 if (quoteMode != other.quoteMode) { 813 return false; 814 } 815 if (quoteCharacter == null) { 816 if (other.quoteCharacter != null) { 817 return false; 818 } 819 } else if (!quoteCharacter.equals(other.quoteCharacter)) { 820 return false; 821 } 822 if (commentMarker == null) { 823 if (other.commentMarker != null) { 824 return false; 825 } 826 } else if (!commentMarker.equals(other.commentMarker)) { 827 return false; 828 } 829 if (escapeCharacter == null) { 830 if (other.escapeCharacter != null) { 831 return false; 832 } 833 } else if (!escapeCharacter.equals(other.escapeCharacter)) { 834 return false; 835 } 836 if (nullString == null) { 837 if (other.nullString != null) { 838 return false; 839 } 840 } else if (!nullString.equals(other.nullString)) { 841 return false; 842 } 843 if (!Arrays.equals(header, other.header)) { 844 return false; 845 } 846 if (ignoreSurroundingSpaces != 
other.ignoreSurroundingSpaces) { 847 return false; 848 } 849 if (ignoreEmptyLines != other.ignoreEmptyLines) { 850 return false; 851 } 852 if (skipHeaderRecord != other.skipHeaderRecord) { 853 return false; 854 } 855 if (recordSeparator == null) { 856 if (other.recordSeparator != null) { 857 return false; 858 } 859 } else if (!recordSeparator.equals(other.recordSeparator)) { 860 return false; 861 } 862 return true; 863 } 864 865 /** 866 * Formats the specified values. 867 * 868 * @param values 869 * the values to format 870 * @return the formatted values 871 */ 872 public String format(final Object... values) { 873 final StringWriter out = new StringWriter(); 874 try (final CSVPrinter csvPrinter = new CSVPrinter(out, this)) { 875 csvPrinter.printRecord(values); 876 return out.toString().trim(); 877 } catch (final IOException e) { 878 // should not happen because a StringWriter does not do IO. 879 throw new IllegalStateException(e); 880 } 881 } 882 883 /** 884 * Returns true if and only if duplicate names are allowed in the headers. 885 * 886 * @return whether duplicate header names are allowed 887 * @since 1.7 888 */ 889 public boolean getAllowDuplicateHeaderNames() { 890 return allowDuplicateHeaderNames; 891 } 892 893 /** 894 * Specifies whether missing column names are allowed when parsing the header line. 895 * 896 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an 897 * {@link IllegalArgumentException}. 898 */ 899 public boolean getAllowMissingColumnNames() { 900 return allowMissingColumnNames; 901 } 902 903 /** 904 * Returns whether to flush on close. 905 * 906 * @return whether to flush on close. 907 * @since 1.6 908 */ 909 public boolean getAutoFlush() { 910 return autoFlush; 911 } 912 913 /** 914 * Returns the character marking the start of a line comment. 
915 * 916 * @return the comment start marker, may be {@code null} 917 */ 918 public Character getCommentMarker() { 919 return commentMarker; 920 } 921 922 /** 923 * Returns the character delimiting the values (typically ';', ',' or '\t'). 924 * 925 * @return the delimiter character 926 */ 927 public char getDelimiter() { 928 return delimiter; 929 } 930 931 /** 932 * Returns the escape character. 933 * 934 * @return the escape character, may be {@code null} 935 */ 936 public Character getEscapeCharacter() { 937 return escapeCharacter; 938 } 939 940 /** 941 * Returns a copy of the header array. 942 * 943 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file 944 */ 945 public String[] getHeader() { 946 return header != null ? header.clone() : null; 947 } 948 949 /** 950 * Returns a copy of the header comment array. 951 * 952 * @return a copy of the header comment array; {@code null} if disabled. 953 */ 954 public String[] getHeaderComments() { 955 return headerComments != null ? headerComments.clone() : null; 956 } 957 958 /** 959 * Specifies whether empty lines between records are ignored when parsing input. 960 * 961 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty 962 * records. 963 */ 964 public boolean getIgnoreEmptyLines() { 965 return ignoreEmptyLines; 966 } 967 968 /** 969 * Specifies whether header names will be accessed ignoring case. 970 * 971 * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. 972 * @since 1.3 973 */ 974 public boolean getIgnoreHeaderCase() { 975 return ignoreHeaderCase; 976 } 977 978 /** 979 * Specifies whether spaces around values are ignored when parsing input. 980 * 981 * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. 
982 */ 983 public boolean getIgnoreSurroundingSpaces() { 984 return ignoreSurroundingSpaces; 985 } 986 987 /** 988 * Gets the String to convert to and from {@code null}. 989 * <ul> 990 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 991 * records.</li> 992 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 993 * </ul> 994 * 995 * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} 996 */ 997 public String getNullString() { 998 return nullString; 999 } 1000 1001 /** 1002 * Returns the character used to encapsulate values containing special characters. 1003 * 1004 * @return the quoteChar character, may be {@code null} 1005 */ 1006 public Character getQuoteCharacter() { 1007 return quoteCharacter; 1008 } 1009 1010 /** 1011 * Returns the quote policy output fields. 1012 * 1013 * @return the quote policy 1014 */ 1015 public QuoteMode getQuoteMode() { 1016 return quoteMode; 1017 } 1018 1019 /** 1020 * Returns the record separator delimiting output records. 1021 * 1022 * @return the record separator 1023 */ 1024 public String getRecordSeparator() { 1025 return recordSeparator; 1026 } 1027 1028 /** 1029 * Returns whether to skip the header record. 1030 * 1031 * @return whether to skip the header record. 1032 */ 1033 public boolean getSkipHeaderRecord() { 1034 return skipHeaderRecord; 1035 } 1036 1037 /** 1038 * Returns whether to add a trailing delimiter. 1039 * 1040 * @return whether to add a trailing delimiter. 1041 * @since 1.3 1042 */ 1043 public boolean getTrailingDelimiter() { 1044 return trailingDelimiter; 1045 } 1046 1047 /** 1048 * Returns whether to trim leading and trailing blanks. 1049 * 1050 * @return whether to trim leading and trailing blanks. 
1051 */ 1052 public boolean getTrim() { 1053 return trim; 1054 } 1055 1056 @Override 1057 public int hashCode() { 1058 final int prime = 31; 1059 int result = 1; 1060 1061 result = prime * result + delimiter; 1062 result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode()); 1063 result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode()); 1064 result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode()); 1065 result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode()); 1066 result = prime * result + ((nullString == null) ? 0 : nullString.hashCode()); 1067 result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237); 1068 result = prime * result + (ignoreHeaderCase ? 1231 : 1237); 1069 result = prime * result + (ignoreEmptyLines ? 1231 : 1237); 1070 result = prime * result + (skipHeaderRecord ? 1231 : 1237); 1071 result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode()); 1072 result = prime * result + Arrays.hashCode(header); 1073 return result; 1074 } 1075 1076 /** 1077 * Specifies whether comments are supported by this format. 1078 * 1079 * Note that the comment introducer character is only recognized at the start of a line. 1080 * 1081 * @return {@code true} is comments are supported, {@code false} otherwise 1082 */ 1083 public boolean isCommentMarkerSet() { 1084 return commentMarker != null; 1085 } 1086 1087 /** 1088 * Returns whether escape are being processed. 1089 * 1090 * @return {@code true} if escapes are processed 1091 */ 1092 public boolean isEscapeCharacterSet() { 1093 return escapeCharacter != null; 1094 } 1095 1096 /** 1097 * Returns whether a nullString has been defined. 1098 * 1099 * @return {@code true} if a nullString is defined 1100 */ 1101 public boolean isNullStringSet() { 1102 return nullString != null; 1103 } 1104 1105 /** 1106 * Returns whether a quoteChar has been defined. 
1107 * 1108 * @return {@code true} if a quoteChar is defined 1109 */ 1110 public boolean isQuoteCharacterSet() { 1111 return quoteCharacter != null; 1112 } 1113 1114 /** 1115 * Parses the specified content. 1116 * 1117 * <p> 1118 * See also the various static parse methods on {@link CSVParser}. 1119 * </p> 1120 * 1121 * @param in 1122 * the input stream 1123 * @return a parser over a stream of {@link CSVRecord}s. 1124 * @throws IOException 1125 * If an I/O error occurs 1126 */ 1127 public CSVParser parse(final Reader in) throws IOException { 1128 return new CSVParser(in, this); 1129 } 1130 1131 /** 1132 * Prints to the specified output. 1133 * 1134 * <p> 1135 * See also {@link CSVPrinter}. 1136 * </p> 1137 * 1138 * @param out 1139 * the output. 1140 * @return a printer to an output. 1141 * @throws IOException 1142 * thrown if the optional header cannot be printed. 1143 */ 1144 public CSVPrinter print(final Appendable out) throws IOException { 1145 return new CSVPrinter(out, this); 1146 } 1147 1148 /** 1149 * Prints to the specified output. 1150 * 1151 * <p> 1152 * See also {@link CSVPrinter}. 1153 * </p> 1154 * 1155 * @param out 1156 * the output. 1157 * @param charset 1158 * A charset. 1159 * @return a printer to an output. 1160 * @throws IOException 1161 * thrown if the optional header cannot be printed. 1162 * @since 1.5 1163 */ 1164 @SuppressWarnings("resource") 1165 public CSVPrinter print(final File out, final Charset charset) throws IOException { 1166 // The writer will be closed when close() is called. 1167 return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); 1168 } 1169 1170 /** 1171 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated 1172 * as needed. Useful when one wants to avoid creating CSVPrinters. 1173 * 1174 * @param value 1175 * value to output. 1176 * @param out 1177 * where to print the value. 1178 * @param newRecord 1179 * if this a new record. 
1180 * @throws IOException 1181 * If an I/O error occurs. 1182 * @since 1.4 1183 */ 1184 public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 1185 // null values are considered empty 1186 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 1187 CharSequence charSequence; 1188 if (value == null) { 1189 // https://issues.apache.org/jira/browse/CSV-203 1190 if (null == nullString) { 1191 charSequence = EMPTY; 1192 } else { 1193 if (QuoteMode.ALL == quoteMode) { 1194 charSequence = quotedNullString; 1195 } else { 1196 charSequence = nullString; 1197 } 1198 } 1199 } else { 1200 if (value instanceof CharSequence) { 1201 charSequence = (CharSequence) value; 1202 } else if (value instanceof Reader) { 1203 print((Reader) value, out, newRecord); 1204 return; 1205 } else { 1206 charSequence = value.toString(); 1207 } 1208 } 1209 charSequence = getTrim() ? trim(charSequence) : charSequence; 1210 print(value, charSequence, out, newRecord); 1211 } 1212 1213 private void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) 1214 throws IOException { 1215 final int offset = 0; 1216 final int len = value.length(); 1217 if (!newRecord) { 1218 out.append(getDelimiter()); 1219 } 1220 if (object == null) { 1221 out.append(value); 1222 } else if (isQuoteCharacterSet()) { 1223 // the original object is needed so can check for Number 1224 printWithQuotes(object, value, out, newRecord); 1225 } else if (isEscapeCharacterSet()) { 1226 printWithEscapes(value, out); 1227 } else { 1228 out.append(value, offset, len); 1229 } 1230 } 1231 1232 /** 1233 * Prints to the specified output. 1234 * 1235 * <p> 1236 * See also {@link CSVPrinter}. 1237 * </p> 1238 * 1239 * @param out 1240 * the output. 1241 * @param charset 1242 * A charset. 1243 * @return a printer to an output. 1244 * @throws IOException 1245 * thrown if the optional header cannot be printed. 
1246 * @since 1.5 1247 */ 1248 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 1249 return print(Files.newBufferedWriter(out, charset)); 1250 } 1251 1252 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 1253 // Reader is never null 1254 if (!newRecord) { 1255 out.append(getDelimiter()); 1256 } 1257 if (isQuoteCharacterSet()) { 1258 printWithQuotes(reader, out); 1259 } else if (isEscapeCharacterSet()) { 1260 printWithEscapes(reader, out); 1261 } else if (out instanceof Writer) { 1262 IOUtils.copyLarge(reader, (Writer) out); 1263 } else { 1264 IOUtils.copy(reader, out); 1265 } 1266 1267 } 1268 1269 /** 1270 * Prints to the {@link System#out}. 1271 * 1272 * <p> 1273 * See also {@link CSVPrinter}. 1274 * </p> 1275 * 1276 * @return a printer to {@link System#out}. 1277 * @throws IOException 1278 * thrown if the optional header cannot be printed. 1279 * @since 1.5 1280 */ 1281 public CSVPrinter printer() throws IOException { 1282 return new CSVPrinter(System.out, this); 1283 } 1284 1285 /** 1286 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 1287 * 1288 * @param out 1289 * where to write 1290 * @throws IOException 1291 * If an I/O error occurs 1292 * @since 1.4 1293 */ 1294 public void println(final Appendable out) throws IOException { 1295 if (getTrailingDelimiter()) { 1296 out.append(getDelimiter()); 1297 } 1298 if (recordSeparator != null) { 1299 out.append(recordSeparator); 1300 } 1301 } 1302 1303 /** 1304 * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the 1305 * record separator. 1306 * 1307 * <p> 1308 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record 1309 * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. 
1310 * </p> 1311 * 1312 * @param out 1313 * where to write. 1314 * @param values 1315 * values to output. 1316 * @throws IOException 1317 * If an I/O error occurs. 1318 * @since 1.4 1319 */ 1320 public void printRecord(final Appendable out, final Object... values) throws IOException { 1321 for (int i = 0; i < values.length; i++) { 1322 print(values[i], out, i == 0); 1323 } 1324 println(out); 1325 } 1326 1327 /* 1328 * Note: must only be called if escaping is enabled, otherwise will generate NPE 1329 */ 1330 private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException { 1331 int start = 0; 1332 int pos = 0; 1333 final int len = value.length(); 1334 final int end = len; 1335 1336 final char delim = getDelimiter(); 1337 final char escape = getEscapeCharacter().charValue(); 1338 1339 while (pos < end) { 1340 char c = value.charAt(pos); 1341 if (c == CR || c == LF || c == delim || c == escape) { 1342 // write out segment up until this char 1343 if (pos > start) { 1344 out.append(value, start, pos); 1345 } 1346 if (c == LF) { 1347 c = 'n'; 1348 } else if (c == CR) { 1349 c = 'r'; 1350 } 1351 1352 out.append(escape); 1353 out.append(c); 1354 1355 start = pos + 1; // start on the current char after this one 1356 } 1357 pos++; 1358 } 1359 1360 // write last segment 1361 if (pos > start) { 1362 out.append(value, start, pos); 1363 } 1364 } 1365 1366 private void printWithEscapes(final Reader reader, final Appendable out) throws IOException { 1367 int start = 0; 1368 int pos = 0; 1369 1370 final char delim = getDelimiter(); 1371 final char escape = getEscapeCharacter().charValue(); 1372 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 1373 1374 int c; 1375 while (-1 != (c = reader.read())) { 1376 builder.append((char) c); 1377 if (c == CR || c == LF || c == delim || c == escape) { 1378 // write out segment up until this char 1379 if (pos > start) { 1380 out.append(builder.substring(start, pos)); 1381 
builder.setLength(0); 1382 } 1383 if (c == LF) { 1384 c = 'n'; 1385 } else if (c == CR) { 1386 c = 'r'; 1387 } 1388 1389 out.append(escape); 1390 out.append((char) c); 1391 1392 start = pos + 1; // start on the current char after this one 1393 } 1394 pos++; 1395 } 1396 1397 // write last segment 1398 if (pos > start) { 1399 out.append(builder.substring(start, pos)); 1400 } 1401 } 1402 1403 /* 1404 * Note: must only be called if quoting is enabled, otherwise will generate NPE 1405 */ 1406 // the original object is needed so can check for Number 1407 private void printWithQuotes(final Object object, final CharSequence value, final Appendable out, 1408 final boolean newRecord) throws IOException { 1409 boolean quote = false; 1410 int start = 0; 1411 int pos = 0; 1412 final int len = value.length(); 1413 final int end = len; 1414 1415 final char delimChar = getDelimiter(); 1416 final char quoteChar = getQuoteCharacter().charValue(); 1417 1418 QuoteMode quoteModePolicy = getQuoteMode(); 1419 if (quoteModePolicy == null) { 1420 quoteModePolicy = QuoteMode.MINIMAL; 1421 } 1422 switch (quoteModePolicy) { 1423 case ALL: 1424 case ALL_NON_NULL: 1425 quote = true; 1426 break; 1427 case NON_NUMERIC: 1428 quote = !(object instanceof Number); 1429 break; 1430 case NONE: 1431 // Use the existing escaping code 1432 printWithEscapes(value, out); 1433 return; 1434 case MINIMAL: 1435 if (len <= 0) { 1436 // always quote an empty token that is the first 1437 // on the line, as it may be the only thing on the 1438 // line. If it were not quoted in that case, 1439 // an empty line has no tokens. 1440 if (newRecord) { 1441 quote = true; 1442 } 1443 } else { 1444 char c = value.charAt(pos); 1445 1446 if (c <= COMMENT) { 1447 // Some other chars at the start of a value caused the parser to fail, so for now 1448 // encapsulate if we start in anything less than '#'. We are being conservative 1449 // by including the default comment char too. 
1450 quote = true; 1451 } else { 1452 while (pos < end) { 1453 c = value.charAt(pos); 1454 if (c == LF || c == CR || c == quoteChar || c == delimChar) { 1455 quote = true; 1456 break; 1457 } 1458 pos++; 1459 } 1460 1461 if (!quote) { 1462 pos = end - 1; 1463 c = value.charAt(pos); 1464 // Some other chars at the end caused the parser to fail, so for now 1465 // encapsulate if we end in anything less than ' ' 1466 if (c <= SP) { 1467 quote = true; 1468 } 1469 } 1470 } 1471 } 1472 1473 if (!quote) { 1474 // no encapsulation needed - write out the original value 1475 out.append(value, start, end); 1476 return; 1477 } 1478 break; 1479 default: 1480 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 1481 } 1482 1483 if (!quote) { 1484 // no encapsulation needed - write out the original value 1485 out.append(value, start, end); 1486 return; 1487 } 1488 1489 // we hit something that needed encapsulation 1490 out.append(quoteChar); 1491 1492 // Pick up where we left off: pos should be positioned on the first character that caused 1493 // the need for encapsulation. 1494 while (pos < end) { 1495 final char c = value.charAt(pos); 1496 if (c == quoteChar) { 1497 // write out the chunk up until this point 1498 1499 // add 1 to the length to write out the encapsulator also 1500 out.append(value, start, pos + 1); 1501 // put the next starting position on the encapsulator so we will 1502 // write it out again with the next string (effectively doubling it) 1503 start = pos; 1504 } 1505 pos++; 1506 } 1507 1508 // write the last segment 1509 out.append(value, start, pos); 1510 out.append(quoteChar); 1511 } 1512 1513 /** 1514 * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. 
1515 * 1516 * @throws IOException 1517 */ 1518 private void printWithQuotes(final Reader reader, final Appendable out) throws IOException { 1519 1520 if (getQuoteMode() == QuoteMode.NONE) { 1521 printWithEscapes(reader, out); 1522 return; 1523 } 1524 1525 int pos = 0; 1526 1527 final char quote = getQuoteCharacter().charValue(); 1528 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 1529 1530 out.append(quote); 1531 1532 int c; 1533 while (-1 != (c = reader.read())) { 1534 builder.append((char) c); 1535 if (c == quote) { 1536 // write out segment up until this char 1537 if (pos > 0) { 1538 out.append(builder.substring(0, pos)); 1539 builder.setLength(0); 1540 pos = -1; 1541 } 1542 1543 out.append(quote); 1544 out.append((char) c); 1545 } 1546 pos++; 1547 } 1548 1549 // write last segment 1550 if (pos > 0) { 1551 out.append(builder.substring(0, pos)); 1552 } 1553 1554 out.append(quote); 1555 } 1556 1557 @Override 1558 public String toString() { 1559 final StringBuilder sb = new StringBuilder(); 1560 sb.append("Delimiter=<").append(delimiter).append('>'); 1561 if (isEscapeCharacterSet()) { 1562 sb.append(' '); 1563 sb.append("Escape=<").append(escapeCharacter).append('>'); 1564 } 1565 if (isQuoteCharacterSet()) { 1566 sb.append(' '); 1567 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 1568 } 1569 if (isCommentMarkerSet()) { 1570 sb.append(' '); 1571 sb.append("CommentStart=<").append(commentMarker).append('>'); 1572 } 1573 if (isNullStringSet()) { 1574 sb.append(' '); 1575 sb.append("NullString=<").append(nullString).append('>'); 1576 } 1577 if (recordSeparator != null) { 1578 sb.append(' '); 1579 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 1580 } 1581 if (getIgnoreEmptyLines()) { 1582 sb.append(" EmptyLines:ignored"); 1583 } 1584 if (getIgnoreSurroundingSpaces()) { 1585 sb.append(" SurroundingSpaces:ignored"); 1586 } 1587 if (getIgnoreHeaderCase()) { 1588 sb.append(" IgnoreHeaderCase:ignored"); 1589 } 
1590 sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 1591 if (headerComments != null) { 1592 sb.append(' '); 1593 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 1594 } 1595 if (header != null) { 1596 sb.append(' '); 1597 sb.append("Header:").append(Arrays.toString(header)); 1598 } 1599 return sb.toString(); 1600 } 1601 1602 private String[] toStringArray(final Object[] values) { 1603 if (values == null) { 1604 return null; 1605 } 1606 final String[] strings = new String[values.length]; 1607 for (int i = 0; i < values.length; i++) { 1608 final Object value = values[i]; 1609 strings[i] = value == null ? null : value.toString(); 1610 } 1611 return strings; 1612 } 1613 1614 private CharSequence trim(final CharSequence charSequence) { 1615 if (charSequence instanceof String) { 1616 return ((String) charSequence).trim(); 1617 } 1618 final int count = charSequence.length(); 1619 int len = count; 1620 int pos = 0; 1621 1622 while (pos < len && charSequence.charAt(pos) <= SP) { 1623 pos++; 1624 } 1625 while (pos < len && charSequence.charAt(len - 1) <= SP) { 1626 len--; 1627 } 1628 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1629 } 1630 1631 /** 1632 * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. 
1633 * 1634 * @throws IllegalArgumentException 1635 */ 1636 private void validate() throws IllegalArgumentException { 1637 if (isLineBreak(delimiter)) { 1638 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1639 } 1640 1641 if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) { 1642 throw new IllegalArgumentException( 1643 "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 1644 } 1645 1646 if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) { 1647 throw new IllegalArgumentException( 1648 "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 1649 } 1650 1651 if (commentMarker != null && delimiter == commentMarker.charValue()) { 1652 throw new IllegalArgumentException( 1653 "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 1654 } 1655 1656 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 1657 throw new IllegalArgumentException( 1658 "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 1659 } 1660 1661 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 1662 throw new IllegalArgumentException( 1663 "The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 1664 } 1665 1666 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 1667 throw new IllegalArgumentException("No quotes mode set but no escape character is set"); 1668 } 1669 1670 // validate header 1671 if (header != null) { 1672 final Set<String> dupCheck = new HashSet<>(); 1673 for (final String hdr : header) { 1674 if (!dupCheck.add(hdr)) { 1675 throw new IllegalArgumentException( 1676 "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); 1677 } 1678 } 1679 } 1680 } 1681 1682 /** 1683 * Returns a new {@code CSVFormat} that allows duplicate header names. 
1684 * 1685 * @return a new {@code CSVFormat} that allows duplicate header names 1686 * @since 1.7 1687 */ 1688 public CSVFormat withAllowDuplicateHeaderNames() { 1689 return withAllowDuplicateHeaderNames(true); 1690 } 1691 1692 /** 1693 * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value. 1694 * 1695 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 1696 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. 1697 * @since 1.7 1698 */ 1699 public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 1700 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1701 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1702 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1703 allowDuplicateHeaderNames); 1704 } 1705 1706 /** 1707 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true} 1708 * 1709 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 1710 * @see #withAllowMissingColumnNames(boolean) 1711 * @since 1.1 1712 */ 1713 public CSVFormat withAllowMissingColumnNames() { 1714 return this.withAllowMissingColumnNames(true); 1715 } 1716 1717 /** 1718 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. 1719 * 1720 * @param allowMissingColumnNames 1721 * the missing column names behavior, {@code true} to allow missing column names in the header line, 1722 * {@code false} to cause an {@link IllegalArgumentException} to be thrown. 1723 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
1724 */ 1725 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { 1726 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1727 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1728 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1729 allowDuplicateHeaderNames); 1730 } 1731 1732 /** 1733 * Returns a new {@code CSVFormat} with whether to flush on close. 1734 * 1735 * @param autoFlush 1736 * whether to flush on close. 1737 * 1738 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. 1739 * @since 1.6 1740 */ 1741 public CSVFormat withAutoFlush(final boolean autoFlush) { 1742 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1743 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1744 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1745 allowDuplicateHeaderNames); 1746 } 1747 1748 /** 1749 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1750 * 1751 * Note that the comment start character is only recognized at the start of a line. 1752 * 1753 * @param commentMarker 1754 * the comment start marker 1755 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1756 * @throws IllegalArgumentException 1757 * thrown if the specified character is a line break 1758 */ 1759 public CSVFormat withCommentMarker(final char commentMarker) { 1760 return withCommentMarker(Character.valueOf(commentMarker)); 1761 } 1762 1763 /** 1764 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1765 * 1766 * Note that the comment start character is only recognized at the start of a line. 
1767 * 1768 * @param commentMarker 1769 * the comment start marker, use {@code null} to disable 1770 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1771 * @throws IllegalArgumentException 1772 * thrown if the specified character is a line break 1773 */ 1774 public CSVFormat withCommentMarker(final Character commentMarker) { 1775 if (isLineBreak(commentMarker)) { 1776 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 1777 } 1778 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1779 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1780 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1781 allowDuplicateHeaderNames); 1782 } 1783 1784 /** 1785 * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. 1786 * 1787 * @param delimiter 1788 * the delimiter character 1789 * @return A new CSVFormat that is equal to this with the specified character as delimiter 1790 * @throws IllegalArgumentException 1791 * thrown if the specified character is a line break 1792 */ 1793 public CSVFormat withDelimiter(final char delimiter) { 1794 if (isLineBreak(delimiter)) { 1795 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1796 } 1797 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1798 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1799 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1800 allowDuplicateHeaderNames); 1801 } 1802 1803 /** 1804 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 
1805 * 1806 * @param escape 1807 * the escape character 1808 * @return A new CSVFormat that is equal to his but with the specified character as the escape character 1809 * @throws IllegalArgumentException 1810 * thrown if the specified character is a line break 1811 */ 1812 public CSVFormat withEscape(final char escape) { 1813 return withEscape(Character.valueOf(escape)); 1814 } 1815 1816 /** 1817 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 1818 * 1819 * @param escape 1820 * the escape character, use {@code null} to disable 1821 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 1822 * @throws IllegalArgumentException 1823 * thrown if the specified character is a line break 1824 */ 1825 public CSVFormat withEscape(final Character escape) { 1826 if (isLineBreak(escape)) { 1827 throw new IllegalArgumentException("The escape character cannot be a line break"); 1828 } 1829 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, 1830 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 1831 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1832 allowDuplicateHeaderNames); 1833 } 1834 1835 /** 1836 * Returns a new {@code CSVFormat} using the first record as header. 1837 * 1838 * <p> 1839 * Calling this method is equivalent to calling: 1840 * </p> 1841 * 1842 * <pre> 1843 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); 1844 * </pre> 1845 * 1846 * @return A new CSVFormat that is equal to this but using the first record as header. 1847 * @see #withSkipHeaderRecord(boolean) 1848 * @see #withHeader(String...) 
1849 * @since 1.3 1850 */ 1851 public CSVFormat withFirstRecordAsHeader() { 1852 return withHeader().withSkipHeaderRecord(); 1853 } 1854 1855 /** 1856 * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. 1857 * 1858 * <p> 1859 * Example: 1860 * </p> 1861 * 1862 * <pre> 1863 * public enum Header { 1864 * Name, Email, Phone 1865 * } 1866 * 1867 * CSVFormat format = aformat.withHeader(Header.class); 1868 * </pre> 1869 * <p> 1870 * The header is also used by the {@link CSVPrinter}. 1871 * </p> 1872 * 1873 * @param headerEnum 1874 * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified 1875 * otherwise. 1876 * 1877 * @return A new CSVFormat that is equal to this but with the specified header 1878 * @see #withHeader(String...) 1879 * @see #withSkipHeaderRecord(boolean) 1880 * @since 1.3 1881 */ 1882 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) { 1883 String[] header = null; 1884 if (headerEnum != null) { 1885 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 1886 header = new String[enumValues.length]; 1887 for (int i = 0; i < enumValues.length; i++) { 1888 header[i] = enumValues[i].name(); 1889 } 1890 } 1891 return withHeader(header); 1892 } 1893 1894 /** 1895 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1896 * either be parsed automatically from the input file with: 1897 * 1898 * <pre> 1899 * CSVFormat format = aformat.withHeader(); 1900 * </pre> 1901 * 1902 * or specified manually with: 1903 * 1904 * <pre> 1905 * CSVFormat format = aformat.withHeader(resultSet); 1906 * </pre> 1907 * <p> 1908 * The header is also used by the {@link CSVPrinter}. 1909 * </p> 1910 * 1911 * @param resultSet 1912 * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified 1913 * otherwise. 
1914 * 1915 * @return A new CSVFormat that is equal to this but with the specified header 1916 * @throws SQLException 1917 * SQLException if a database access error occurs or this method is called on a closed result set. 1918 * @since 1.1 1919 */ 1920 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { 1921 return withHeader(resultSet != null ? resultSet.getMetaData() : null); 1922 } 1923 1924 /** 1925 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1926 * either be parsed automatically from the input file with: 1927 * 1928 * <pre> 1929 * CSVFormat format = aformat.withHeader(); 1930 * </pre> 1931 * 1932 * or specified manually with: 1933 * 1934 * <pre> 1935 * CSVFormat format = aformat.withHeader(metaData); 1936 * </pre> 1937 * <p> 1938 * The header is also used by the {@link CSVPrinter}. 1939 * </p> 1940 * 1941 * @param metaData 1942 * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified 1943 * otherwise. 1944 * 1945 * @return A new CSVFormat that is equal to this but with the specified header 1946 * @throws SQLException 1947 * SQLException if a database access error occurs or this method is called on a closed result set. 1948 * @since 1.1 1949 */ 1950 public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException { 1951 String[] labels = null; 1952 if (metaData != null) { 1953 final int columnCount = metaData.getColumnCount(); 1954 labels = new String[columnCount]; 1955 for (int i = 0; i < columnCount; i++) { 1956 labels[i] = metaData.getColumnLabel(i + 1); 1957 } 1958 } 1959 return withHeader(labels); 1960 } 1961 1962 /** 1963 * Returns a new {@code CSVFormat} with the header of the format set to the given values. 
The header can either be 1964 * parsed automatically from the input file with: 1965 * 1966 * <pre> 1967 * CSVFormat format = aformat.withHeader(); 1968 * </pre> 1969 * 1970 * or specified manually with: 1971 * 1972 * <pre> 1973 * CSVFormat format = aformat.withHeader("name", "email", "phone"); 1974 * </pre> 1975 * <p> 1976 * The header is also used by the {@link CSVPrinter}. 1977 * </p> 1978 * 1979 * @param header 1980 * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 1981 * 1982 * @return A new CSVFormat that is equal to this but with the specified header 1983 * @see #withSkipHeaderRecord(boolean) 1984 */ 1985 public CSVFormat withHeader(final String... header) { 1986 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1987 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1988 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1989 allowDuplicateHeaderNames); 1990 } 1991 1992 /** 1993 * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will 1994 * be printed first, before the headers. This setting is ignored by the parser. 1995 * 1996 * <pre> 1997 * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date()); 1998 * </pre> 1999 * 2000 * @param headerComments 2001 * the headerComments which will be printed by the Printer before the actual CSV data. 2002 * 2003 * @return A new CSVFormat that is equal to this but with the specified header 2004 * @see #withSkipHeaderRecord(boolean) 2005 * @since 1.1 2006 */ 2007 public CSVFormat withHeaderComments(final Object... 
headerComments) { 2008 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2009 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2010 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2011 allowDuplicateHeaderNames); 2012 } 2013 2014 /** 2015 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. 2016 * 2017 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2018 * @since {@link #withIgnoreEmptyLines(boolean)} 2019 * @since 1.1 2020 */ 2021 public CSVFormat withIgnoreEmptyLines() { 2022 return this.withIgnoreEmptyLines(true); 2023 } 2024 2025 /** 2026 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. 2027 * 2028 * @param ignoreEmptyLines 2029 * the empty line skipping behavior, {@code true} to ignore the empty lines between the records, 2030 * {@code false} to translate empty lines to empty records. 2031 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2032 */ 2033 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 2034 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2035 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2036 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2037 allowDuplicateHeaderNames); 2038 } 2039 2040 /** 2041 * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 2042 * 2043 * @return A new CSVFormat that will ignore case header name. 
2044 * @see #withIgnoreHeaderCase(boolean) 2045 * @since 1.3 2046 */ 2047 public CSVFormat withIgnoreHeaderCase() { 2048 return this.withIgnoreHeaderCase(true); 2049 } 2050 2051 /** 2052 * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2053 * 2054 * @param ignoreHeaderCase 2055 * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as 2056 * is. 2057 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2058 * @since 1.3 2059 */ 2060 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2061 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2062 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2063 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2064 allowDuplicateHeaderNames); 2065 } 2066 2067 /** 2068 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to {@code true}. 2069 * 2070 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 2071 * @see #withIgnoreSurroundingSpaces(boolean) 2072 * @since 1.1 2073 */ 2074 public CSVFormat withIgnoreSurroundingSpaces() { 2075 return this.withIgnoreSurroundingSpaces(true); 2076 } 2077 2078 /** 2079 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to the given value. 2080 * 2081 * @param ignoreSurroundingSpaces 2082 * the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the 2083 * spaces as is. 2084 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 
2085 */ 2086 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2087 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2088 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2089 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2090 allowDuplicateHeaderNames); 2091 } 2092 2093 /** 2094 * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. 2095 * <ul> 2096 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 2097 * records.</li> 2098 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2099 * </ul> 2100 * 2101 * @param nullString 2102 * the String to convert to and from {@code null}. No substitution occurs if {@code null} 2103 * 2104 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2105 */ 2106 public CSVFormat withNullString(final String nullString) { 2107 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2108 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2109 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2110 allowDuplicateHeaderNames); 2111 } 2112 2113 /** 2114 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
2115 * 2116 * @param quoteChar 2117 * the quoteChar character 2118 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2119 * @throws IllegalArgumentException 2120 * thrown if the specified character is a line break 2121 */ 2122 public CSVFormat withQuote(final char quoteChar) { 2123 return withQuote(Character.valueOf(quoteChar)); 2124 } 2125 2126 /** 2127 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2128 * 2129 * @param quoteChar 2130 * the quoteChar character, use {@code null} to disable 2131 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2132 * @throws IllegalArgumentException 2133 * thrown if the specified character is a line break 2134 */ 2135 public CSVFormat withQuote(final Character quoteChar) { 2136 if (isLineBreak(quoteChar)) { 2137 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 2138 } 2139 return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, 2140 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 2141 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2142 allowDuplicateHeaderNames); 2143 } 2144 2145 /** 2146 * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 2147 * 2148 * @param quoteModePolicy 2149 * the quote policy to use for output. 
2150 * 2151 * @return A new CSVFormat that is equal to this but with the specified quote policy 2152 */ 2153 public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { 2154 return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, 2155 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2156 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2157 allowDuplicateHeaderNames); 2158 } 2159 2160 /** 2161 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. 2162 * 2163 * <p> 2164 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2165 * only works for inputs with '\n', '\r' and "\r\n" 2166 * </p> 2167 * 2168 * @param recordSeparator 2169 * the record separator to use for output. 2170 * 2171 * @return A new CSVFormat that is equal to this but with the specified output record separator 2172 */ 2173 public CSVFormat withRecordSeparator(final char recordSeparator) { 2174 return withRecordSeparator(String.valueOf(recordSeparator)); 2175 } 2176 2177 /** 2178 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. 2179 * 2180 * <p> 2181 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2182 * only works for inputs with '\n', '\r' and "\r\n" 2183 * </p> 2184 * 2185 * @param recordSeparator 2186 * the record separator to use for output. 
2187 * 2188 * @return A new CSVFormat that is equal to this but with the specified output record separator 2189 * @throws IllegalArgumentException 2190 * if recordSeparator is none of CR, LF or CRLF 2191 */ 2192 public CSVFormat withRecordSeparator(final String recordSeparator) { 2193 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2194 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2195 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2196 allowDuplicateHeaderNames); 2197 } 2198 2199 /** 2200 * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. 2201 * 2202 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 2203 * @see #withSkipHeaderRecord(boolean) 2204 * @see #withHeader(String...) 2205 * @since 1.1 2206 */ 2207 public CSVFormat withSkipHeaderRecord() { 2208 return this.withSkipHeaderRecord(true); 2209 } 2210 2211 /** 2212 * Returns a new {@code CSVFormat} with whether to skip the header record. 2213 * 2214 * @param skipHeaderRecord 2215 * whether to skip the header record. 2216 * 2217 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 2218 * @see #withHeader(String...) 2219 */ 2220 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 2221 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2222 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2223 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2224 allowDuplicateHeaderNames); 2225 } 2226 2227 /** 2228 * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line 2229 * separator string, typically CR+LF on Windows and LF on Linux. 
2230 * 2231 * <p> 2232 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2233 * only works for inputs with '\n', '\r' and "\r\n" 2234 * </p> 2235 * 2236 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 2237 * @since 1.6 2238 */ 2239 public CSVFormat withSystemRecordSeparator() { 2240 return withRecordSeparator(System.getProperty("line.separator")); 2241 } 2242 2243 /** 2244 * Returns a new {@code CSVFormat} to add a trailing delimiter. 2245 * 2246 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 2247 * @since 1.3 2248 */ 2249 public CSVFormat withTrailingDelimiter() { 2250 return withTrailingDelimiter(true); 2251 } 2252 2253 /** 2254 * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. 2255 * 2256 * @param trailingDelimiter 2257 * whether to add a trailing delimiter. 2258 * 2259 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 2260 * @since 1.3 2261 */ 2262 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 2263 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2264 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2265 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2266 allowDuplicateHeaderNames); 2267 } 2268 2269 /** 2270 * Returns a new {@code CSVFormat} to trim leading and trailing blanks. 2271 * 2272 * @return A new CSVFormat that is equal to this but with the trim setting on. 2273 * @since 1.3 2274 */ 2275 public CSVFormat withTrim() { 2276 return withTrim(true); 2277 } 2278 2279 /** 2280 * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. 2281 * 2282 * @param trim 2283 * whether to trim leading and trailing blanks. 
2284 * 2285 * @return A new CSVFormat that is equal to this but with the specified trim setting. 2286 * @since 1.3 2287 */ 2288 public CSVFormat withTrim(final boolean trim) { 2289 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2290 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2291 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2292 allowDuplicateHeaderNames); 2293 } 2294}