From 3ff112fdc173749bbc898463e639165a0baa4f5f Mon Sep 17 00:00:00 2001 From: lucap Date: Mon, 4 Feb 2019 13:24:21 +0100 Subject: [PATCH 1/2] Using isQuoteCharacter in DelimitedLineTokenizer instead of direct comparison of chars --- .../transform/DelimitedLineTokenizer.java | 199 ++++++++---------- 1 file changed, 90 insertions(+), 109 deletions(-) diff --git a/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java b/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java index 2756fd682b..a044fa2b52 100644 --- a/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java +++ b/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java @@ -1,17 +1,14 @@ /* * Copyright 2006-2018 the original author or authors. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations + * under the License. */ package org.springframework.batch.item.file.transform; @@ -26,8 +23,7 @@ import org.springframework.util.StringUtils; /** - * A {@link LineTokenizer} implementation that splits the input String on a - * configurable delimiter. This implementation also supports the use of an + * A {@link LineTokenizer} implementation that splits the input String on a configurable delimiter. This implementation also supports the use of an * escape character to escape delimiters and line endings. * * @author Rob Harrop @@ -36,7 +32,8 @@ * @author Olivier Bourgain */ public class DelimitedLineTokenizer extends AbstractLineTokenizer - implements InitializingBean { + implements InitializingBean { + /** * Convenient constant for the common case of a tab delimiter. */ @@ -48,8 +45,7 @@ public class DelimitedLineTokenizer extends AbstractLineTokenizer public static final String DELIMITER_COMMA = ","; /** - * Convenient constant for the common case of a " character used to escape - * delimiters or line endings. + * Convenient constant for the common case of a " character used to escape delimiters or line endings. */ public static final char DEFAULT_QUOTE_CHARACTER = '"'; @@ -60,13 +56,12 @@ public class DelimitedLineTokenizer extends AbstractLineTokenizer private String quoteString; - private String escapedQuoteString; + private String escapedQuoteString; private Collection includedFields = null; /** - * Create a new instance of the {@link DelimitedLineTokenizer} class for the - * common case where the delimiter is a {@link #DELIMITER_COMMA comma}. + * Create a new instance of the {@link DelimitedLineTokenizer} class for the common case where the delimiter is a {@link #DELIMITER_COMMA comma}. * * @see #DelimitedLineTokenizer(String) * @see #DELIMITER_COMMA @@ -78,7 +73,7 @@ public DelimitedLineTokenizer() { /** * Create a new instance of the {@link DelimitedLineTokenizer} class. * - * @param delimiter the desired delimiter. This is required + * @param delimiter the desired delimiter. This is required */ public DelimitedLineTokenizer(String delimiter) { Assert.notNull(delimiter, "A delimiter is required"); @@ -99,10 +94,8 @@ public void setDelimiter(String delimiter) { } /** - * The fields to include in the output by position (starting at 0). By - * default all fields are included, but this property can be set to pick out - * only a few fields from a larger set. Note that if field names are - * provided, their number must match the number of included fields. + * The fields to include in the output by position (starting at 0). By default all fields are included, but this property can be set to pick out + * only a few fields from a larger set. Note that if field names are provided, their number must match the number of included fields. * * @param includedFields the included fields to set */ @@ -114,10 +107,8 @@ public void setIncludedFields(int... includedFields) { } /** - * Public setter for the quoteCharacter. The quote character can be used to - * extend a field across line endings or to enclose a String which contains - * the delimiter. Inside a quoted token the quote character can be used to - * escape itself, thus "a""b""c" is tokenized to a"b"c. + * Public setter for the quoteCharacter. The quote character can be used to extend a field across line endings or to enclose a String which + * contains the delimiter. Inside a quoted token the quote character can be used to escape itself, thus "a""b""c" is tokenized to a"b"c. * * @param quoteCharacter the quoteCharacter to set * @@ -126,12 +117,11 @@ public void setIncludedFields(int... includedFields) { public void setQuoteCharacter(char quoteCharacter) { this.quoteCharacter = quoteCharacter; this.quoteString = "" + quoteCharacter; - this.escapedQuoteString = "" + quoteCharacter + quoteCharacter; + this.escapedQuoteString = "" + quoteCharacter + quoteCharacter; } /** - * Yields the tokens resulting from the splitting of the supplied - * line. + * Yields the tokens resulting from the splitting of the supplied line. * * @param line the line to be tokenized * @@ -154,7 +144,7 @@ protected List doTokenize(String line) { char currentChar = line.charAt(i); boolean isEnd = (i == (length - 1)); - boolean isDelimiter = endsWithDelimiter(line, i, endIndexLastDelimiter); + boolean isDelimiter = endsWithDelimiter(line, i, endIndexLastDelimiter); if ((isDelimiter && !inQuoted) || isEnd) { endIndexLastDelimiter = i; @@ -162,14 +152,13 @@ protected List doTokenize(String line) { if (isEnd && isDelimiter) { endPosition = endPosition - delimiter.length(); - } - else if (!isEnd){ + } else if (!isEnd) { endPosition = (endPosition - delimiter.length()) + 1; } if (includedFields == null || includedFields.contains(fieldCount)) { - String value = - substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(line, lastCut, endPosition); + String value = + substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(line, lastCut, endPosition); tokens.add(value); } @@ -183,8 +172,7 @@ else if (!isEnd){ } lastCut = i + 1; - } - else if (isQuoteCharacter(currentChar)) { + } else if (isQuoteCharacter(currentChar)) { inQuoted = !inQuoted; } @@ -193,82 +181,75 @@ else if (isQuoteCharacter(currentChar)) { return tokens; } - /** - * Trim any leading or trailing quotes (and any leading or trailing - * whitespace before or after the quotes) from within the specified character - * array beginning at the specified offset index for the specified count. - *

- * Quotes are escaped with double instances of the quote character. - * - * @param line the string - * @param offset index from which to begin extracting substring - * @param count length of substring - * @return a substring from the specified offset within the character array - * with any leading or trailing whitespace trimmed. - * @see String#trim() - */ - private String substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(String line, int offset, int count) { - int start = offset; - int len = count; - - while ((start < (start + len - 1)) && (line.charAt(start) <= ' ')) { - start++; - len--; - } - - while ((start < (start + len)) && ((start + len - 1 < line.length()) && (line.charAt(start + len - 1) <= ' '))) { - len--; - } - - String value; - - if ((line.length() >= 2) && (line.charAt(start) == quoteCharacter) && (line.charAt(start + len - 1) == quoteCharacter)) { + /** + * Trim any leading or trailing quotes (and any leading or trailing whitespace before or after the quotes) from within the specified character + * array beginning at the specified offset index for the specified count. + *

+ * Quotes are escaped with double instances of the quote character. + * + * @param line the string + * @param offset index from which to begin extracting substring + * @param count length of substring + * @return a substring from the specified offset within the character array with any leading or trailing whitespace trimmed. + * @see String#trim() + */ + private String substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(String line, int offset, int count) { + int start = offset; + int len = count; + + while ((start < (start + len - 1)) && (line.charAt(start) <= ' ')) { + start++; + len--; + } + + while ((start < (start + len)) && ((start + len - 1 < line.length()) && (line.charAt(start + len - 1) <= ' '))) { + len--; + } + + String value; + + if ((line.length() >= 2) && isQuoteCharacter(line.charAt(start)) && isQuoteCharacter(line.charAt(start + len - 1))) { int beginIndex = start + 1; int endIndex = len - 2; value = line.substring(beginIndex, beginIndex + endIndex); - if (value.contains(escapedQuoteString)) { - value = StringUtils.replace(value, escapedQuoteString, quoteString); - } - } - else { - value = line.substring(offset, offset + count); - } - - return value; - } - - /** - * Do the character(s) in the specified array end, at the specified end - * index, with the delimiter character(s)? - *

- * Checks that the specified end index is sufficiently greater than the - * specified previous delimiter end index to warrant trying to match - * another delimiter. Also checks that the specified end index is - * sufficiently large to be able to match the length of a delimiter. - * - * @param line the string - * @param end the index in up to which the delimiter should be matched - * @param previous the index of the end of the last delimiter - * @return true if the character(s) from the specified end - * match the delimiter character(s), otherwise false - * @see DelimitedLineTokenizer#DelimitedLineTokenizer(String) - */ - private boolean endsWithDelimiter(String line, int end, int previous) { - boolean result = false; - - if (end - previous >= delimiter.length()) { - if (end >= delimiter.length() - 1) { - result = true; - for (int j = 0; j < delimiter.length() && (((end - delimiter.length() + 1) + j) < line.length()); j++) { - if (delimiter.charAt(j) != line.charAt((end - delimiter.length() + 1) + j)) { - result = false; - } - } - } - } - - return result; - } + if (value.contains(escapedQuoteString)) { + value = StringUtils.replace(value, escapedQuoteString, quoteString); + } + } else { + value = line.substring(offset, offset + count); + } + + return value; + } + + /** + * Do the character(s) in the specified array end, at the specified end index, with the delimiter character(s)? + *

+ * Checks that the specified end index is sufficiently greater than the specified previous delimiter end index to warrant trying to match another + * delimiter. Also checks that the specified end index is sufficiently large to be able to match the length of a delimiter. + * + * @param line the string + * @param end the index in up to which the delimiter should be matched + * @param previous the index of the end of the last delimiter + * @return true if the character(s) from the specified end match the delimiter character(s), otherwise false + * @see DelimitedLineTokenizer#DelimitedLineTokenizer(String) + */ + private boolean endsWithDelimiter(String line, int end, int previous) { + boolean result = false; + + if (end - previous >= delimiter.length()) { + if (end >= delimiter.length() - 1) { + result = true; + for (int j = 0; j < delimiter.length() && (((end - delimiter.length() + 1) + j) < line.length()); j++) { + if (delimiter.charAt(j) != line.charAt((end - delimiter.length() + 1) + j)) { + result = false; + } + } + } + } + + return result; + } /** * Is the supplied character a quote character? From f32fde27924719c4725ca6e24f74bc268dd19299 Mon Sep 17 00:00:00 2001 From: lucap Date: Mon, 4 Feb 2019 13:36:00 +0100 Subject: [PATCH 2/2] fixed formatting errors --- .../transform/DelimitedLineTokenizer.java | 199 ++++++++++-------- 1 file changed, 109 insertions(+), 90 deletions(-) diff --git a/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java b/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java index a044fa2b52..e9caaabd86 100644 --- a/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java +++ b/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/transform/DelimitedLineTokenizer.java @@ -1,14 +1,17 @@ /* * Copyright 2006-2018 the original author or authors. * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a - * copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.springframework.batch.item.file.transform; @@ -23,7 +26,8 @@ import org.springframework.util.StringUtils; /** - * A {@link LineTokenizer} implementation that splits the input String on a configurable delimiter. This implementation also supports the use of an + * A {@link LineTokenizer} implementation that splits the input String on a + * configurable delimiter. This implementation also supports the use of an * escape character to escape delimiters and line endings. * * @author Rob Harrop @@ -32,8 +36,7 @@ * @author Olivier Bourgain */ public class DelimitedLineTokenizer extends AbstractLineTokenizer - implements InitializingBean { - + implements InitializingBean { /** * Convenient constant for the common case of a tab delimiter. */ @@ -45,7 +48,8 @@ public class DelimitedLineTokenizer extends AbstractLineTokenizer public static final String DELIMITER_COMMA = ","; /** - * Convenient constant for the common case of a " character used to escape delimiters or line endings. + * Convenient constant for the common case of a " character used to escape + * delimiters or line endings. */ public static final char DEFAULT_QUOTE_CHARACTER = '"'; @@ -56,12 +60,13 @@ public class DelimitedLineTokenizer extends AbstractLineTokenizer private String quoteString; - private String escapedQuoteString; + private String escapedQuoteString; private Collection includedFields = null; /** - * Create a new instance of the {@link DelimitedLineTokenizer} class for the common case where the delimiter is a {@link #DELIMITER_COMMA comma}. + * Create a new instance of the {@link DelimitedLineTokenizer} class for the + * common case where the delimiter is a {@link #DELIMITER_COMMA comma}. * * @see #DelimitedLineTokenizer(String) * @see #DELIMITER_COMMA @@ -73,7 +78,7 @@ public DelimitedLineTokenizer() { /** * Create a new instance of the {@link DelimitedLineTokenizer} class. * - * @param delimiter the desired delimiter. This is required + * @param delimiter the desired delimiter. This is required */ public DelimitedLineTokenizer(String delimiter) { Assert.notNull(delimiter, "A delimiter is required"); @@ -94,8 +99,10 @@ public void setDelimiter(String delimiter) { } /** - * The fields to include in the output by position (starting at 0). By default all fields are included, but this property can be set to pick out - * only a few fields from a larger set. Note that if field names are provided, their number must match the number of included fields. + * The fields to include in the output by position (starting at 0). By + * default all fields are included, but this property can be set to pick out + * only a few fields from a larger set. Note that if field names are + * provided, their number must match the number of included fields. * * @param includedFields the included fields to set */ @@ -107,8 +114,10 @@ public void setIncludedFields(int... includedFields) { } /** - * Public setter for the quoteCharacter. The quote character can be used to extend a field across line endings or to enclose a String which - * contains the delimiter. Inside a quoted token the quote character can be used to escape itself, thus "a""b""c" is tokenized to a"b"c. + * Public setter for the quoteCharacter. The quote character can be used to + * extend a field across line endings or to enclose a String which contains + * the delimiter. Inside a quoted token the quote character can be used to + * escape itself, thus "a""b""c" is tokenized to a"b"c. * * @param quoteCharacter the quoteCharacter to set * @@ -117,11 +126,12 @@ public void setIncludedFields(int... includedFields) { public void setQuoteCharacter(char quoteCharacter) { this.quoteCharacter = quoteCharacter; this.quoteString = "" + quoteCharacter; - this.escapedQuoteString = "" + quoteCharacter + quoteCharacter; + this.escapedQuoteString = "" + quoteCharacter + quoteCharacter; } /** - * Yields the tokens resulting from the splitting of the supplied line. + * Yields the tokens resulting from the splitting of the supplied + * line. * * @param line the line to be tokenized * @@ -144,7 +154,7 @@ protected List doTokenize(String line) { char currentChar = line.charAt(i); boolean isEnd = (i == (length - 1)); - boolean isDelimiter = endsWithDelimiter(line, i, endIndexLastDelimiter); + boolean isDelimiter = endsWithDelimiter(line, i, endIndexLastDelimiter); if ((isDelimiter && !inQuoted) || isEnd) { endIndexLastDelimiter = i; @@ -152,13 +162,14 @@ protected List doTokenize(String line) { if (isEnd && isDelimiter) { endPosition = endPosition - delimiter.length(); - } else if (!isEnd) { + } + else if (!isEnd){ endPosition = (endPosition - delimiter.length()) + 1; } if (includedFields == null || includedFields.contains(fieldCount)) { - String value = - substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(line, lastCut, endPosition); + String value = + substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(line, lastCut, endPosition); tokens.add(value); } @@ -172,7 +183,8 @@ protected List doTokenize(String line) { } lastCut = i + 1; - } else if (isQuoteCharacter(currentChar)) { + } + else if (isQuoteCharacter(currentChar)) { inQuoted = !inQuoted; } @@ -181,75 +193,82 @@ protected List doTokenize(String line) { return tokens; } - /** - * Trim any leading or trailing quotes (and any leading or trailing whitespace before or after the quotes) from within the specified character - * array beginning at the specified offset index for the specified count. - *

- * Quotes are escaped with double instances of the quote character. - * - * @param line the string - * @param offset index from which to begin extracting substring - * @param count length of substring - * @return a substring from the specified offset within the character array with any leading or trailing whitespace trimmed. - * @see String#trim() - */ - private String substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(String line, int offset, int count) { - int start = offset; - int len = count; - - while ((start < (start + len - 1)) && (line.charAt(start) <= ' ')) { - start++; - len--; - } - - while ((start < (start + len)) && ((start + len - 1 < line.length()) && (line.charAt(start + len - 1) <= ' '))) { - len--; - } - - String value; - - if ((line.length() >= 2) && isQuoteCharacter(line.charAt(start)) && isQuoteCharacter(line.charAt(start + len - 1))) { + /** + * Trim any leading or trailing quotes (and any leading or trailing + * whitespace before or after the quotes) from within the specified character + * array beginning at the specified offset index for the specified count. + *

+ * Quotes are escaped with double instances of the quote character. + * + * @param line the string + * @param offset index from which to begin extracting substring + * @param count length of substring + * @return a substring from the specified offset within the character array + * with any leading or trailing whitespace trimmed. + * @see String#trim() + */ + private String substringWithTrimmedWhitespaceAndQuotesIfQuotesPresent(String line, int offset, int count) { + int start = offset; + int len = count; + + while ((start < (start + len - 1)) && (line.charAt(start) <= ' ')) { + start++; + len--; + } + + while ((start < (start + len)) && ((start + len - 1 < line.length()) && (line.charAt(start + len - 1) <= ' '))) { + len--; + } + + String value; + + if ((line.length() >= 2) && isQuoteCharacter(line.charAt(start)) && isQuoteCharacter(line.charAt(start + len - 1))) { int beginIndex = start + 1; int endIndex = len - 2; value = line.substring(beginIndex, beginIndex + endIndex); - if (value.contains(escapedQuoteString)) { - value = StringUtils.replace(value, escapedQuoteString, quoteString); - } - } else { - value = line.substring(offset, offset + count); - } - - return value; - } - - /** - * Do the character(s) in the specified array end, at the specified end index, with the delimiter character(s)? - *

- * Checks that the specified end index is sufficiently greater than the specified previous delimiter end index to warrant trying to match another - * delimiter. Also checks that the specified end index is sufficiently large to be able to match the length of a delimiter. - * - * @param line the string - * @param end the index in up to which the delimiter should be matched - * @param previous the index of the end of the last delimiter - * @return true if the character(s) from the specified end match the delimiter character(s), otherwise false - * @see DelimitedLineTokenizer#DelimitedLineTokenizer(String) - */ - private boolean endsWithDelimiter(String line, int end, int previous) { - boolean result = false; - - if (end - previous >= delimiter.length()) { - if (end >= delimiter.length() - 1) { - result = true; - for (int j = 0; j < delimiter.length() && (((end - delimiter.length() + 1) + j) < line.length()); j++) { - if (delimiter.charAt(j) != line.charAt((end - delimiter.length() + 1) + j)) { - result = false; - } - } - } - } - - return result; - } + if (value.contains(escapedQuoteString)) { + value = StringUtils.replace(value, escapedQuoteString, quoteString); + } + } + else { + value = line.substring(offset, offset + count); + } + + return value; + } + + /** + * Do the character(s) in the specified array end, at the specified end + * index, with the delimiter character(s)? + *

+ * Checks that the specified end index is sufficiently greater than the + * specified previous delimiter end index to warrant trying to match + * another delimiter. Also checks that the specified end index is + * sufficiently large to be able to match the length of a delimiter. + * + * @param line the string + * @param end the index in up to which the delimiter should be matched + * @param previous the index of the end of the last delimiter + * @return true if the character(s) from the specified end + * match the delimiter character(s), otherwise false + * @see DelimitedLineTokenizer#DelimitedLineTokenizer(String) + */ + private boolean endsWithDelimiter(String line, int end, int previous) { + boolean result = false; + + if (end - previous >= delimiter.length()) { + if (end >= delimiter.length() - 1) { + result = true; + for (int j = 0; j < delimiter.length() && (((end - delimiter.length() + 1) + j) < line.length()); j++) { + if (delimiter.charAt(j) != line.charAt((end - delimiter.length() + 1) + j)) { + result = false; + } + } + } + } + + return result; + } /** * Is the supplied character a quote character?