|
5 | 5 | import org.jetbrains.annotations.Nls; |
6 | 6 | import org.jetbrains.annotations.NotNull; |
7 | 7 |
|
8 | | -import java.util.*; |
9 | | -import java.util.regex.Pattern; |
| 8 | +import java.util.List; |
10 | 9 |
|
11 | 10 | public final class NlsCapitalizationUtil { |
12 | | - private static final Set<String> TITLE_CASE_LOWERCASE_WORDS = Set.of( |
13 | | - "a", "an", "the", |
14 | | - "and", "or", "but", |
15 | | - "at", "by", "for", "from", "in", "into", "of", "off", "on", "onto", "out", "over", "to", "up", "with" |
16 | | - ); |
17 | | - private static final Pattern PERIOD_PATTERN = Pattern.compile("\\.(?!\\s*$)"); |
18 | | - private static final Pattern DOUBLE_QUOTES_PATTERN = Pattern.compile("[“”\"]"); |
19 | | - private static final Pattern EXCLAMATION_PATTERN = Pattern.compile("!"); |
20 | | - private static final Pattern CONTRACTION_PATTERN = Pattern.compile("(?i)\\b(can't|won't|isn't|aren't|wasn't|weren't|hasn't|haven't|hadn't|doesn't|don't|didn't|shouldn't|wouldn't|couldn't|mightn't|mustn't)\\b(?<!Don't)"); |
21 | | - private static final Pattern WHITESPACE_SPLIT_PATTERN = Pattern.compile("(?<=\\s)|(?=\\s)"); |
22 | | - private static final Pattern PUNCTUATION_WITH_WORD_PATTERN = Pattern.compile("(^\\P{Alnum}*)([\\p{Alnum}]+)(\\P{Alnum}*$)"); |
23 | | - private static final Pattern LEADING_PUNCTUATION_WITH_FIRST_LETTER_PATTERN = Pattern.compile("(^\\P{Alnum}*)(\\p{Alpha})(.*)"); |
24 | | - private static final Pattern SPECIAL_PREFIX_PATTERN = Pattern.compile("^[.*~].*"); |
25 | | - |
26 | 11 | public static boolean isCapitalizationSatisfied(String value, Nls.Capitalization capitalization) { |
27 | 12 | if (StringUtil.isEmpty(value) || capitalization == Nls.Capitalization.NotSpecified) { |
28 | 13 | return true; |
29 | 14 | } |
| 15 | + |
30 | 16 | return capitalization == Nls.Capitalization.Title |
31 | | - ? checkTitleCapitalization(value) |
| 17 | + ? StringUtil.wordsToBeginFromUpperCase(value).equals(value) |
32 | 18 | : checkSentenceCapitalization(value); |
33 | 19 | } |
34 | 20 |
|
35 | | - private static List<String> splitByWhitespace(String s) { |
36 | | - return Arrays.stream(s.trim().split("\\s+")) |
37 | | - .filter(str -> !str.isEmpty()) |
38 | | - .toList(); |
39 | | - } |
40 | | - |
41 | | - private static boolean checkTitleCapitalization(@NotNull String value) { |
42 | | - List<String> words = splitByWhitespace(value); |
43 | | - final int wordCount = words.size(); |
44 | | - if (wordCount == 0) return true; |
45 | | - for (int i = 0; i < wordCount; i++) { |
46 | | - String word = words.get(i); |
47 | | - if (word.isEmpty()) continue; |
48 | | - String cleanWord = stripPunctuation(word); |
49 | | - if (cleanWord.isEmpty()) continue; |
50 | | - // Check if it's a special case (like iOS, macOS) |
51 | | - if (hasInternalCapitalization(cleanWord)) { |
52 | | - continue; |
53 | | - } |
54 | | - if (i == 0 || i == wordCount - 1) { |
55 | | - if (!isCapitalizedWord(cleanWord)) return false; |
56 | | - } |
57 | | - else { |
58 | | - String lowerWord = cleanWord.toLowerCase(Locale.ENGLISH); |
59 | | - if (TITLE_CASE_LOWERCASE_WORDS.contains(lowerWord)) { |
60 | | - if (isCapitalizedWord(cleanWord)) return false; |
61 | | - } |
62 | | - else { |
63 | | - if (!isCapitalizedWord(cleanWord)) return false; |
64 | | - } |
65 | | - } |
66 | | - } |
67 | | - return true; |
68 | | - } |
69 | | - |
70 | | - private static boolean hasInternalCapitalization(@NotNull String word) { |
71 | | - if (word.length() <= 1) return false; |
72 | | - boolean hasLowerCase = false; |
73 | | - boolean hasUpperCaseAfterFirst = false; |
74 | | - for (int i = 0; i < word.length(); i++) { |
75 | | - char c = word.charAt(i); |
76 | | - if (Character.isLetter(c)) { |
77 | | - if (Character.isLowerCase(c)) { |
78 | | - hasLowerCase = true; |
79 | | - } |
80 | | - else if (i > 0 && Character.isUpperCase(c)) { |
81 | | - hasUpperCaseAfterFirst = true; |
82 | | - } |
83 | | - } |
84 | | - } |
85 | | - return hasLowerCase && hasUpperCaseAfterFirst; |
86 | | - } |
87 | | - |
88 | 21 | private static boolean checkSentenceCapitalization(@NotNull String value) { |
89 | 22 | List<String> words = StringUtil.split(value, " "); |
90 | 23 | final int wordCount = words.size(); |
@@ -116,98 +49,9 @@ private static boolean isCapitalizedWord(String word) { |
116 | 49 | return !word.isEmpty() && Character.isLetter(word.charAt(0)) && Character.isUpperCase(word.charAt(0)); |
117 | 50 | } |
118 | 51 |
|
119 | | - private static @NotNull String stripPunctuation(@NotNull String word) { |
120 | | - int start = 0; |
121 | | - int end = word.length(); |
122 | | - while (start < end && !Character.isLetterOrDigit(word.charAt(start))) { |
123 | | - start++; |
124 | | - } |
125 | | - while (end > start && !Character.isLetterOrDigit(word.charAt(end - 1))) { |
126 | | - end--; |
127 | | - } |
128 | | - return start < end ? word.substring(start, end) : ""; |
129 | | - } |
130 | | - |
131 | | - public static boolean checkPunctuation(@NotNull String value) { |
132 | | - if (PERIOD_PATTERN.matcher(value).find()) { |
133 | | - return value.endsWith("."); |
134 | | - } |
135 | | - if (value.endsWith(".")) { |
136 | | - return false; |
137 | | - } |
138 | | - if (DOUBLE_QUOTES_PATTERN.matcher(value).find()) { |
139 | | - return false; |
140 | | - } |
141 | | - if (EXCLAMATION_PATTERN.matcher(value).find()) { |
142 | | - return false; |
143 | | - } |
144 | | - if (CONTRACTION_PATTERN.matcher(value).find()) { |
145 | | - return false; |
146 | | - } |
147 | | - return true; |
148 | | - } |
149 | | - |
150 | 52 | public static @NotNull String fixValue(String string, Nls.Capitalization capitalization) { |
151 | | - if (capitalization == Nls.Capitalization.Title) { |
152 | | - return fixTitleCapitalization(string); |
153 | | - } |
154 | | - else { |
155 | | - return StringUtil.capitalize(StringUtil.wordsToBeginFromLowerCase(string)); |
156 | | - } |
157 | | - } |
158 | | - |
159 | | - private static String fixTitleCapitalization(String text) { |
160 | | - if (text == null || text.isBlank()) return text; |
161 | | - String[] tokens = WHITESPACE_SPLIT_PATTERN.split(text); |
162 | | - |
163 | | - int firstWordIndex = -1, lastWordIndex = -1; |
164 | | - for (int i = 0; i < tokens.length; i++) { |
165 | | - String cleanedToken = stripPunctuation(tokens[i]); |
166 | | - if (!cleanedToken.isEmpty()) { |
167 | | - if (firstWordIndex == -1) firstWordIndex = i; |
168 | | - lastWordIndex = i; |
169 | | - } |
170 | | - } |
171 | | - if (firstWordIndex == -1) return text; |
172 | | - |
173 | | - StringBuilder result = new StringBuilder(); |
174 | | - for (int i = 0; i < tokens.length; i++) { |
175 | | - String token = tokens[i]; |
176 | | - if (token.isBlank()) { |
177 | | - result.append(token); |
178 | | - continue; |
179 | | - } |
180 | | - String cleanedToken = stripPunctuation(token); |
181 | | - if (cleanedToken.isEmpty()) { |
182 | | - result.append(token); |
183 | | - continue; |
184 | | - } |
185 | | - String lowercaseToken = cleanedToken.toLowerCase(Locale.ENGLISH); |
186 | | - boolean isFirstWord = i == firstWordIndex; |
187 | | - boolean isLastWord = i == lastWordIndex; |
188 | | - |
189 | | - if (!isFirstWord && !isLastWord && TITLE_CASE_LOWERCASE_WORDS.contains(lowercaseToken)) { |
190 | | - result.append(applyLowercaseToTokenPreservingPunctuation(token, lowercaseToken)); |
191 | | - } else if (hasInternalCapitalization(cleanedToken)) { |
192 | | - result.append(token); |
193 | | - } else { |
194 | | - result.append(capitalizeFirstLetter(token)); |
195 | | - } |
196 | | - } |
197 | | - return result.toString(); |
198 | | - } |
199 | | - |
200 | | - private static String applyLowercaseToTokenPreservingPunctuation(String token, String lowercaseWord) { |
201 | | - var matcher = PUNCTUATION_WITH_WORD_PATTERN.matcher(token); |
202 | | - return matcher.matches() ? matcher.group(1) + lowercaseWord + matcher.group(3) : token; |
203 | | - } |
204 | | - |
205 | | - private static String capitalizeFirstLetter(String token) { |
206 | | - if (token.isEmpty() || SPECIAL_PREFIX_PATTERN.matcher(token).matches()) return token; |
207 | | - var matcher = LEADING_PUNCTUATION_WITH_FIRST_LETTER_PATTERN.matcher(token); |
208 | | - if (matcher.matches()) { |
209 | | - return matcher.group(1) + Character.toUpperCase(matcher.group(2).charAt(0)) + matcher.group(3); |
210 | | - } |
211 | | - return token; |
| 53 | + return capitalization == Nls.Capitalization.Title |
| 54 | + ? StringUtil.wordsToBeginFromUpperCase(string) |
| 55 | + : StringUtil.capitalize(StringUtil.wordsToBeginFromLowerCase(string)); |
212 | 56 | } |
213 | 57 | } |
0 commit comments