1 package org.davidmoten.text.utils;
2
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;

import com.github.davidmoten.guavamini.Preconditions;
import com.github.davidmoten.guavamini.annotations.VisibleForTesting;
25
26 public final class WordWrap {
27
/**
 * Not instantiable: everything this class offers is reached through its
 * static members.
 */
private WordWrap() {
    // prevent instantiation
}
31
32 private static final String SPECIAL_WORD_CHARS = "\"\'\u2018\u2019\u201C\u201D?./!,;:_";
33
34 public static final Set<Character> SPECIAL_WORD_CHARS_SET_DEFAULT = toSet(SPECIAL_WORD_CHARS);
35
36 private static final Function<CharSequence, Number> STRING_WIDTH_DEFAULT = s -> s.length();
37
38 private static final String PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
39
40
41
42
43
44
45
46
47 public static Builder from(Reader reader) {
48 return from(reader, false);
49 }
50
51
52
53
54
55
56
57
58
59 public static Builder fromClasspathUtf8(String resource) {
60 return fromClasspath(resource, StandardCharsets.UTF_8);
61 }
62
63
64
65
66
67
68
69
70
71
72 public static Builder fromClasspath(String resource, Charset charset) {
73 return new Builder(new BufferedReader(
74 new InputStreamReader(WordWrap.class.getResourceAsStream(resource), charset)),
75 true);
76 }
77
78
79
80
81
82
83
84
85 public static Builder from(CharSequence text) {
86 return from(new BufferedReader(new CharSequenceReader(text)), true);
87 }
88
89
90
91
92
93
94
95
96 public static Builder fromUtf8(InputStream in) {
97 return from(in, StandardCharsets.UTF_8);
98 }
99
100
101
102
103
104
105
106
107
108
109 public static Builder from(InputStream in, Charset charset) {
110 return from(new BufferedReader(new InputStreamReader(in, charset)));
111 }
112
113
114
115
116
117
118
119
120
121
122 public static Builder from(File file, Charset charset) {
123 try {
124 return from(
125 new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)),
126 true);
127 } catch (FileNotFoundException e) {
128 throw new IORuntimeException(e);
129 }
130 }
131
132 @VisibleForTesting
133 static Builder from(Reader reader, boolean close) {
134 return new Builder(reader, close);
135 }
136
137
138
139
140 public static final class Builder {
141
// source of the text to wrap
private final Reader reader;
// whether the wrap methods close the reader when they finish
private final boolean closeReader;
// maximum line width, measured with stringWidth; defaults to 80
private Number maxWidth = 80;
// measures the width of a candidate line; defaults to the character count
private Function<? super CharSequence, ? extends Number> stringWidth = STRING_WIDTH_DEFAULT;
// non-letter characters treated as part of a word
private Set<Character> extraWordChars = SPECIAL_WORD_CHARS_SET_DEFAULT;
// line separator written by the Writer-based wrap methods
private String newLine = "\n";
// passed through to the wrapping algorithm; see writeBrokenWord
private boolean insertHyphens = true;
// whether a word wider than maxWidth may be split across lines
private boolean breakWords = true;

/**
 * Creates a builder with default settings over the given reader.
 *
 * @param reader      source of the text to wrap
 * @param closeReader true if the reader should be closed after wrapping
 */
Builder(Reader reader, boolean closeReader) {
    this.closeReader = closeReader;
    this.reader = reader;
}
155
156
157
158
159
160
161
162
163
164
165
166
167 public Builder maxWidth(Number maxWidth) {
168 Preconditions.checkArgument(maxWidth.doubleValue() > 0);
169 this.maxWidth = maxWidth;
170 return this;
171 }
172
173
174
175
176
177
178
179
180
181
182 public Builder stringWidth(Function<? super CharSequence, ? extends Number> stringWidth) {
183 this.stringWidth = stringWidth;
184 return this;
185 }
186
187
188
189
190
191
192
193
194 public Builder newLine(String newLine) {
195 this.newLine = newLine;
196 return this;
197 }
198
199
200
201
202
203
204
205
206
207 public Builder extraWordChars(Set<Character> extraWordChars) {
208 this.extraWordChars = extraWordChars;
209 return this;
210 }
211
212
213
214
215
216
217
218
219
220 public Builder extraWordChars(String extraWordChars) {
221 return extraWordChars(toSet(extraWordChars));
222 }
223
224
225
226
227
228
229
230
231 public Builder includeExtraWordChars(String includeWordChars) {
232 prepareExtraWordCharsForMutation();
233 this.extraWordChars.addAll(toSet(includeWordChars));
234 return this;
235 }
236
237
238
239
240
241
242
243
244 public Builder excludeExtraWordChars(String excludeWordChars) {
245 prepareExtraWordCharsForMutation();
246 this.extraWordChars.removeAll(toSet(excludeWordChars));
247 return this;
248 }
249
250
251
252
253
254
255 private void prepareExtraWordCharsForMutation() {
256 if (this.extraWordChars == SPECIAL_WORD_CHARS_SET_DEFAULT) {
257 this.extraWordChars = new HashSet<>(SPECIAL_WORD_CHARS_SET_DEFAULT);
258 }
259 }
260
261
262
263
264
265
266
267
268 public Builder insertHyphens(boolean insertHyphens) {
269 this.insertHyphens = insertHyphens;
270 return this;
271 }
272
273
274
275
276
277
278
279
280
281 public Builder breakWords(boolean breakWords) {
282 this.breakWords = breakWords;
283 return this;
284 }
285
286
287
288
289
290
291
292 public void wrap(Writer out) {
293 try {
294 wordWrap(reader, out, newLine, maxWidth, stringWidth, extraWordChars, insertHyphens,
295 breakWords);
296 } catch (IOException e) {
297 throw new IORuntimeException(e);
298 } finally {
299 if (closeReader) {
300 close(reader);
301 }
302 }
303 }
304
305 public List<String> wrapToList() {
306 List<String> lines = new ArrayList<>();
307 StringBuilder b = new StringBuilder();
308 boolean[] building = new boolean[1];
309 wrap(new LineConsumer() {
310
311 @Override
312 public void write(char[] chars, int offset, int length) throws IOException {
313 building[0] = true;
314 b.append(chars, offset, length);
315 }
316
317 @Override
318 public void writeNewLine() throws IOException {
319 lines.add(b.toString());
320 b.setLength(0);
321 building[0] = false;
322 }
323 });
324 if (building[0]) {
325 lines.add(b.toString());
326 }
327 return lines;
328 }
329
330 public void wrap(LineConsumer consumer) {
331 try {
332 wordWrap(reader, consumer, maxWidth, stringWidth, extraWordChars, insertHyphens,
333 breakWords);
334 } catch (IOException e) {
335 throw new IORuntimeException(e);
336 } finally {
337 if (closeReader) {
338 close(reader);
339 }
340 }
341 }
342
343
344
345
346
347
348
349
350 public void wrap(File file, Charset charset) {
351 try (Writer writer = new OutputStreamWriter(new FileOutputStream(file), charset)) {
352 wrap(writer);
353 } catch (IOException e) {
354 throw new IORuntimeException(e);
355 }
356 }
357
358
359
360
361
362
363
364 public void wrapUtf8(File file) {
365 wrap(file, StandardCharsets.UTF_8);
366 }
367
368
369
370
371
372
373
374 public void wrapUtf8(String filename) {
375 wrapUtf8(new File(filename));
376 }
377
378
379
380
381
382
383
384
385 public void wrap(String filename, Charset charset) {
386 wrap(new File(filename), charset);
387 }
388
389
390
391
392
393
394 public String wrap() {
395
396 StringWriter out = new StringWriter();
397 wrap(out);
398 return out.toString();
399 }
400 }
401
402 @VisibleForTesting
403 static void close(Reader reader) {
404 try {
405 reader.close();
406 } catch (IOException e) {
407 throw new IORuntimeException(e);
408 }
409 }
410
411 private static Set<Character> toSet(String chars) {
412 Set<Character> set = new HashSet<Character>();
413 for (int i = 0; i < chars.length(); i++) {
414 set.add(chars.charAt(i));
415 }
416 return set;
417 }
418
419 static void wordWrap(Reader in, Writer out, String newLine, Number maxWidth,
420 Function<? super CharSequence, ? extends Number> stringWidth,
421 Set<Character> extraWordChars, boolean insertHyphens, boolean breakWords)
422 throws IOException {
423 LineConsumer consumer = new LineConsumer() {
424
425 @Override
426 public void write(String s) throws IOException {
427 out.write(s);
428 }
429
430 @Override
431 public void write(char[] chars, int start, int length) throws IOException {
432 out.write(chars, start, length);
433 }
434
435 @Override
436 public void writeNewLine() throws IOException {
437 out.write(newLine);
438 }
439
440 };
441 wordWrap(in, consumer, maxWidth, stringWidth, extraWordChars, insertHyphens, breakWords);
442 }
443
444 static void wordWrap(Reader in, LineConsumer out, Number maxWidth,
445 Function<? super CharSequence, ? extends Number> stringWidth,
446 Set<Character> extraWordChars, boolean insertHyphens, boolean breakWords)
447 throws IOException {
448 StringBuilder2 line = new StringBuilder2();
449 StringBuilder2 word = new StringBuilder2();
450 CharSequence lineAndWordRightTrim = concatRightTrim(line, word);
451 double maxWidthDouble = maxWidth.doubleValue();
452 boolean broken = false;
453 boolean isWordCharacter = false;
454 boolean previousWasPunctuation = false;
455 while (true) {
456 int c = in.read();
457 if (c == -1) {
458 break;
459 }
460 char ch = (char) c;
461 isWordCharacter = Character.isLetter(ch) || extraWordChars.contains(ch);
462 if (ch == '\n') {
463 line.append(word);
464 if (tooLong(stringWidth, line, maxWidthDouble)) {
465 line.rightTrim();
466 }
467 if (!isWhitespace(line)) {
468 out.write(line.internalArray(), 0, line.length());
469 }
470 out.writeNewLine();
471 word.setLength(0);
472 line.setLength(0);
473 broken = false;
474 } else if (ch == '\r') {
475
476 } else if (isWordCharacter && !previousWasPunctuation) {
477 word.append(ch);
478 if (broken && line.length() == 0) {
479 leftTrim(word);
480 }
481 if (tooLong(stringWidth, lineAndWordRightTrim, maxWidthDouble)) {
482 if (line.length() > 0) {
483 writeLine(out, line);
484 leftTrim(word);
485 if (tooLong(stringWidth, word, maxWidthDouble)) {
486 if (breakWords) {
487 writeBrokenWord(out, word, insertHyphens);
488 } else {
489 broken = true;
490 }
491 } else {
492 broken = true;
493 }
494 } else {
495 if (breakWords) {
496 writeBrokenWord(out, word, insertHyphens);
497 } else {
498 broken = true;
499 }
500 }
501 }
502 } else {
503 if (word.length() > 0 && !isWhitespace(word)) {
504 appendWordToLine(line, word);
505 if (broken) {
506 leftTrim(line);
507 }
508 }
509 word.append(ch);
510 if (tooLong(stringWidth, lineAndWordRightTrim, maxWidthDouble)) {
511 if (!isWhitespace(line)) {
512 writeLine(out, line);
513 } else {
514 line.setLength(0);
515 }
516 broken = true;
517 }
518 }
519 previousWasPunctuation = isPunctuation(ch) && !extraWordChars.contains(ch);
520 }
521 if (line.length() > 0) {
522 String s = line.toString() + word.toString();
523 if (broken) {
524 s = leftTrim(s);
525 }
526 out.write(s);
527 } else {
528 if (broken) {
529 leftTrim(word);
530 }
531 if (!isWhitespace(word)) {
532 out.write(word.internalArray(), 0, word.length());
533 }
534 }
535 }
536
537 private static CharSequence concatRightTrim(CharSequence a, CharSequence b) {
538 return new CharSequenceConcatRightTrim(a, b);
539 }
540
541 private static boolean isPunctuation(char ch) {
542 return PUNCTUATION.indexOf(ch) != -1;
543 }
544
545 private static boolean tooLong(Function<? super CharSequence, ? extends Number> stringWidth,
546 CharSequence s, double maxWidthDouble) {
547 return stringWidth.apply(s).doubleValue() > maxWidthDouble;
548 }
549
550 @VisibleForTesting
551 static CharSequence rightTrim(CharSequence s) {
552 int i = s.length();
553 while (i > 0) {
554 if (Character.isWhitespace(s.charAt(i - 1))) {
555 i--;
556 } else {
557 break;
558 }
559 }
560 if (i != s.length()) {
561 return s.subSequence(0, i);
562 } else {
563 return s;
564 }
565 }
566
567 static boolean isWhitespace(CharSequence s) {
568 for (int i = 0; i < s.length(); i++) {
569 if (!Character.isWhitespace(s.charAt(i))) {
570 return false;
571 }
572 }
573 return true;
574 }
575
576 @VisibleForTesting
577 static void leftTrim(StringBuilder2 word) {
578
579
580 int i;
581 for (i = 0; i < word.length(); i++) {
582 if (!Character.isWhitespace(word.charAt(i))) {
583 break;
584 }
585 }
586 if (i < word.length() && i > 0) {
587 word.delete(0, i);
588 }
589 }
590
591 private static String leftTrim(String s) {
592 StringBuilder2 b = new StringBuilder2(s);
593 leftTrim(b);
594 return b.toString();
595 }
596
597 private static void appendWordToLine(StringBuilder2 line, StringBuilder2 word) {
598 line.append(word);
599 word.setLength(0);
600 }
601
602 private static void writeBrokenWord(LineConsumer out, StringBuilder2 word, boolean insertHyphens) throws IOException {
603
604
605 String x;
606 if (insertHyphens && word.length() > 2
607 && !isWhitespace((x = word.substring(0, word.length() - 2)))) {
608 out.write(x);
609 out.write("-");
610 out.writeNewLine();
611 word.delete(0, word.length() - 2);
612 } else {
613 String prefix = word.substring(0, word.length() - 1);
614 if (!isWhitespace(prefix)) {
615 out.write(prefix);
616 }
617 out.writeNewLine();
618 word.delete(0, word.length() - 1);
619 }
620 }
621
622 private static void writeLine(LineConsumer out, StringBuilder2 line)
623 throws IOException {
624 out.write(line.internalArray(), 0, line.length());
625 out.writeNewLine();
626 line.setLength(0);
627 }
628 }