View Javadoc
1   /*
2    * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
3    * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
4    *
5    *
6    *
7    *
8    *
9    *
10   *
11   *
12   *
13   *
14   *
15   *
16   *
17   *
18   *
19   *
20   *
21   *
22   *
23   *
24   */
25  
26  package org.davidmoten.io.extras;
27  
28  
29  import java.io.FileReader;
30  import java.io.IOException;
31  import java.io.InputStreamReader;
32  import java.io.Reader;
33  import java.io.UncheckedIOException;
34  import java.util.Iterator;
35  import java.util.NoSuchElementException;
36  import java.util.Spliterator;
37  import java.util.Spliterators;
38  import java.util.stream.Stream;
39  import java.util.stream.StreamSupport;
40  
/**
 * Reads text from a character-input stream, buffering characters so as to
 * provide for the efficient reading of characters, arrays, and lines.
 *
 * <p> The buffer size may be specified, or the default size may be used.  The
 * default is large enough for most purposes.
 *
 * <p> A maximum line length can be specified such that characters read past
 * that maximum line length are discarded and not returned in a call to
 * {@code readLine()}. A maximum line length of {@code 0} means that lines are
 * not bounded. The bound applies only to {@code readLine()} and
 * {@code lines()}; the character-oriented {@code read} methods are unaffected.
 *
 * <p> In general, each read request made of a Reader causes a corresponding
 * read request to be made of the underlying character or byte stream.  It is
 * therefore advisable to wrap a BoundedBufferedReader around any Reader whose
 * read() operations may be costly, such as FileReaders and InputStreamReaders.
 * For example,
 *
 * <pre>
 * BoundedBufferedReader in
 *   = new BoundedBufferedReader(new FileReader("foo.in"), 8192, 1000);
 * </pre>
 *
 * will buffer the input from the specified file using an 8192 character
 * buffer.  Without buffering, each invocation of read() or readLine() could
 * cause bytes to be read from the file, converted into characters, and then
 * returned, which can be very inefficient. Any line longer than 1000
 * characters will be trimmed to 1000 characters.
 *
 * <p> Programs that use DataInputStreams for textual input can be localized by
 * replacing each DataInputStream with an appropriate BufferedReader.
 *
 * @see FileReader
 * @see InputStreamReader
 * @see java.nio.file.Files#newBufferedReader
 *
 * @author      Mark Reinhold
 * @since       JDK1.1
 */
public class BoundedBufferedReader extends Reader {

    /** Value of {@code maxLineLength} meaning "do not bound lines". */
    private static final int UNBOUNDED = 0;

    private static final int INVALIDATED = -2;
    private static final int UNMARKED = -1;

    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    /** The wrapped reader; set to {@code null} once this reader is closed. */
    private Reader in;

    /** Maximum number of chars returned per line, or {@link #UNBOUNDED}. */
    private final int maxLineLength;

    /** Character buffer; valid data lives in {@code [nextChar, nChars)}. */
    private char[] cb;
    private int nChars;
    private int nextChar;

    private int markedChar = UNMARKED;
    private int readAheadLimit = 0; /* Valid only when markedChar > 0 */

    /** If the next character is a line feed, skip it */
    private boolean skipLF = false;

    /** The skipLF flag when the mark was set */
    private boolean markedSkipLF = false;

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size. When a line longer than {@code maxLineLength} is
     * encountered the line is trimmed to that maximum length and the rest of
     * the line ignored.
     *
     * @param  in   A Reader
     * @param  sz   Input-buffer size
     * @param  maxLineLength maximum size of a returned line in chars, or
     *         {@code 0} to put no limit on line length
     *
     * @exception  IllegalArgumentException  If {@code sz <= 0} or
     *             {@code maxLineLength < 0}
     */
    public BoundedBufferedReader(Reader in, int sz, int maxLineLength) {
        super(in);
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        if (maxLineLength < 0) {
            throw new IllegalArgumentException("maxLineLength cannot be negative");
        }
        this.in = in;
        this.maxLineLength = maxLineLength;
        cb = new char[sz];
        nextChar = nChars = 0;
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer and puts no limit on line length.
     *
     * @param  in   A Reader
     */
    public BoundedBufferedReader(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE, UNBOUNDED);
    }

    /** Checks to make sure that the stream has not been closed */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Fills the input buffer, taking the mark into account if it is valid.
     */
    private void fill() throws IOException {
        int dst;
        if (markedChar <= UNMARKED) {
            /* No mark */
            dst = 0;
        } else {
            /* Marked */
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
                /* Gone past read-ahead limit: Invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
                    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                    markedChar = 0;
                    dst = delta;
                } else {
                    /* Reallocate buffer to accommodate read-ahead limit */
                    char[] ncb = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                    markedChar = 0;
                    dst = delta;
                }
                nextChar = nChars = delta;
            }
        }

        /* A read of 0 chars is neither data nor EOF, so keep asking. */
        int n;
        do {
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range
     *         0 to 65535 ({@code 0x00-0xffff}), or -1 if the
     *         end of the stream has been reached
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars) {
                        return -1;
                    }
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                        continue;
                    }
                }
                return cb[nextChar++];
            }
        }
    }

    /**
     * Reads characters into a portion of an array, reading from the underlying
     * stream if necessary.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
            /* If the requested length is at least as large as the buffer, and
               if there is no mark/reset activity, and if line feeds are not
               being skipped, do not bother to copy the characters into the
               local buffer.  In this way buffered streams will cascade
               harmlessly. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) {
            return -1;
        }
        if (skipLF) {
            skipLF = false;
            if (cb[nextChar] == '\n') {
                nextChar++;
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) {
                    return -1;
                }
            }
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p> This method implements the general contract of the corresponding
     * <code>{@link Reader#read(char[], int, int) read}</code> method of the
     * <code>{@link Reader}</code> class.  As an additional convenience, it
     * attempts to read as many characters as possible by repeatedly invoking
     * the <code>read</code> method of the underlying stream.  This iterated
     * <code>read</code> continues until one of the following conditions becomes
     * true: <ul>
     *
     *   <li> The specified number of characters have been read,
     *
     *   <li> The <code>read</code> method of the underlying stream returns
     *   <code>-1</code>, indicating end-of-file, or
     *
     *   <li> The <code>ready</code> method of the underlying stream
     *   returns <code>false</code>, indicating that further input requests
     *   would block.
     *
     * </ul> If the first <code>read</code> on the underlying stream returns
     * <code>-1</code> to indicate end-of-file then this method returns
     * <code>-1</code>.  Otherwise this method returns the number of characters
     * actually read.
     *
     * <p> Ordinarily this method takes characters from this stream's character
     * buffer, filling it from the underlying stream as necessary.  If,
     * however, the buffer is empty, the mark is not valid, and the requested
     * length is at least as large as the buffer, then this method will read
     * characters directly from the underlying stream into the given array.
     * Thus redundant buffered readers will not copy data unnecessarily.
     *
     * <p> The maximum line length does not apply to this method.
     *
     * @param      cbuf  Destination buffer
     * @param      off   Offset at which to start storing characters
     * @param      len   Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     * @exception  IndexOutOfBoundsException  If {@code off} and {@code len}
     *             do not describe a valid range of {@code cbuf}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        synchronized (lock) {
            ensureOpen();
            if ((off < 0) || (off > cbuf.length) || (len < 0) ||
                ((off + len) > cbuf.length) || ((off + len) < 0)) {
                throw new IndexOutOfBoundsException();
            } else if (len == 0) {
                return 0;
            }

            int n = read1(cbuf, off, len);
            if (n <= 0) {
                return n;
            }
            while ((n < len) && in.ready()) {
                int n1 = read1(cbuf, off + n, len - n);
                if (n1 <= 0) {
                    break;
                }
                n += n1;
            }
            return n;
        }
    }

    /**
     * Reads a line of text trimmed to {@code maxLineLength} characters.  A line is considered
     * to be terminated by any one of a line feed ('\n'), a carriage return ('\r'),
     * or a carriage return followed immediately by a linefeed. Characters of a
     * line beyond {@code maxLineLength} are consumed from the stream but
     * discarded.
     *
     * @param      ignoreLF  If true, the next '\n' will be skipped
     *
     * @return     A String containing the contents of the line, not including
     *             any line-termination characters, or null if the end of the
     *             stream has been reached
     *
     * @see        java.io.LineNumberReader#readLine()
     *
     * @exception  IOException  If an I/O error occurs
     */
    String readLine(boolean ignoreLF) throws IOException {
        /* Only allocated when a line spans more than one buffer fill. */
        StringBuilder s = null;
        int startChar;

        synchronized (lock) {
            ensureOpen();
            boolean omitLF = ignoreLF || skipLF;

            for (;;) {

                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    if (s != null && s.length() > 0) {
                        return s.toString();
                    }
                    return null;
                }
                boolean eol = false;
                char c = 0;
                int i;

                /* Skip a leftover '\n', if necessary */
                if (omitLF && (cb[nextChar] == '\n')) {
                    nextChar++;
                }
                skipLF = false;
                omitLF = false;

                for (i = nextChar; i < nChars; i++) {
                    c = cb[i];
                    if ((c == '\n') || (c == '\r')) {
                        eol = true;
                        break;
                    }
                }

                startChar = nextChar;
                nextChar = i;
                final int scanned = i - startChar;

                if (eol) {
                    String str;
                    if (s == null) {
                        /* The whole line is in the buffer: the bound must be
                           applied here too, otherwise any long line that fits
                           in one buffer would be returned untrimmed. */
                        str = new String(cb, startChar,
                                         Math.min(remainingCapacity(0), scanned));
                    } else {
                        s.append(cb, startChar,
                                 Math.min(remainingCapacity(s.length()), scanned));
                        str = s.toString();
                    }
                    nextChar++;
                    if (c == '\r') {
                        /* A following '\n' belongs to this terminator. */
                        skipLF = true;
                    }
                    return str;
                }

                if (s == null) {
                    s = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
                }
                s.append(cb, startChar,
                         Math.min(remainingCapacity(s.length()), scanned));
            }
        }
    }

    /**
     * Returns how many more characters may be added to a line that already
     * holds {@code currentLength} characters, or {@code Integer.MAX_VALUE}
     * when lines are not bounded.
     */
    private int remainingCapacity(int currentLength) {
        if (maxLineLength == UNBOUNDED) {
            return Integer.MAX_VALUE;
        }
        return maxLineLength - currentLength;
    }

    /**
     * Reads a line of text trimmed to {@code maxLineLength} characters.  A line is
     * considered to be terminated by any one of a line feed ('\n'), a carriage
     * return ('\r'), or a carriage return followed immediately by a linefeed.
     *
     * @return     A String containing the contents of the line, not including
     *             any line-termination characters, or null if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     *
     * @see java.nio.file.Files#readAllLines
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Skips characters.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IllegalArgumentException  If <code>n</code> is negative.
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        synchronized (lock) {
            ensureOpen();
            long r = n;
            while (r > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    break;
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                }
                long d = nChars - nextChar;
                if (r <= d) {
                    nextChar += r;
                    r = 0;
                    break;
                } else {
                    r -= d;
                    nextChar = nChars;
                }
            }
            return n - r;
        }
    }

    /**
     * Tells whether this stream is ready to be read.  A buffered character
     * stream is ready if the buffer is not empty, or if the underlying
     * character stream is ready.
     *
     * @return {@code true} if the next read is guaranteed not to block for
     *         input, {@code false} otherwise
     *
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public boolean ready() throws IOException {
        synchronized (lock) {
            ensureOpen();

            /*
             * If newline needs to be skipped and the next char to be read
             * is a newline character, then just skip it right away.
             */
            if (skipLF) {
                /* Note that in.ready() will return true if and only if the next
                 * read on the stream will not block.
                 */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                if (nextChar < nChars) {
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                    skipLF = false;
                }
            }
            return (nextChar < nChars) || in.ready();
        }
    }

    /**
     * Tells whether this stream supports the mark() operation, which it does.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.
     *
     * @param readAheadLimit   Limit on the number of characters that may be
     *                         read while still preserving the mark. An attempt
     *                         to reset the stream after reading characters
     *                         up to this limit or beyond may fail.
     *                         A limit value larger than the size of the input
     *                         buffer will cause a new buffer to be allocated
     *                         whose size is no smaller than limit.
     *                         Therefore large values should be used with care.
     *
     * @exception  IllegalArgumentException  If {@code readAheadLimit < 0}
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        synchronized (lock) {
            ensureOpen();
            this.readAheadLimit = readAheadLimit;
            markedChar = nextChar;
            markedSkipLF = skipLF;
        }
    }

    /**
     * Resets the stream to the most recent mark.
     *
     * @exception  IOException  If the stream has never been marked,
     *                          or if the mark has been invalidated
     */
    @Override
    public void reset() throws IOException {
        synchronized (lock) {
            ensureOpen();
            if (markedChar < 0) {
                throw new IOException((markedChar == INVALIDATED)
                                      ? "Mark invalid"
                                      : "Stream not marked");
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
        }
    }

    /**
     * Closes the underlying reader and releases the buffer. Closing an
     * already-closed reader has no effect. After closing, the read, skip,
     * ready, mark and reset methods throw an {@link IOException}.
     *
     * @exception  IOException  If closing the underlying reader fails; this
     *             reader is marked closed even in that case
     */
    @Override
    public void close() throws IOException {
        synchronized (lock) {
            if (in == null) {
                return;
            }
            try {
                in.close();
            } finally {
                in = null;
                cb = null;
            }
        }
    }

    /**
     * Returns a {@code Stream}, the elements of which are lines read from
     * this {@code BoundedBufferedReader}, each trimmed to
     * {@code maxLineLength} characters.  The {@link Stream} is lazily
     * populated, i.e., read only occurs during the terminal stream operation.
     *
     * <p> The reader must not be operated on during the execution of the
     * terminal stream operation. Otherwise, the result of the terminal stream
     * operation is undefined.
     *
     * <p> After execution of the terminal stream operation there are no
     * guarantees that the reader will be at a specific position from which to
     * read the next character or line.
     *
     * <p> If an {@link IOException} is thrown when accessing the underlying
     * reader, it is wrapped in an {@link UncheckedIOException} which will be
     * thrown from the {@code Stream} method that caused the read to take
     * place. This method will return a Stream if invoked on a reader that is
     * closed. Any operation on that stream that requires reading from the
     * reader after it is closed, will cause an UncheckedIOException to be
     * thrown.
     *
     * @return a {@code Stream<String>} providing the lines of text
     *         described by this {@code BoundedBufferedReader}
     *
     * @since 1.8
     */
    public Stream<String> lines() {
        Iterator<String> iter = new Iterator<String>() {
            /* One-line look-ahead; null means "nothing fetched yet". */
            String nextLine = null;

            @Override
            public boolean hasNext() {
                if (nextLine != null) {
                    return true;
                }
                try {
                    nextLine = readLine();
                    return (nextLine != null);
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }

            @Override
            public String next() {
                if (nextLine != null || hasNext()) {
                    String line = nextLine;
                    nextLine = null;
                    return line;
                }
                throw new NoSuchElementException();
            }
        };
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
                iter, Spliterator.ORDERED | Spliterator.NONNULL), false);
    }
}
622 }