| /* |
| * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * - Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * - Neither the name of Oracle nor the names of its |
| * contributors may be used to endorse or promote products derived |
| * from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
| * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| * This source code is provided to illustrate the usage of a given feature |
| * or technique and has been deliberately simplified. Additional steps |
| * required for a production-quality application, such as security checks, |
| * input validation, and proper error handling, might not be present in |
| * this sample code. |
| */ |
| |
| import java.io.BufferedReader; |
| import java.io.FileNotFoundException; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.util.function.Consumer; |
| import java.util.regex.Pattern; |
| |
/**
 * WC - Prints newline, word, and character counts, plus the maximum line
 * length, for a single file. See the {@link #usage} method for instructions
 * and command line parameters. This sample shows usages of:
 * <ul>
 * <li>Lambda and bulk operations. Shows how to gather custom statistics in a
 * single pass with a mutable reduction, and how to gather the same statistics
 * with separate built-in stream operations.</li>
 * <li>Constructor reference.</li>
 * <li>Try-with-resources feature.</li>
 * </ul>
 *
 * <p>Counts are computed on the lines returned by
 * {@link BufferedReader#lines()}, so line terminators are not included in the
 * character count or in the line lengths.
 */
public class WC {

    // Splits a line on non-word characters; the non-empty pieces are the words.
    private static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W");

    /**
     * The main method for the WC program. Run the program with an empty
     * argument list to see possible arguments.
     *
     * @param args the argument list for WC; exactly one file name is expected
     * @throws java.io.IOException if the file cannot be opened or closed for
     *         a reason other than not being found. Read failures that happen
     *         while streaming lines surface as
     *         {@link java.io.UncheckedIOException}.
     */
    public static void main(String[] args) throws IOException {

        if (args.length != 1) {
            usage();
            return;
        }

        String fileName = args[0];
        try {
            /*
             * Statistics can be gathered in four passes using a built-in API.
             * The method demonstrates how separate operations can be
             * implemented using a built-in API.
             */
            collectInFourPasses(fileName);
            /*
             * Using several passes to collect data is not the best way.
             * Statistics can be gathered by a custom collector in one pass.
             */
            collectInOnePass(fileName);
        } catch (FileNotFoundException e) {
            usage();
            System.err.println(e);
        }
    }

    /**
     * Opens a fresh buffered reader positioned at the start of the file.
     *
     * Every pass gets its own reader instead of sharing one reader through
     * mark()/reset(): a shared mark needs a read-ahead limit, which both caps
     * the supported file size and makes BufferedReader allocate a buffer of
     * that size.
     *
     * @param fileName the file to open
     * @return a new reader that the caller must close
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static BufferedReader open(String fileName)
            throws FileNotFoundException {
        return new BufferedReader(new FileReader(fileName));
    }

    /**
     * Prints the four statistics, reading the file once per statistic.
     *
     * @param fileName the file to analyze
     * @throws IOException if the file cannot be opened or closed
     */
    private static void collectInFourPasses(String fileName)
            throws IOException {
        /*
         * Input is read as a stream of lines by lines().
         * Every line is turned into a stream of chars by the flatMapToInt(...)
         * method.
         * Length of the stream is counted by count().
         */
        try (BufferedReader reader = open(fileName)) {
            System.out.println("Character count = "
                    + reader.lines().flatMapToInt(String::chars).count());
        }
        /*
         * Input is read as a stream of lines by lines().
         * Every line is split by NON_WORD_PATTERN into words by the
         * flatMap(...) method.
         * Empty strings produced by the split are removed by the filter(...)
         * method.
         * Length of the stream is counted by count().
         */
        try (BufferedReader reader = open(fileName)) {
            System.out.println("Word count = "
                    + reader.lines()
                            .flatMap(NON_WORD_PATTERN::splitAsStream)
                            .filter(str -> !str.isEmpty()).count());
        }
        /*
         * Input is read as a stream of lines by lines().
         * Number of lines is counted by count().
         */
        try (BufferedReader reader = open(fileName)) {
            System.out.println("Newline count = " + reader.lines().count());
        }
        /*
         * Input is read as a stream of lines by lines().
         * Every line is mapped to its length.
         * Maximum of the lengths is calculated. An empty file has no lines,
         * so max() is empty; report 0, which is also what the one-pass
         * statistics report for an empty file.
         */
        try (BufferedReader reader = open(fileName)) {
            System.out.println("Max line length = "
                    + reader.lines().mapToInt(String::length).max().orElse(0));
        }
    }

    /**
     * Prints all statistics, gathered in a single pass over the file.
     *
     * @param fileName the file to analyze
     * @throws IOException if the file cannot be opened or closed
     */
    private static void collectInOnePass(String fileName) throws IOException {
        /*
         * The collect() method has three parameters:
         * The first parameter is the {@code WCStatistics} constructor
         * reference. collect() will create {@code WCStatistics} instances,
         * where statistics will be aggregated.
         * The second parameter shows how {@code WCStatistics} will process
         * a String.
         * The third parameter shows how to merge two {@code WCStatistics}
         * instances.
         *
         * A {@code Collector} could be used instead, which would be a more
         * reusable solution. See the {@code CSVProcessor} example for how a
         * {@code Collector} can be implemented.
         *
         * Note that any performance increase when going parallel will
         * depend on the size of the input (lines) and the cost per element.
         */
        try (BufferedReader reader = open(fileName)) {
            WCStatistics wc = reader.lines().parallel()
                    .collect(WCStatistics::new,
                            WCStatistics::accept,
                            WCStatistics::combine);
            System.out.println(wc);
        }
    }

    /**
     * Prints the command line synopsis to standard output.
     */
    private static void usage() {
        System.out.println("Usage: " + WC.class.getSimpleName() + " FILE");
        System.out.println("Print newline, word,"
                + " character counts and max line length for FILE.");
    }

    /**
     * Mutable container that accumulates the statistics line by line.
     */
    private static class WCStatistics implements Consumer<String> {
        /*
         * @implNote This implementation does not need to be thread safe because
         * the parallel implementation of
         * {@link java.util.stream.Stream#collect Stream.collect()}
         * provides the necessary partitioning and isolation for safe parallel
         * execution.
         */

        private long characterCount;
        private long lineCount;
        private long wordCount;
        private long maxLineLength;

        /*
         * Processes one line: adds its length, counts it as a line, counts
         * its words and updates the maximum line length.
         */
        @Override
        public void accept(String line) {
            characterCount += line.length();
            lineCount++;
            wordCount += NON_WORD_PATTERN.splitAsStream(line)
                    .filter(str -> !str.isEmpty()).count();
            maxLineLength = Math.max(maxLineLength, line.length());
        }

        /*
         * Merges another WCStatistics into this one: counts are summed and
         * the larger maximum line length is kept.
         */
        public void combine(WCStatistics stat) {
            wordCount += stat.wordCount;
            lineCount += stat.lineCount;
            characterCount += stat.characterCount;
            maxLineLength = Math.max(maxLineLength, stat.maxLineLength);
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("#------WCStatistic------#\n");
            sb.append("Character count = ").append(characterCount).append('\n');
            sb.append("Word count = ").append(wordCount).append('\n');
            sb.append("Newline count = ").append(lineCount).append('\n');
            sb.append("Max line length = ").append(maxLineLength).append('\n');
            return sb.toString();
        }
    }
}