import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * Spark word-frequency counter.
 *
 * <p>Reads {@code src/main/resources/input.txt}, strips non-letter characters,
 * splits into words, counts occurrences of every word longer than one
 * character, and prints the counts in descending order of frequency as
 * {@code (count, word)} tuples.
 */
public class Main {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("wordCounter").setMaster("local[*]");

        // try-with-resources guarantees the context is closed even if a stage throws;
        // the original called sc.close() only on the happy path.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> inputData = sc.textFile("src/main/resources/input.txt");

            inputData
                    // BUG FIX: the original pattern was [^a-zA-z\s]; the 'A-z' range also
                    // matches the characters [ \ ] ^ _ ` , so e.g. underscores survived
                    // the cleanup. 'A-Z' keeps letters and whitespace only.
                    .map(line -> line.replaceAll("[^a-zA-Z\\s]", ""))
                    // split on runs of whitespace: the original split(" ") produced empty
                    // tokens on double spaces and never split on tabs (which the regex
                    // above deliberately preserves).
                    .flatMap(line -> Arrays.asList(line.split("\\s+")).iterator())
                    // drop single-character tokens (and any empty leading token)
                    .filter(word -> word.length() > 1)
                    .mapToPair(word -> new Tuple2<>(word, 1L))
                    .reduceByKey(Long::sum)
                    // swap to (count, word) so sortByKey orders by frequency
                    .mapToPair(pair -> new Tuple2<>(pair._2, pair._1))
                    // descending counts, single partition for a totally ordered print
                    .sortByKey(false, 1)
                    .foreach(entry -> System.out.println(entry));
        }
    }
}
Örnek Çıktı:
(2509,to)
(2011,that)
(1552,and)
(1420,of)
(1287,this)
(1246,we)
(1243,is)
(1098,you)
(955,in)
(833,on)
(779,it)
(713,container)
(683,So)
(670,do)
(656,be)
(654,for)
(617,can)
(610,have)
(576,And)
(552,Docker)
(524,going)
(480,if)
(466,just)
(456,its)
(441,here)
(434,will)
(422,but)
(414,as)
(394,image)
(392,now)
(365,running)
(354,with)
(353,need)
.....
.....
(1,Gerty)
(1,Opt)
(1,maths)
(1,construct)
(1,statement)
(1,Course)
(1,forwarding)
(1,episode)
(1,catching)
(1,angry)
(1,Target)
(1,favour)