分类: 系统运维
2016-12-27 16:33:21
模型随着接收到的新消息不断更新自己,而不是像离线训练那样一次次地重新训练。
输入源:Akka actors、消息队列、Flume、Kafka、……
类群(lineage):应用到RDD上的转换算子和执行算子的集合
依赖Spark MLlib和Spark Streaming
// sbt build definition for the streaming example application.
// Each setting is a separate expression and must sit on its own line.
name := "scala-spark-streaming-app"

version := "1.0"

scalaVersion := "2.11.7"

// Spark MLlib and Spark Streaming; `%%` appends the Scala binary version
// (here _2.11) to the artifact name.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-mllib" % "1.5.1",
  "org.apache.spark" %% "spark-streaming" % "1.5.1"
)
使用国内镜像仓库
~/.sbt/repositories
[repositories]
local
osc: http://maven.oschina.net/content/groups/public/
typesafe: http://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
sonatype-oss-releases
maven-central
sonatype-oss-snapshots
/**
 * A small TCP producer that generates random "product purchase" events and
 * streams them to every connected client as newline-terminated CSV records
 * of the form `user,product,price`.
 *
 * The server listens on port 9999 and serves each client on its own thread,
 * emitting one batch of events per second.
 */
object StreamingProducer {

  /**
   * Exclusive upper bound on the number of events generated per second:
   * each batch holds between 0 and `MaxEvents - 1` events.
   */
  private val MaxEvents = 6

  /** TCP port the producer listens on. */
  private val Port = 9999

  /** Classpath location of the single-line, comma-separated list of user names. */
  private val NamesResource = "/names.csv"

  /** The possible products, as (product name -> price) pairs. */
  private val Products = Seq(
    "iPhone Cover" -> 9.99,
    "Headphones" -> 5.49,
    "Samsung Galaxy Cover" -> 8.95,
    "iPad Cover" -> 7.49
  )

  /**
   * Starts the producer: loads the user names, opens the server socket and
   * hands every accepted connection to a dedicated thread. Runs until the
   * process is terminated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val random = new Random()
    val names = loadNames(NamesResource)

    // create a network producer
    val listener = new ServerSocket(Port)
    println(s"Listening on port: $Port")
    try {
      while (true) {
        val socket = listener.accept()
        new Thread() {
          override def run(): Unit = serveClient(socket, random, names)
        }.start()
      }
    } finally {
      // Only reached if accept() fails; release the port in that case.
      listener.close()
    }
  }

  /**
   * Reads the list of possible user names from the first line of a
   * comma-separated classpath resource, closing the resource afterwards.
   *
   * @param resource classpath path of the names file
   * @return the names found on the first line
   * @throws java.io.FileNotFoundException if the resource is not on the classpath
   * @throws IllegalArgumentException if the resource contains no lines
   */
  private def loadNames(resource: String): Seq[String] = {
    // getResourceAsStream returns null for a missing resource; fail with a
    // clear message instead of a NullPointerException further down.
    val stream = Option(getClass.getResourceAsStream(resource)).getOrElse(
      throw new java.io.FileNotFoundException(s"Classpath resource not found: $resource")
    )
    val source = scala.io.Source.fromInputStream(stream)
    try {
      val lines = source.getLines()
      require(lines.hasNext, s"Names resource is empty: $resource")
      // Vector gives constant-time indexed access for random selection.
      lines.next().split(",").toVector
    } finally {
      source.close()
    }
  }

  /**
   * Generates a number of random product events.
   *
   * @param n      how many events to generate
   * @param random source of randomness
   * @param names  non-empty pool of user names to draw from
   * @return `n` tuples of (user, product, price)
   */
  private def generateProductEvents(
      n: Int,
      random: Random,
      names: Seq[String]): Seq[(String, String, Double)] =
    (1 to n).map { _ =>
      val (product, price) = Products(random.nextInt(Products.size))
      // Pick one name directly rather than shuffling the whole list per event.
      val user = names(random.nextInt(names.size))
      (user, product, price)
    }

  /**
   * Streams one batch of random events per second to a connected client
   * until a write to the client fails, then closes the socket.
   *
   * @param socket the accepted client connection; closed when this method returns
   * @param random source of randomness
   * @param names  non-empty pool of user names to draw from
   */
  private def serveClient(socket: java.net.Socket, random: Random, names: Seq[String]): Unit = {
    println("Got client connected from: " + socket.getInetAddress)
    try {
      val out = new PrintWriter(socket.getOutputStream(), true)

      @scala.annotation.tailrec
      def pump(): Unit = {
        Thread.sleep(1000)
        val num = random.nextInt(MaxEvents)
        generateProductEvents(num, random, names).foreach { event =>
          out.write(event.productIterator.mkString(","))
          out.write("\n")
        }
        out.flush()
        // PrintWriter never throws on I/O failure; checkError() is the only
        // way to notice that the client has gone away.
        if (!out.checkError()) {
          println(s"Created $num events...")
          pump()
        }
      }

      pump()
      println("Client disconnected: " + socket.getInetAddress)
    } finally {
      socket.close()
    }
  }
}
sbt run
Multiple main classes detected, select one to run: [1] MonitoringStreamingModel [2] SimpleStreamingApp [3] SimpleStreamingModel [4] StreamingAnalyticsApp [5] StreamingModelProducer [6] StreamingProducer [7] StreamingStateApp
Enter number: 6