• 博客访问： 479876
• 博文数量： 25
• 博客积分： 111
• 博客等级： 民兵
• 技术积分： 1278
• 用 户 组： 普通用户
• 注册时间： 2012-10-26 20:51

2014年（17）

2013年（8）

2014-08-22 07:20:07

/**
 * Logistic function over a sparse binary feature vector.
 *
 * @param xi indices of the active features
 * @param w  dense weight vector, indexed by feature id
 * @return   1 / (1 + e^-score), where score is the sum of the active weights,
 *           clamped to [-35, 35] so math.exp never overflows to Inf or
 *           underflows to 0 (keeps log(h) and log(1-h) finite downstream).
 */
def sigmoid(xi: Array[Int], w: Array[Double]): Double = {
    // Sparse dot product: every active feature contributes its weight.
    val raw = xi.foldLeft(0.0)((acc, i) => acc + w(i))
    // Clamp the linear score before exponentiating.
    val clipped = math.max(-35.0, math.min(35.0, raw))
    1.0 / (1.0 + math.exp(-clipped))
}

10. /* Iteratively solve for the weights (one gradient step per outer iteration). */
// NOTE(review): fragment — the enclosing definition that declares `input`,
// `iter`, `weight`, `alpha` and `logger` is not visible in this excerpt.
11.         for(i <- 0 until iter){
// h: (label, active feature indices, predicted probability) per sample.
12.             var h = input.map{ case (label,indices) =>
13.                 (label,indices,sigmoid(indices,weight))
14.             }
// Per-sample log-likelihood: label*log(h) + (1-label)*log(1-h).
15.             val loss = h.map{ case (label,indices,hh) =>
16.                 label*math.log(hh)+(1.0-label)*math.log(1.0-hh)
17.             }
18.             val sum = loss.reduce(_+_)
19.             val num = loss.count
20.             //loss.collect.foreach(ll => logger.error(ll))
// -sum/num below is the mean negative log-likelihood being reported.
21.             logger.error("loss rate: ")

22.             logger.error(sum)
23.             logger.error(num)
24.             logger.error(-sum/num)

// Gradient step: each sample contributes alpha*(label - h) to every active
// feature; the per-sample sparse maps are merged additively (`getOrElse`
// keeps the merge a sum, not an overwrite) and then applied to `weight`.
// NOTE(review): the final foreach mutates `weight` in place — this assumes
// the reduce result is a local (driver-side) Map; confirm against caller.
26.             h.map{ case (label,indices,hh) =>
27.                 (indices,label - hh)
28.             }.map{case (indices,e) => indices.map{f => (f,alpha*e)}.toMap
29.             }.reduce{case (map1,map2) => map1 ++ map2.map{ case (k,v) => (k,v + map1.getOrElse(k,0.0)) }}
30.             .foreach{case(k,v)=>weight(k) += v}
31.         }
// Dump the final weights, then return them as the block's value.
32.         weight.foreach{w => logger.error(w)}
33.         weight
34.     }

/**
 * Sharded logistic-regression driver: assigns every sample to one of
 * `splitNum` shards, trains on each shard in turn, and folds the averaged
 * per-feature updates returned by train() back into a single driver-side
 * weight vector.
 *
 * @param input      RDD of (label, active feature indices) samples
 * @param splitNum   number of shards the data is partitioned into
 * @param iterNum    number of full passes over all shards
 * @param featureDim length of the weight vector; defaulted to the value the
 *                   original hard-coded (1861181) for backward compatibility
 */
def splitSamples(input: RDD[(Double, Array[Int])], splitNum: Int = 10,
                 iterNum: Int = 2, featureDim: Int = 1861181): Unit = {
    val logger = Logger.getRootLogger()
    val ww = new Array[Double](featureDim)
    // Assign each sample a stable shard id ONCE. The original incremented a
    // captured `var j` inside an RDD map closure: Spark serializes a copy of
    // the closure to every executor, so the counter was executor-local and
    // the resulting shard split was neither global nor deterministic.
    // zipWithIndex yields a well-defined per-sample index instead.
    val sharded = input.zipWithIndex().map { case ((label, indices), idx) =>
        ((idx % splitNum).toInt, label, indices)
    }
    for (pass <- 0 until iterNum) {
        // Report the mean negative log-likelihood over the full data set.
        val loss = input.map { case (label, indices) =>
            val hh = sigmoid(indices, ww)
            label * math.log(hh) + (1.0 - label) * math.log(1.0 - hh)
        }
        val sum = loss.reduce(_ + _)
        val num = loss.count
        logger.error("loss rate: ")
        logger.error(sum)
        logger.error(num)
        logger.error(-sum / num)
        // Train on each shard and average its updates into the weights.
        for (i <- 0 until splitNum) {
            val shard = sharded.filter { case (s, _, _) => s == i }
                .map { case (_, label, indices) => (label, indices) }
            train(shard, ww).foreach { case (k, v) => ww(k) += v / splitNum }
        }
    }
}

/**
 * Logistic function for a sparse binary sample: sums the weights of the
 * active feature indices, clamps the score to [-35, 35] to keep math.exp
 * inside a numerically safe range, and squashes it into (0, 1).
 *
 * @param xi active feature indices of the sample
 * @param w  dense weight vector indexed by feature id
 */
def sigmoid(xi: Array[Int], w: Array[Double]): Double = {
        var score = 0.0
        var k = 0
        // Manual index loop over the active features (hot path).
        while (k < xi.length) {
                score += w(xi(k))
                k += 1
        }
        // Bound the score so exp(-score) is always finite and non-zero.
        val bounded =
                if (score > 35.0) 35.0
                else if (score < -35.0) -35.0
                else score
        1.0 / (1.0 + math.exp(-bounded))
}

34.         def train(input:RDD[(Double, Array[Int])], weight:Array[Double], alpha:Double=0.001):Map[Int,Double]={
// One SGD-style pass: for every sample, nudges the weight of each active
// feature by alpha*(label - sigmoid), and returns a map of
// (feature index -> weight value after the nudge) for the touched features.
// NOTE(review): `weight` is a driver-side Array mutated inside an RDD map
// closure. Spark ships a serialized copy of the closure to each executor,
// so each executor mutates its OWN copy — the driver's array is not
// updated by line 37; only the returned map carries information back.
// NOTE(review): the final reduce merges per-sample maps with `++`, which is
// last-write-wins (not additive), so the result depends on partition order.
// NOTE(review): sigmoid(indices, weight) is recomputed for EVERY active
// index of a sample and observes the in-flight mutations from earlier
// indices of the same sample — confirm this ordering is intentional.
35.                 val rst = input.map{case (label,indices) =>
36.                                 indices.map{ arr =>
37.                                         weight(arr) += alpha*(label-sigmoid(indices,weight))
38.                                         (arr,weight(arr))
39.                                 }.toMap
40.                         }.reduce(_++_)
41.                 rst
42.         }
43. }

0