大部分数据库都提供了窗口函数,比如RANK,ROW_NUMBER等等。 MySQL 这方面没有直接提供,但是可以变相的实现,我以前写了row_number 的实现,今天有时间把 rank 的实现贴出来。
这里,我用MySQL 以及Python 分别实现了rank 窗口函数。
原始表信息:
-
t_girl=# \d group_concat;
-
Table "ytt.group_concat"
-
Column | Type | Modifiers
-
----------+-----------------------+-----------
-
rank | integer |
-
username | character varying(20) |
表数据
-
t_girl=# select * from group_concat;
-
rank | username
-
------+----------
-
100 | Lucy
-
127 | Lucy
-
146 | Lucy
-
137 | Lucy
-
104 | Lucy
-
121 | Lucy
-
136 | Lily
-
100 | Lily
-
100 | Lily
-
105 | Lily
-
136 | Lily
-
149 | ytt
-
116 | ytt
-
116 | ytt
-
149 | ytt
-
106 | ytt
-
117 | ytt
-
(17 rows)
-
-
-
Time: 0.638 ms
PostgreSQL 的rank 窗口函数示例:
-
t_girl=# select username,rank,rank() over(partition by username order by rank desc) as rank_cnt from group_concat;
-
username | rank | rank_cnt
-
----------+------+----------
-
Lily | 136 | 1
-
Lily | 136 | 1
-
Lily | 105 | 3
-
Lily | 100 | 4
-
Lily | 100 | 4
-
Lucy | 146 | 1
-
Lucy | 137 | 2
-
Lucy | 127 | 3
-
Lucy | 121 | 4
-
Lucy | 104 | 5
-
Lucy | 100 | 6
-
ytt | 149 | 1
-
ytt | 149 | 1
-
ytt | 117 | 3
-
ytt | 116 | 4
-
ytt | 116 | 4
-
ytt | 106 | 6
-
(17 rows)
-
-
-
Time: 131.150 ms
MySQL 提供了group_concat 聚合函数可以变相的实现:
-
mysql>
-
select a.username, a.rank, find_in_set(a.rank,b.rank_gp) as rank_cnt
-
from group_concat as a ,
-
(select username,group_concat(rank order by rank desc separator ',') as rank_gp from group_concat group by username
-
) b
-
where a.username = b.username order by a.username asc,a.rank desc;
-
+----------+------+----------+
-
| username | rank | rank_cnt |
-
+----------+------+----------+
-
| Lily | 136 | 1 |
-
| Lily | 136 | 1 |
-
| Lily | 105 | 3 |
-
| Lily | 100 | 4 |
-
| Lily | 100 | 4 |
-
| Lucy | 146 | 1 |
-
| Lucy | 137 | 2 |
-
| Lucy | 127 | 3 |
-
| Lucy | 121 | 4 |
-
| Lucy | 104 | 5 |
-
| Lucy | 100 | 6 |
-
| ytt | 149 | 1 |
-
| ytt | 149 | 1 |
-
| ytt | 117 | 3 |
-
| ytt | 116 | 4 |
-
| ytt | 116 | 4 |
-
| ytt | 106 | 6 |
-
+----------+------+----------+
-
17 rows in set (0.02 sec)
当然了,如果MySQL SQL不太熟悉,可以用程序来处理,比如我下面用python 实现了rank 函数,执行结果如下:(脚本源代码最后)
-
>>> ================================ RESTART ================================
-
>>>
-
username | rank | rank_cnt
-
--------------------------------
-
ytt |149 |1
-
ytt |149 |1
-
ytt |117 |3
-
ytt |116 |4
-
ytt |116 |4
-
ytt |106 |6
-
Lucy |146 |1
-
Lucy |137 |2
-
Lucy |127 |3
-
Lucy |121 |4
-
Lucy |104 |5
-
Lucy |100 |6
-
Lily |136 |1
-
Lily |136 |2
-
Lily |105 |3
-
Lily |100 |4
-
Lily |100 |4
-
(17 Rows.)
-
Time: 0.162 Seconds.
附上脚本代码:
-
from __future__ import print_function
-
from datetime import date, datetime, timedelta
-
import mysql.connector
-
import time
-
# Created by ytt 2014/5/14.
-
# Rank function implement.
-
def db_connect(is_true):
-
cnx = mysql.connector.connect(host='192.168.1.131',port='3306',user='python_user', password='python_user',database='t_girl',autocommit=is_true)
-
return cnx
-
def db_rs_rank(c1 ='username desc' ,c2 = ' rank desc'):
-
# c1: partition column.
-
# c2: sort column.
-
time_start = time.time()
-
cnx = db_connect(True)
-
rs = cnx.cursor()
-
query0 = "select username,rank from group_concat order by " + c1 + ", " + c2
-
rs.execute(query0,multi=False)
-
if rs.with_rows:
-
rows = rs.fetchall()
-
else:
-
return "No rows affected."
-
i = 0
-
j = 0
-
k = 1
-
result = []
-
field1_compare = rows[0][0]
-
field2_compare = rows[0][1]
-
while i < len(rows):
-
if field1_compare == rows[i][0]:
-
j += 1
-
if field2_compare != rows[i][1]:
-
field2_compare =rows[i][1]
-
k = j
-
result.append((rows[i][0],rows[i][1],k))
-
else:
-
j = 1
-
k = 1
-
field1_compare = rows[i][0]
-
result.append((rows[i][0],rows[i][1],k))
-
i += 1
-
i = 0
-
rows_header = list(rs.column_names)
-
rows_header.append('rank_cnt')
-
print (rows_header[0].center(10,' ') + '|' + rows_header[1].center(10,' ') + '|' + rows_header[2].center(10,' '))
-
print ('-'.center(32,'-'))
-
while i < len(result):
-
print (result[i][0].ljust(10,' ') + '|' + str(result[i][1]).ljust(10,' ') + '|' + str(result[i][2]).ljust(10,' '))
-
i += 1
-
rs.close()
-
cnx.close()
-
time_end = time.time()
-
print ('(' + str(len(rows))+ ' Rows.)')
-
print ("Time:" + str(round((time_end-time_start),3)).rjust(10,' ') + ' Seconds.')
-
if __name__=='__main__':
-
db_rs_rank()
阅读(6988) | 评论(0) | 转发(0) |