分类: Mysql/postgreSQL
2008-05-15 15:40:24
在实际应用中,很可能会碰到一些需要删除某些字段的重复记录,我现在把我能想到的写下来,望高手们补充。
1、
具体实现如下:
Table Create Table
------------ --------------------------------------------------------
users_groups CREATE TABLE `users_groups` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`uid` int(11) NOT NULL,
`gid` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=utf8
users_groups.txt内容:
1,11,502
2,107,502
3,100,503
4,110,501
5,112,501
6,104,502
7,100,502
8,100,501
9,102,501
10,104,502
11,100,502
12,100,501
13,102,501
14,110,501
mysql> load data infile 'c:\\users_groups.txt'
into table users_groups fields
rminated by ',' lines terminated by '\n';
Query OK, 14 rows affected (0.05 sec)
Records: 14 Deleted: 0 Skipped: 0 Warnings: 0
mysql> select * from users_groups;
id |
uid |
gid |
1 |
11 |
502 |
2 |
107 |
502 |
3 |
100 |
503 |
4 |
110 |
501 |
5 |
112 |
501 |
6 |
104 |
502 |
7 |
100 |
502 |
8 |
100 |
501 |
9 |
102 |
501 |
10 |
104 |
502 |
11 |
100 |
502 |
12 |
100 |
501 |
13 |
102 |
501 |
14 |
110 |
501 |
1
4 rows in set (0.00 sec)
mysql> create
temporary table tmp_wrap select * from users_groups group by uid having
count(1) > 1 union all
select * from users_groups group by uid having count(1) = 1;
Query OK, 7 rows affected (0.11 sec)
Records: 7 Duplicates: 0 Warnings: 0
mysql> truncate table users_groups;
Query OK, 14 rows affected (0.03 sec)
mysql> insert into users_groups select * from
tmp_wrap;
Query OK, 7 rows affected (0.03 sec)
Records: 7 Duplicates: 0 Warnings: 0
mysql> select * from users_groups;
id |
uid |
gid |
1 |
11 |
502 |
2 |
107 |
502 |
3 |
100 |
503 |
4 |
110 |
501 |
5 |
112 |
501 |
6 |
104 |
502 |
9 |
102 |
501 |
mysql> drop
table tmp_wrap;
Query OK, 0 rows affected (0.05
sec)
2、还有一个很精简的办法。
查找重复的,并且除掉最小的那个。
delete users_groups as
a from users_groups as a,
(
select *,min(id) from users_groups group by uid having count(1) > 1
) as b
where a.uid = b.uid and a.id > b.id;
(7 row(s)affected)
(0 ms taken)
id |
uid |
gid |
1 |
11 |
502 |
2 |
107 |
502 |
3 |
100 |
503 |
4 |
110 |
501 |
5 |
112 |
501 |
6 |
104 |
502 |
9 |
102 |
501 |
3、现在来看一下这两个办法的效率。
运行一下以下SQL 语句
create index f_uid on
users_groups(uid);
explain select * from users_groups group by uid having count(1) > 1 union
all
select * from users_groups group by uid having count(1) = 1;
explain select *
from users_groups as a,
(
select *,min(id) from users_groups group by uid having count(1) > 1
) as b
where a.uid = b.uid and a.id > b.id;
id |
select_type |
table |
type |
possible_keys |
key |
key_len |
ref |
rows |
Extra |
1 |
PRIMARY |
users_groups |
index |
(NULL) |
f_uid |
4 |
(NULL) |
14 |
|
2 |
UNION |
users_groups |
index |
(NULL) |
f_uid |
4 |
(NULL) |
14 |
|
(NULL) |
UNION RESULT |
|
ALL |
(NULL) |
(NULL) |
(NULL) |
(NULL) |
(NULL) |
|
id |
select_type |
table |
type |
possible_keys |
key |
key_len |
ref |
rows |
Extra |
1 |
PRIMARY |
|
ALL |
(NULL) |
(NULL) |
(NULL) |
(NULL) |
4 |
|
1 |
PRIMARY |
a |
ref |
PRIMARY,f_uid |
f_uid |
4 |
b.uid |
1 |
Using where |
2 |
DERIVED |
users_groups |
index |
(NULL) |
f_uid |
4 |
(NULL) |
14 |
|
很明显的第二个比第一个扫描的函数要少。