分类: C/C++
2010-06-14 00:40:26
对外存(磁盘或磁带)中而不是内存中的数据进行排序称为外部排序。对大量数据排序的方法与数据的类型有关。如果是数据元素本身很大(如图像)而无法装入内存,并且数据元素的数目不是很多,那么可以只在内存中保留排序关键字和一个指示数据在外存中的位置的值,只要将排序关键字和位置指示值这一对数据排好序,外存中的数据也就排好序了。如果是数据元素太多而无法一次装入内存,那么可以将数据分成几组,分别装入内存进行排序,然后合并所得的排序结果文件。
在本章前文中已经介绍了4种排序算法程序,它们都可用来实现外部排序。
例3.3是实现外部排序的一个例子,它对每组含10,000字符串的若干组数据进行排序,并将排序结果存为若干个文件,然后合并所有的文件。例3.3和例3.2b有许多相似之处,你可以将这两个例子比较一下。
例3.3中的各个函数的作用分别如下:
(1) myfgets()函数删去行尾的换行符('\n')。
(2) myfgets()函数在行尾插入换行符('\n')。
(3) openFile()函数处理打开文件时出现的错误。
(4) fileName()函数创建临时文件名。
(5) split()函数在第69—74行从输入文件中读入10,000行,在第76行对读入的数据进行内部排序,在第77—80行将排序结果写入一个临时文件。
(6) merge()函数将排好序的两个文件合并为一个文件,其合并方式与例3.2b中的merge()函数合并两个链表的方式完全相同。
(7) mergePairs()函数把每两个临时文件合并为一个文件,其合并方式与例3.2b中的mergePairs()函数合并链表的方式完全相同。
(8) main()函数先调用split()函数对初始文件进行处理,然后调用mergePairs()函数将所有的临时文件合并为一个大的文件,最后用排好序的文件替换初始文件。
例3.3一个外部排序算法程序
1:#include
2:#include
3:#include
4:#include
5:
6:#define LINES_PER_FIlE 10000
7:
8: /* Just 1ike fgets(),but removes trailing '\n'. */
9:Char *
10:myfgets(char *buf,size_t size,FiLE *fp)
11:{
12; char *s=fgets(buf,size,fp);
13: if (s)
14: s[strlen(s)—1]='\0';
15: return s,
16:}
17:
18:/* Just 1ikefputs(),butadds trailing,'\n' */
19:void
20:myfputs(char *s,FILE *fp)
21:{
22: int n=strlen(s);
23: s[n] = '\n';
24: fwrite(s,1,n+1,fp);
25: s[n]='\0';
26:}
27:
28: /* Just like fopen(),but prints message and dies
if error. */
29: FILE *
30: openFile(const char *name, const char *mode)
31: {
32: FILE *fp = fopen(name, mode);
33:
34: if (fp == NULL)
35: {
36: perror (name);
37: exit(1);
38: }
39: return fp;
40: }
41:
42: / * Takes a number and generates a filename from it.
* /
43: const char *
44: fileName (int n)
45: {
46: static char name[16];
47:
48: sprintf (name, "temp %d", n);
49: return name;
50: }
51:
52: /*
53: * Splits input file into sorted files whith no more
54: * than LINES_PER_FILE lines each.
55: */
56: int
57: split(FILE *infp)
58: {
59: int nfiles = 0
60: int line;
61:
62: for (line = LINES_PER_FILE; line ==
LINES_PRE_FILE ; )
63: {
64: char *array[LINES_PER_ FILE + 1 ];
65: char buf[1024];
66: int i;
67: FILE *fp;
68:
69: for (line = 0; line
)
70: {
71: if ( ! myfgets (buf, sizeof(buf),
第17 页
C语言编程常见问题
infp) )
72: break
73: array [line ] = strdup (bur);
74: }
75: array[line]= NULL;
76: sortStrings (array);
77: fp = openFile (fileName (nfiles++ ), "w" );
78: for (i =0; i < line; i++)
79: myfputs (array[i], fp);
80: fclose (fp);
81: }
82: return nfiles;
83: }
84:
85: /*
86: * Merges two sorted input files into
87: * one sorted output file.
88: * /
89: void
90: merge(FILE *outfp, FILE *fpl, FILE *fp2)
91: {
92: char bufl[1024];
93: char buf2[1024];
94: char * first;
95: char * second;
96:
97: first = myfgets(buf1, sizeof(buf1), fp1);
98: second = myfgets (buf2, sizeof(buf2), fp2);
99: while (first && second)
100: {
101: if (strcmp(first, second) > 0)
102: {
103: myfputs (second, outfp);
104: second = myfgets (buf2, sizeof
(buf2),
105:
fp2);
106: }
107: else
108: {
109: myfputs (first, outfp)
110: first = myfgets(buf1,
sizeof(buf1),
111:
fp1);
112: }
113: }
114: while (first)
115: {
116: myfputs (first, outfp);
117: first = myfgets(buf1, sizeof(buf1),
fp1);
118: }
119: while (second)
120: {
121: myfputs (second, outfp );
122: second = myfgets (buf2, sizeof (buf2),
fp2)
123: }
124: }
125:
126: /*
127: * Takes nfiles files and merges pairs of them.
128: * Returns new number of files.
129: * /
130: int
131: mergePairs (int nfiles)
132: {
133: int i;
134: int out = O;
135:
136: for (i = O; i < nfiles-1; i += 2)
137: {
138: FILE * temp;
139: FILE *fp1;
140: FILE * fp2;
141: const char * first;
142: const char * second;
143:
144: temp = openFile("temp" , "w" ) ;
145: fp1 = openFile(fileName(i), "r" );
146: fp2 = openFile(fileName(i + 1), "r");
147: merge (temp, fp1, fp2);
148: fclose (fpl);
149: fclose (fp2);
150: fclose (temp);
151: unlink(fileName (i));
152: unlink(fileName(i + 1));
153: rename ("temp", fileName (out ++ ));
154: }
155: if (i < nfiles)
156: {
157: char * tmp = strdup(fileName(i));
158: rename (tmp, fileName (out ++ ) )
159: free (tmp);
160: }
第19 页
C语言编程常见问题
161: return out;
162: }
163:
164: int
165: main(int argc, char ** argv)
166:{
167: char buf2[1024];
168: int nfiles;
169: int line;
170: int in;
171: int out;
172: FILE * infp;
173:
174: if (argc !=2)
175: {
176: fprintf(stderr, "usage: %s file\n",
argv[0]);
177: exit(1);
178: }
179: infp = openFile (argv[1], "r" );
180: nfiles = split(infp);
181: fclose (infp);
182: while (nfiles > 1)
183: nfiles = mergePairs (nfiles);
184: rename (fileName (0), argv[1]);
185: return 0;
186: }
请参见:
3.1 哪一种排序方法最方便?
3.2 哪一种排序方法最快?
3.7 怎样对链表进行排序?