最新消息:USBMI致力于为网友们分享Windows、安卓、iOS等主流手机系统相关的资讯以及评测,同时提供相关教程、应用、软件下载等服务。

有一个1G大小的资料,里面每一行是一个词,词的大小不超过16字节

IT圈 admin 53浏览 0评论

2023年12月20日发(作者:达陶)

node_no_space *p; for(p = bin[index]; p != NULL; p = p->next) { if(strcmp(p_word, p->word) == 0) { (p->count)++; return; } } p = (node_no_space*)malloc(sizeof(node_no_space)); p->count = 1; p->word = (char*)malloc(strlen(p_word) + 1); strcpy(p->word, p_word); p->next = bin[index]; bin[index] = p;}void min_heap(node_has_space *heap, int i, int len) { int left = 2 * i; int right = 2 * i + 1; int min_index = 0; if(left <= len && heap[left].count < heap[i].count) { min_index = left; } else { min_index = i; } if(right <= len && heap[right].count < heap[min_index].count) { min_index = right; } if(min_index != i) { swap(&heap[min_index].count, &heap[i].count); char buffer[WORDLEN]; strcpy(buffer, heap[min_index].word); strcpy(heap[min_index].word, heap[i].word); strcpy(heap[i].word, buffer); min_heap(heap, min_index, len); }}void build_min_heap(node_has_space *heap, int len) { int index = len / 2; int i; for(i = index; i >= 1; i--) { min_heap(heap, i, len); }}void destroy_bin() { node_no_space *p, *q; int i = 0; while(i < HASHLEN) { p = bin[i]; while(p) { q = p->next; if(p->word) { free(p->word); p->word = NULL; } free(p); p = NULL; p = q; } bin[i] = NULL; i++; }

}}void write_to_file(char *path) { FILE *out; if((out = fopen(path, "w")) == NULL) { cout << "error, open " << path << " failed!" << endl; return; } int i; node_no_space *p; i = 0; while(i < HASHLEN) { for(p = bin[i]; p != NULL; p = p->next) { fprintf(out, "%s %dn", p->word, p->count); } i++; } fclose(out); destroy_bin();}void main() { char word[WORDLEN]; char path[20]; int count; int n = 10; unsigned int index = 0; int i; FILE *fin[10]; FILE *fout; FILE *f_message; node_has_space *heap = (node_has_space*)malloc(sizeof(node_has_space) * (n + 1)); // divide word into n files if((f_message = fopen("", "r")) == NULL) { cout << "error, open source file failed!" << endl; return; } for(i = 0; i < n; i++) { sprintf(path, "tmp%", i); fin[i] = fopen(path, "w"); } while(fscanf(f_message, "%s", word) != EOF) { if(trim_word(word)) { index = hash(word) % n; fprintf(fin[index], "%sn", word); } } for(i = 0; i < n; i++) { fclose(fin[i]); } // do hash count for(i = 0; i < n; i++) { sprintf(path, "tmp%", i); fin[i] = fopen(path, "r"); while(fscanf(fin[i], "%s", word) != EOF) { insert_word(word); } fclose(fin[i]); write_to_file(path); } // heap find

for(i = 1; i <= n; i++) { strcpy(heap[i].word, ""); heap[i].count = 0; }

2023年12月20日发(作者:达陶)

node_no_space *p; for(p = bin[index]; p != NULL; p = p->next) { if(strcmp(p_word, p->word) == 0) { (p->count)++; return; } } p = (node_no_space*)malloc(sizeof(node_no_space)); p->count = 1; p->word = (char*)malloc(strlen(p_word) + 1); strcpy(p->word, p_word); p->next = bin[index]; bin[index] = p;}void min_heap(node_has_space *heap, int i, int len) { int left = 2 * i; int right = 2 * i + 1; int min_index = 0; if(left <= len && heap[left].count < heap[i].count) { min_index = left; } else { min_index = i; } if(right <= len && heap[right].count < heap[min_index].count) { min_index = right; } if(min_index != i) { swap(&heap[min_index].count, &heap[i].count); char buffer[WORDLEN]; strcpy(buffer, heap[min_index].word); strcpy(heap[min_index].word, heap[i].word); strcpy(heap[i].word, buffer); min_heap(heap, min_index, len); }}void build_min_heap(node_has_space *heap, int len) { int index = len / 2; int i; for(i = index; i >= 1; i--) { min_heap(heap, i, len); }}void destroy_bin() { node_no_space *p, *q; int i = 0; while(i < HASHLEN) { p = bin[i]; while(p) { q = p->next; if(p->word) { free(p->word); p->word = NULL; } free(p); p = NULL; p = q; } bin[i] = NULL; i++; }

}}void write_to_file(char *path) { FILE *out; if((out = fopen(path, "w")) == NULL) { cout << "error, open " << path << " failed!" << endl; return; } int i; node_no_space *p; i = 0; while(i < HASHLEN) { for(p = bin[i]; p != NULL; p = p->next) { fprintf(out, "%s %dn", p->word, p->count); } i++; } fclose(out); destroy_bin();}void main() { char word[WORDLEN]; char path[20]; int count; int n = 10; unsigned int index = 0; int i; FILE *fin[10]; FILE *fout; FILE *f_message; node_has_space *heap = (node_has_space*)malloc(sizeof(node_has_space) * (n + 1)); // divide word into n files if((f_message = fopen("", "r")) == NULL) { cout << "error, open source file failed!" << endl; return; } for(i = 0; i < n; i++) { sprintf(path, "tmp%", i); fin[i] = fopen(path, "w"); } while(fscanf(f_message, "%s", word) != EOF) { if(trim_word(word)) { index = hash(word) % n; fprintf(fin[index], "%sn", word); } } for(i = 0; i < n; i++) { fclose(fin[i]); } // do hash count for(i = 0; i < n; i++) { sprintf(path, "tmp%", i); fin[i] = fopen(path, "r"); while(fscanf(fin[i], "%s", word) != EOF) { insert_word(word); } fclose(fin[i]); write_to_file(path); } // heap find

for(i = 1; i <= n; i++) { strcpy(heap[i].word, ""); heap[i].count = 0; }

发布评论

评论列表 (0)

  1. 暂无评论