#include #include #include #include #include #define KEYLEN 8 int threads = 1; pthread_mutex_t lock; struct sortdata { int n; int arg; int compares; int writes; int keylen; char (*data)[KEYLEN]; }; void builddata(char (*data)[KEYLEN], int n, int x, int y, int z, int extra); void dump(struct sortdata *sort); void *insertionsort(void *sortv); void *quicksort(void *sortv); void *quickinssort(void *sortv); void *quickmidsort(void *sortv); void heapifydown(struct sortdata *sort, int item); void heapify(struct sortdata *sort); void *myheapsort(void *sortv); void *myradixsort(void *sortv); void *mymergesort(void *sortv); void *mergeinssort(void *sortv); // void *quicksortmt(void *sortv); int main(int argc, char *argv[]) { int i, x, size; void *(*algo)(void *); struct sortdata sort; struct timeval stop, start; char tmp[KEYLEN]; if (argc != 3 && argc != 4) { printf("Usage: %s [argument]\n", argv[0]); printf("\nSupported sorts:\n\n"); printf(" * insertion: insertion sort\n"); printf(" * heap: heap sort\n"); printf(" * quick: quicksort (uses median of first/middle/last items as pivot)\n"); printf(" * quickmid: quicksort that uses middle item as the pivot\n"); printf(" * quickins: quicksort switches to insertion sort at n <= [argument]\n"); printf(" * merge: merge sort\n"); printf(" * mergeins: merge sort switches to insertion sort at n <= [argument]\n"); printf(" * radix: radix sort\n"); printf("\nn < 50 prints the items after sorting.\n"); exit(1); } else { sscanf(argv[2], "%d", &sort.n); if (argc == 4) sscanf(argv[3], "%d", &sort.arg); else sort.arg = 0; } if (strcmp(argv[1], "insertion") == 0) algo = &insertionsort; else if (strcmp(argv[1], "quick") == 0) algo = &quicksort; else if (strcmp(argv[1], "quickins") == 0) algo = &quickinssort; else if (strcmp(argv[1], "quickmid") == 0) algo = &quickmidsort; else if (strcmp(argv[1], "heap") == 0) algo = &myheapsort; else if (strcmp(argv[1], "radix") == 0) algo = &myradixsort; else if (strcmp(argv[1], "merge") == 0) algo = &mymergesort; else if (strcmp(argv[1], "mergeins") == 0) algo = &mergeinssort; // else if (strcmp(argv[1], "quickmt") == 0) // algo = &quicksortmt; else { printf("don't know that one\n"); exit(1); } sort.data = malloc(KEYLEN * sort.n); if (!sort.data) { printf("Out of memory\n"); exit(10); } // builddata(sort.data, sort.n, 0, 343, sort.n * 10); // builddata(sort.data, sort.n, 1000000, 1, sort.n * 1000000000); sort.keylen = 7; builddata(sort.data, sort.n, 1000000, 65535343, 10000000, 0); // dump(&sort); // semi-random sort.compares = 0; sort.writes = 0; gettimeofday(&start, NULL); algo(&sort); gettimeofday(&stop, NULL); printf("%s sort on %d unsorted items ", argv[1], sort.n); printf("took %5.3f sec, %3.1f compares, %3.1f writes\n", (float)(stop.tv_sec - start.tv_sec) + (float)(stop.tv_usec - start.tv_usec) / 1000000, (float)sort.compares/sort.n, (float)sort.writes/sort.n); // printf("\n"); if (sort.n < 50) dump(&sort); // sorted sort.compares = 0; sort.writes = 0; gettimeofday(&start, NULL); algo(&sort); gettimeofday(&stop, NULL); printf("%s sort on %d sorted items ", argv[1], sort.n); printf("took %5.3f sec, %3.1f compares, %3.1f writes\n", (float)(stop.tv_sec - start.tv_sec) + (float)(stop.tv_usec - start.tv_usec) / 1000000, (float)sort.compares/sort.n, (float)sort.writes/sort.n); // printf("\n"); // dump(&sort); for (i = 0; i < sort.n / 2; i++) { strncpy(tmp, sort.data[i], KEYLEN); strncpy(sort.data[i], sort.data[sort.n-i-1], KEYLEN); strncpy(sort.data[sort.n-i-1], tmp, KEYLEN); } // reverse sorted sort.compares = 0; sort.writes = 0; gettimeofday(&start, NULL); algo(&sort); gettimeofday(&stop, NULL); printf("%s sort on %d reverse sorted items ", argv[1], sort.n); printf("took %5.3f sec, %3.1f compares, %3.1f writes\n", (float)(stop.tv_sec - start.tv_sec) + (float)(stop.tv_usec - start.tv_usec) / 1000000, (float)sort.compares/sort.n, (float)sort.writes/sort.n); // printf("\n"); // dump(&sort); } void builddata(char (*data)[KEYLEN], int n, int x, int y, int z, int extra) { int i; char moredigits[KEYLEN]; for (i = 0; i < extra; i++) moredigits[i] = '0'; moredigits[i] = 0; for (i = 0; i < n; i++) { // data[i] = x; sprintf(data[i], "%s%d", moredigits, x); x = (x + y) % z; } } void dump(struct sortdata *sort) { int i; for (i = 0; i < sort->n; i++) printf("data[%d] = %s\n", i, sort->data[i]); } void *insertionsort(void *sortv) { int i, j; char tmp[KEYLEN]; struct sortdata *sort; sort = sortv; for (i = 1; i < sort->n; i++) { strncpy(tmp, sort->data[i], KEYLEN); j = i; sort->compares++; while (j > 0 && strcmp(tmp, sort->data[j-1]) < 0) { sort->compares++; strncpy(sort->data[j], sort->data[j-1], KEYLEN); sort->writes++; j--; } if (j != i) { strncpy(sort->data[j], tmp, KEYLEN); sort->writes++; } } return NULL; } void *quicksort(void *sortv) { int i, low, high; char pivot[KEYLEN], tmp[KEYLEN]; struct sortdata *sort, arg; sort = sortv; // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } low = 0; i = sort->n / 2; high = sort->n - 1; // find pivot sort->compares += 3; // sort->writes++; if (strcmp(sort->data[low], sort->data[i]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) strncpy(pivot, sort->data[i], KEYLEN); // sorted, pivot is middle else if (strcmp(sort->data[low], sort->data[i]) > 0 && strcmp(sort->data[i], sort->data[high]) > 0) strncpy(pivot, sort->data[i], KEYLEN); // reverse sorted, pivot is middle else if (strcmp(sort->data[i], sort->data[low]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) { // middle is smaller than both ends, select smallest end if (strcmp(sort->data[low], sort->data[high]) < 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } else { // middle is bigger than both ends, select biggest end if (strcmp(sort->data[low], sort->data[high]) > 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } while (low < high) { // search for data > pivot low to high sort->compares++; while (low < high && strcmp(sort->data[low], pivot) < 0) { sort->compares++; low++; } // search for data < pivot high to low sort->compares++; while (low < high && strcmp(sort->data[high], pivot) > 0) { sort->compares++; high--; } if (low < high) { // swap sort->writes += 2; strncpy(tmp, sort->data[low], KEYLEN); strncpy(sort->data[low], sort->data[high], KEYLEN); strncpy(sort->data[high], tmp, KEYLEN); low++; } } // now sort both halves // first part arg.n = low; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; quicksort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // second part arg.n = sort->n - high; arg.data = sort->data + high; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; quicksort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; return NULL; } /* // multithreaded version of quicksort // doesn't work, probably a concurrency bug void *quicksortmt(void *sortv) { int i, low, high; char pivot[KEYLEN], tmp[KEYLEN]; struct sortdata *sort, arg; pthread_t secondthread; int nothread; sort = sortv; // printf("quicksortmt with n = %d\n", sort->n); // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } low = 0; i = sort->n / 2; high = sort->n - 1; // find pivot if (strcmp(sort->data[low], sort->data[i]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) strncpy(pivot, sort->data[i], KEYLEN); // sorted, pivot is middle else if (strcmp(sort->data[low], sort->data[i]) > 0 && strcmp(sort->data[i], sort->data[high]) > 0) strncpy(pivot, sort->data[i], KEYLEN); // reverse sorted, pivot is middle else if (strcmp(sort->data[i], sort->data[low]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) { // middle is smaller than both ends, select smallest end if (strcmp(sort->data[low], sort->data[high]) < 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } else { // middle is bigger than both ends, select biggest end if (strcmp(sort->data[low], sort->data[high]) > 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } while (low < high) { // search for data > pivot low to high while (low < high && strcmp(sort->data[low], pivot) < 0) low++; // search for data < pivot high to low while (low < high && strcmp(sort->data[high], pivot) > 0) high--; if (low < high) { // swap strncpy(tmp, sort->data[low], KEYLEN); strncpy(sort->data[low], sort->data[high], KEYLEN); strncpy(sort->data[high], tmp, KEYLEN); low++; } } // int thr = 1; // now sort both halves // first part in a new thread if possible arg.n = low; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; pthread_mutex_lock(&lock); if (sort->arg > 1 && threads < sort->arg) { threads++; // thr = threads; // printf("starting thread %d\n", thr); pthread_mutex_unlock(&lock); nothread = pthread_create(&secondthread, NULL, quicksortmt, (void *)&arg); // pthread_join(secondthread, NULL); } else { pthread_mutex_unlock(&lock); nothread = 1; } if (nothread) quicksortmt(&arg); // second part arg.n = sort->n - low; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data + low; quicksortmt(&arg); // wait for other thread to finish if (!nothread) { // printf("joining thread %d...\n", thr); pthread_join(secondthread, NULL); // pthread_mutex_lock(&lock); // threads--; // pthread_mutex_unlock(&lock); // printf("joined thread %d\n", thr); } return NULL; } */ // quicksort with insertion sort for small sets void *quickinssort(void *sortv) { int i, low, high; char pivot[KEYLEN], tmp[KEYLEN]; struct sortdata *sort, arg; sort = sortv; // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if number of items to sort is small, do insertion sort if (sort->n <= sort->arg) { insertionsort(sort); return NULL; } /* // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } */ low = 0; i = sort->n / 2; high = sort->n - 1; // find pivot sort->compares += 3; // sort->writes++; if (strcmp(sort->data[low], sort->data[i]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) strncpy(pivot, sort->data[i], KEYLEN); // sorted, pivot is middle else if (strcmp(sort->data[low], sort->data[i]) > 0 && strcmp(sort->data[i], sort->data[high]) > 0) strncpy(pivot, sort->data[i], KEYLEN); // reverse sorted, pivot is middle else if (strcmp(sort->data[i], sort->data[low]) < 0 && strcmp(sort->data[i], sort->data[high]) < 0) { // middle is smaller than both ends, select smallest end if (strcmp(sort->data[low], sort->data[high]) < 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } else { // middle is bigger than both ends, select biggest end if (strcmp(sort->data[low], sort->data[high]) > 0) strncpy(pivot, sort->data[low], KEYLEN); else strncpy(pivot, sort->data[high], KEYLEN); } while (low < high) { // search for data > pivot low to high sort->compares++; while (low < high && strcmp(sort->data[low], pivot) < 0) { sort->compares++; low++; } // search for data < pivot high to low sort->compares++; while (low < high && strcmp(sort->data[high], pivot) > 0) { sort->compares++; high--; } if (low < high) { // swap sort->writes += 2; strncpy(tmp, sort->data[low], KEYLEN); strncpy(sort->data[low], sort->data[high], KEYLEN); strncpy(sort->data[high], tmp, KEYLEN); low++; } } // now sort both halves // first part in a new thread arg.n = low; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; quickinssort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // second part arg.n = sort->n - high; arg.arg = sort->arg; arg.data = sort->data + high; arg.compares = 0; arg.writes = 0; quickinssort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; return NULL; } void *quickmidsort(void *sortv) { int low, high; char pivot[KEYLEN], tmp[KEYLEN]; struct sortdata *sort, arg; sort = sortv; // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } low = 0; high = sort->n - 1; // use middle element as the pivot strncpy(pivot, sort->data[sort->n / 2], KEYLEN); while (low < high) { // search for data > pivot low to high sort->compares++; while (low < high && strcmp(sort->data[low], pivot) < 0) { sort->compares++; low++; } // search for data < pivot high to low sort->compares++; while (low < high && strcmp(sort->data[high], pivot) > 0) { sort->compares++; high--; } if (low < high) { // swap sort->writes += 2; strncpy(tmp, sort->data[low], KEYLEN); strncpy(sort->data[low], sort->data[high], KEYLEN); strncpy(sort->data[high], tmp, KEYLEN); low++; } } // now sort both halves // first part in a new thread arg.n = low; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; quickmidsort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // second part arg.n = sort->n - high; arg.arg = sort->arg; arg.data = sort->data + high; arg.compares = 0; arg.writes = 0; quickmidsort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; return NULL; } void *myradixsort(void *sortv) { int i, j, k, b; struct sortdata *sort; char (*bucket[11])[KEYLEN]; int bucketsize[11]; sort = sortv; for (b = 0; b < 11; b++) { bucket[b] = malloc(KEYLEN * sort->n / 5 + 8); if (!sort->data) { printf("Out of memory\n"); exit(10); } bucketsize[b] = 0; } for (k = sort->keylen - 1; k >= 0; k--) { for (i = 0; i < sort->n; i++) { if (strlen(sort->data[i]) <= k) b = 0; else b = sort->data[i][k] - 47; sort->writes++; strncpy(bucket[b][bucketsize[b]], sort->data[i], KEYLEN); bucketsize[b]++; } i = 0; for (b = 0; b < 11; b++) { for (j = 0; j < bucketsize[b]; j++) { sort->writes++; strncpy(sort->data[i], bucket[b][j], KEYLEN); i++; } bucketsize[b] = 0; } } for (b = 0; b < 11; b++) free(bucket[b]); return NULL; } void *myheapsort(void *sortv) { int i, realsize; struct sortdata *sort, arg; char tmp[KEYLEN]; sort = sortv; heapify(sort); realsize = sort->n; for (i = realsize - 1; i > 0; i--) { strncpy(tmp, sort->data[0], KEYLEN); sort->n--; sort->writes++; strncpy(sort->data[0], sort->data[sort->n], KEYLEN); heapifydown(sort, 0); sort->writes++; strncpy(sort->data[i], tmp, KEYLEN); } sort->n = realsize; return NULL; } void heapifydown(struct sortdata *sort, int item) { int child1, child2; char tmp[KEYLEN]; child1 = item * 2 + 1; child2 = child1 + 1; sort->compares += 2; if (child2 < sort->n && strcmp(sort->data[item], sort->data[child2]) < 0 && strcmp(sort->data[child1], sort->data[child2]) < 0) { // swap item, child2 sort->writes += 2; strncpy(tmp, sort->data[item], KEYLEN); strncpy(sort->data[item], sort->data[child2], KEYLEN); strncpy(sort->data[child2], tmp, KEYLEN); heapifydown(sort, child2); } else if (child1 < sort->n && strcmp(sort->data[item], sort->data[child1]) < 0) { // swap item, child1 sort->writes += 2; strncpy(tmp, sort->data[item], KEYLEN); strncpy(sort->data[item], sort->data[child1], KEYLEN); strncpy(sort->data[child1], tmp, KEYLEN); heapifydown(sort, child1); } } void heapify(struct sortdata *sort) { int i; // struct timeval stop, start; // gettimeofday(&start, NULL); for (i = sort->n / 2 + 1; i >= 0; i--) heapifydown(sort, i); // gettimeofday(&stop, NULL); // printf("initial heapify took %5.3f sec\n", (float)(stop.tv_sec - start.tv_sec) + (float)(stop.tv_usec - start.tv_usec) / 1000000); } // mergesort void *mymergesort(void *sortv) { int split, i, j, first, second; char tmp[KEYLEN]; struct sortdata *sort, arg; char (*merge)[KEYLEN]; sort = sortv; // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } split = sort->n / 2; // sort first half arg.n = split; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; mymergesort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // sort second half arg.n = sort->n - split; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data + split; mymergesort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // if the data was already sorted, the last item in first half is // smaller than the first item in the second half, so we have nothing to do sort->compares++; if (strcmp(sort->data[split-1], sort->data[split]) <= 0) return NULL; // merge merge = malloc(KEYLEN * sort->n); if (!sort->data) { printf("Out of memory\n"); exit(10); } i = 0; first = 0; second = split; // while data left in both halves while (first < split && second < sort->n) { sort->compares++; if (strcmp(sort->data[first], sort->data[second]) <= 0) { strncpy(merge[i], sort->data[first], KEYLEN); first++; } else { strncpy(merge[i], sort->data[second], KEYLEN); second++; } sort->writes++; i++; } // if there's still data in the first half, copy it if (first < split) { sort->writes += split - first; memcpy(merge[i], sort->data[first], (split - first) * KEYLEN); } // if there's still data in the second half, we leave it in place // copy back the merge buffer to the original data store sort->writes += sort->n - second; memcpy(sort->data[0], merge[0], second * KEYLEN); free(merge); return NULL; } // mergeinssort void *mergeinssort(void *sortv) { int split, i, j, first, second; char tmp[KEYLEN]; struct sortdata *sort, arg; char (*merge)[KEYLEN]; sort = sortv; // if n == 0 or 1, nothing to do if (sort->n <= 1) return NULL; // if number of items to sort is small, do insertion sort if (sort->n <= sort->arg) { insertionsort(sort); return NULL; } /* // if n == 2, see if we need to swap them, then done if (sort->n == 2) { sort->compares++; if (strcmp(sort->data[1], sort->data[0]) < 0) { sort->writes += 2; strncpy(tmp, sort->data[0], KEYLEN); strncpy(sort->data[0], sort->data[1], KEYLEN); strncpy(sort->data[1], tmp, KEYLEN); } return NULL; } */ split = sort->n / 2; // sort first half arg.n = split; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data; mergeinssort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // sort second half arg.n = sort->n - split; arg.arg = sort->arg; arg.compares = 0; arg.writes = 0; arg.data = sort->data + split; mergeinssort(&arg); sort->compares += arg.compares; sort->writes += arg.writes; // if the data was already sorted, the last item in first half is // smaller than the first item in the second half, so we have nothing to do sort->compares++; if (strcmp(sort->data[split-1], sort->data[split]) <= 0) return NULL; // merge merge = malloc(KEYLEN * sort->n); if (!sort->data) { printf("Out of memory\n"); exit(10); } i = 0; first = 0; second = split; // while data left in both halves while (first < split && second < sort->n) { sort->compares++; if (strcmp(sort->data[first], sort->data[second]) <= 0) { strncpy(merge[i], sort->data[first], KEYLEN); first++; } else { strncpy(merge[i], sort->data[second], KEYLEN); second++; } sort->writes++; i++; } // if there's still data in the first half, copy it if (first < split) { sort->writes += split - first; memcpy(merge[i], sort->data[first], (split - first) * KEYLEN); } // if there's still data in the second half, we leave it in place // copy back the merge buffer to the original data store sort->writes += sort->n - second; memcpy(sort->data[0], merge[0], second * KEYLEN); free(merge); return NULL; }