OSDN Git Service

Do *not* restart the "best" thread + added random-replace optimization.
[mhash384/mhash384.git] / tools / GenTables / src / gen_table_xor.c
index cbc4ad4..016ec33 100644 (file)
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------------------------- */
 /* MHash-384 - Generate tables utility program                                                     */
-/* Copyright(c) 2016 LoRd_MuldeR <mulder2@gmx.de>                                                  */
+/* Copyright(c) 2016-2017 LoRd_MuldeR <mulder2@gmx.de>                                             */
 /*                                                                                                 */
 /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software   */
 /* and associated documentation files(the "Software"), to deal in the Software without             */
 #include <io.h>
 #include <float.h>
 #include <math.h>
+#include <intrin.h>
 
 //-----------------------------------------------------------------------------
 // Const
 //-----------------------------------------------------------------------------
 
-#define HASH_LEN 384
+#define HASH_LEN 384U
 
-#define DISTANCE_MIN 180
-#define DISTANCE_MAX DISTANCE_MIN + 32
+#define DISTANCE_MIN 182U
+#define DISTANCE_MAX DISTANCE_MIN + 16U
 
-#define THREAD_COUNT 8
+#define THREAD_COUNT 8U
+
+#undef ENABLE_STATS
+#undef ENABLE_TRACE
 
 #define ROW_NUM (UINT8_MAX+2)           /*total number of rows*/
 #define ROW_LEN (HASH_LEN / CHAR_BIT)   /*number of bits per row*/
@@ -51,6 +55,7 @@
 #define DISTANCE_STR _DISTANCE_STR(DISTANCE_MIN)
 
 #define MAGIC_NUMBER 0x3C6058A7C1132CB2ui64
+#define THREAD_ID (pthread_getw32threadid_np(pthread_self()))
 
 //-----------------------------------------------------------------------------
 // Globals
@@ -62,10 +67,21 @@ static uint8_t g_thread_buffer[THREAD_COUNT][ROW_LEN];
 static size_t g_spinpos = 0;
 static char SPINNER[4] = { '/', '-', '\\', '|' };
 
+#ifdef ENABLE_STATS
+static volatile long long g_stat_too_hi = 0i64;
+static volatile long long g_stat_too_lo = 0i64;
+#endif //ENABLE_STATS
+
 //-----------------------------------------------------------------------------
 // Utility Functions
 //-----------------------------------------------------------------------------
 
+#ifdef ENABLE_TRACE
+#define TRACE(X, ...) printf("[%04X] " X "\n", THREAD_ID, __VA_ARGS__)
+#else
+#define TRACE(X, ...) __noop()
+#endif
+
 static inline void print_row(uint8_t *const row_buffer)
 {
        for (size_t w = 0; w < ROW_LEN; ++w)
@@ -77,18 +93,10 @@ static inline void print_row(uint8_t *const row_buffer)
 
 static inline void flip_bit_at(uint8_t *const row_buffer, const size_t pos)
 {
-       row_buffer[(pos / CHAR_BIT)] ^= ((uint8_t)(1U << (pos % CHAR_BIT)));
+       row_buffer[pos >> 3] ^= ((uint8_t)(1U << (pos & 0x7)));
 }
 
-static inline void flip_all_bits(uint8_t *const row_buffer, const size_t *const pos_list, const size_t n)
-{
-       for (size_t i = 0; i < n; ++i)
-       {
-               flip_bit_at(row_buffer, pos_list[i]);
-       }
-}
-
-static inline void rand_indices_n(size_t *const indices_out, twister_t *const rand, const uint32_t n)
+static inline void flip_rand_n(uint8_t *const row_buffer, twister_t *const rand, const uint32_t n)
 {
        bool taken[HASH_LEN];
        memset(&taken, 0, sizeof(bool) * HASH_LEN);
@@ -100,7 +108,8 @@ static inline void rand_indices_n(size_t *const indices_out, twister_t *const ra
                        next = next_rand_range(rand, HASH_LEN);
                }
                while (taken[next]);
-               taken[indices_out[i] = next] = true;
+               flip_bit_at(row_buffer, next);
+               taken[next] = true;
        }
 }
 
@@ -110,24 +119,69 @@ static inline bool check_distance_rows(const size_t index_1, const size_t index_
        return (dist <= DISTANCE_MAX) && (dist >= DISTANCE_MIN);
 }
 
-static inline uint32_t check_distance_buff(const size_t index, const uint8_t *const row_buffer)
+static inline uint32_t check_distance_buff(const size_t index, const uint8_t *const row_buffer, const uint32_t limit)
 {
        uint32_t error = 0U;
+#ifdef ENABLE_STATS
+       bool reason;
+#endif //ENABLE_STATS
        for (size_t k = 0; k < index; k++)
        {
+
                const uint32_t dist = hamming_distance(&g_table[k][0], row_buffer, ROW_LEN);
                if (dist > DISTANCE_MAX)
                {
-                       error = max_ui32(error, (dist - DISTANCE_MAX));
+                       const uint32_t current = dist - DISTANCE_MAX;
+                       if (current > error)
+                       {
+#ifdef ENABLE_STATS
+                               reason = false;
+#endif //ENABLE_STATS
+                               if ((error = current) >= limit)
+                               {
+                                       break; /*early termination*/
+                               }
+                       }
                }
                else if (dist < DISTANCE_MIN)
                {
-                       error = max_ui32(error, (DISTANCE_MIN - dist));
+                       const uint32_t current = DISTANCE_MIN - dist;
+                       if (current > error)
+                       {
+#ifdef ENABLE_STATS
+                               reason = true;
+#endif //ENABLE_STATS
+                               if ((error = current) >= limit)
+                               {
+                                       break; /*early termination*/
+                               }
+                       }
                }
        }
+#ifdef ENABLE_STATS
+       _InterlockedIncrement64(reason ? &g_stat_too_lo : &g_stat_too_hi);
+#endif //ENABLE_STATS
        return error;
 }
 
+static inline bool update_global_minimum(volatile long *const global_minimum, const long value)
+{
+       long old_minimum = LONG_MAX;
+       for (;;)
+       {
+               if (value > old_minimum)
+               {
+                       return false;
+               }
+               const long new_minimum = _InterlockedCompareExchange(global_minimum, value, old_minimum);
+               if (new_minimum == old_minimum)
+               {
+                       return true;
+               }
+               old_minimum = new_minimum;
+       }
+}
+
 static dump_table(FILE *out)
 {
        fputs("uint8_t MHASH_384_TABLE_XOR[UINT8_MAX+2][MHASH_384_LEN] =\n{\n", out);
@@ -157,82 +211,222 @@ typedef struct
        uint8_t *row_buffer;
        sem_t *stop;
        pthread_mutex_t *mutex;
-       twister_t *rand;
+       volatile long *minimum;
 }
 thread_data_t;
 
 static void* thread_main(void *const param)
 {
        const thread_data_t *const data = ((const thread_data_t*)param);
+       twister_t rand;
        bxmller_t bxmller;
-       gaussian_noise_init(&bxmller);
+       uint8_t temp[ROW_LEN];
        for(;;)
        {
-               rand_next_bytes(data->rand, data->row_buffer, ROW_LEN);
-               uint32_t error = check_distance_buff(data->index, data->row_buffer);
+               rand_init(&rand, make_seed());
+               gaussian_noise_init(&bxmller);
+               rand_next_bytes(&rand, data->row_buffer, ROW_LEN);
+               uint32_t error = check_distance_buff(data->index, data->row_buffer, HASH_LEN);
                if(error > 0U)
                {
-                       for (size_t round = 0; round < HASH_LEN; ++round)
+                       do
                        {
-                               bool improved = false;
-                               if (SEM_TRYWAIT(data->stop))
-                               {
-                                       return NULL;
-                               }
-                               for (size_t flip_pos = 0U; flip_pos < HASH_LEN; ++flip_pos)
+                               for (int32_t round = 0; round < 4999; ++round)
                                {
-                                       flip_bit_at(data->row_buffer, flip_pos);
-                                       const uint32_t next_error = check_distance_buff(data->index, data->row_buffer);
-                                       if (next_error >= error)
+                                       if (!(round & 0x3FF))
                                        {
-                                               flip_bit_at(data->row_buffer, flip_pos); /*revert*/
-                                               continue;
+                                               if (SEM_TRYWAIT(data->stop))
+                                               {
+                                                       return NULL;
+                                               }
                                        }
-                                       else
+                                       for (size_t rand_loop = 0; rand_loop < 99991U; ++rand_loop)
                                        {
-                                               improved = true;
-                                               if (!((error = next_error) > 0U))
+                                               rand_next_bytes(&rand, temp, ROW_LEN);
+                                               const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                               if (next_error < error)
                                                {
-                                                       goto success;
+                                                       round = -1;
+                                                       memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                       if (!((error = next_error) > 0U))
+                                                       {
+                                                               TRACE("Success by rand-init");
+                                                               goto success;
+                                                       }
                                                }
                                        }
                                }
-                               for (size_t refine_loop = 0; refine_loop < 9973U; ++refine_loop)
+                               for (int32_t round = 0; round < 743; ++round)
                                {
-                                       size_t flip_indices[HASH_LEN];
-                                       if (!(refine_loop % 97))
+                                       TRACE("Optimizer round %u of 743", round);
+                                       if (!round)
                                        {
-                                               if (SEM_TRYWAIT(data->stop))
+                                               for (size_t xchg_pos = 0U; xchg_pos < ROW_LEN; ++xchg_pos)
                                                {
-                                                       return NULL;
+                                                       uint8_t original = data->row_buffer[xchg_pos];
+                                                       for (uint16_t xchg_val = 0U; xchg_val <= UINT8_MAX; ++xchg_val)
+                                                       {
+                                                               data->row_buffer[xchg_pos] = (uint8_t)xchg_val;
+                                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                               if (next_error < error)
+                                                               {
+                                                                       TRACE("Improved by xchg-byte");
+                                                                       original = (uint8_t)xchg_val;
+                                                                       round = -1;
+                                                                       if (!((error = next_error) > 0U))
+                                                                       {
+                                                                               TRACE("Success by xchg-byte");
+                                                                               goto success;
+                                                                       }
+                                                               }
+                                                       }
+                                                       data->row_buffer[xchg_pos] = original;
+                                               }
+                                               for (size_t flip_pos_w = 0U; flip_pos_w < HASH_LEN; ++flip_pos_w)
+                                               {
+                                                       if (SEM_TRYWAIT(data->stop))
+                                                       {
+                                                               return NULL;
+                                                       }
+                                                       flip_bit_at(data->row_buffer, flip_pos_w);
+                                                       bool revert_w = true;
+                                                       const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                       if (next_error < error)
+                                                       {
+                                                               TRACE("Improved by flip-1");
+                                                               revert_w = false;
+                                                               round = -1;
+                                                               if (!((error = next_error) > 0U))
+                                                               {
+                                                                       TRACE("Sucess by flip-1");
+                                                                       goto success;
+                                                               }
+                                                       }
+                                                       for (size_t flip_pos_x = flip_pos_w + 1U; flip_pos_x < HASH_LEN; ++flip_pos_x)
+                                                       {
+                                                               flip_bit_at(data->row_buffer, flip_pos_x);
+                                                               bool revert_x = true;
+                                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                               if (next_error < error)
+                                                               {
+                                                                       TRACE("Improved by flip-2");
+                                                                       revert_w = false;
+                                                                       revert_x = false;
+                                                                       round = -1;
+                                                                       if (!((error = next_error) > 0U))
+                                                                       {
+                                                                               TRACE("Sucess by flip-2");
+                                                                               goto success;
+                                                                       }
+                                                               }
+                                                               for (size_t flip_pos_y = flip_pos_x + 1U; flip_pos_y < HASH_LEN; ++flip_pos_y)
+                                                               {
+                                                                       flip_bit_at(data->row_buffer, flip_pos_y);
+                                                                       bool revert_y = true;
+                                                                       const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                                       if (next_error < error)
+                                                                       {
+                                                                               TRACE("Improved by flip-3");
+                                                                               revert_w = false;
+                                                                               revert_x = false;
+                                                                               revert_y = false;
+                                                                               round = -1;
+                                                                               if (!((error = next_error) > 0U))
+                                                                               {
+                                                                                       TRACE("Sucess by flip-3");
+                                                                                       goto success;
+                                                                               }
+                                                                       }
+                                                                       for (size_t flip_pos_z = flip_pos_y + 1U; flip_pos_z < HASH_LEN; ++flip_pos_z)
+                                                                       {
+                                                                               flip_bit_at(data->row_buffer, flip_pos_z);
+                                                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                                               if (next_error < error)
+                                                                               {
+                                                                                       TRACE("Improved by flip-4");
+                                                                                       revert_w = false;
+                                                                                       revert_x = false;
+                                                                                       revert_y = false;
+                                                                                       round = -1;
+                                                                                       if (!((error = next_error) > 0U))
+                                                                                       {
+                                                                                               TRACE("Sucess by flip-4");
+                                                                                               goto success;
+                                                                                       }
+                                                                               }
+                                                                               else
+                                                                               {
+                                                                                       flip_bit_at(data->row_buffer, flip_pos_z);
+                                                                               }
+                                                                       }
+                                                                       if (revert_y)
+                                                                       {
+                                                                               flip_bit_at(data->row_buffer, flip_pos_y);
+                                                                       }
+                                                               }
+                                                               if (revert_x)
+                                                               {
+                                                                       flip_bit_at(data->row_buffer, flip_pos_x);
+                                                               }
+                                                       }
+                                                       if (revert_w)
+                                                       {
+                                                               flip_bit_at(data->row_buffer, flip_pos_w);
+                                                       }
                                                }
                                        }
-                                       const uint32_t flip_count = gaussian_noise_next(data->rand, &bxmller, 29.0, 2U, HASH_LEN);
-                                       for (size_t refine_step = 0; refine_step < 997U; ++refine_step)
+                                       for (size_t rand_mode = 0; rand_mode < 2U; ++rand_mode)
                                        {
-                                               rand_indices_n(flip_indices, data->rand, flip_count);
-                                               flip_all_bits(data->row_buffer, flip_indices, flip_count);
-                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer);
-                                               if (next_error >= error)
+                                               memcpy(temp, &data->row_buffer, sizeof(uint8_t) * ROW_LEN);
+                                               for (size_t rand_loop = 0; rand_loop < 99991U; ++rand_loop)
                                                {
-                                                       flip_all_bits(data->row_buffer, flip_indices, flip_count); /*revert*/
-                                                       continue;
+                                                       rand_next_bytes(&rand, &temp[rand_mode ? (ROW_LEN / 2U) : 0U], ROW_LEN / 2U);
+                                                       const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                                       if (next_error < error)
+                                                       {
+                                                               TRACE("Improved by rand-replace #%zu", rand_mode);
+                                                               round = -1;
+                                                               memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                               if (!((error = next_error) > 0U))
+                                                               {
+                                                                       TRACE("Success by rand-replace");
+                                                                       goto success;
+                                                               }
+                                                       }
                                                }
-                                               else
+                                       }
+                                       for (size_t refine_loop = 0; refine_loop < 9973U; ++refine_loop)
+                                       {
+                                               if (!(refine_loop & 0x3FF))
                                                {
-                                                       improved = true;
-                                                       if (!((error = next_error) > 0U))
+                                                       if (SEM_TRYWAIT(data->stop))
                                                        {
-                                                               goto success;
+                                                               return NULL;
+                                                       }
+                                               }
+                                               const uint32_t flip_count = gaussian_noise_next(&rand, &bxmller, 3.14159, 5U, HASH_LEN);
+                                               for (size_t refine_step = 0; refine_step < 997U; ++refine_step)
+                                               {
+                                                       memcpy(temp, data->row_buffer, sizeof(uint8_t) * ROW_LEN);
+                                                       flip_rand_n(temp, &rand, flip_count);
+                                                       const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                                       if (next_error < error)
+                                                       {
+                                                               TRACE("Improved by rand-flip (%u)", flip_count);
+                                                               round = -1;
+                                                               memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                               if (!((error = next_error) > 0U))
+                                                               {
+                                                                       TRACE("Success by rand-flip (%u)", flip_count);
+                                                                       goto success;
+                                                               }
                                                        }
                                                }
                                        }
                                }
-                               if (!improved)
-                               {
-                                       break; /*early termination*/
-                               }
                        }
+                       while (update_global_minimum(data->minimum, (long)error));
+                       TRACE("Restarting");
                }
                else
                {
@@ -241,6 +435,12 @@ static void* thread_main(void *const param)
        }
 
 success:
+       if (check_distance_buff(data->index, data->row_buffer, HASH_LEN))
+       {
+               fprintf(stderr, "ERROR MISCOMPARE!\n");
+               abort();
+       }
+
        MUTEX_LOCK(data->mutex);
        if (SEM_TRYWAIT(data->stop))
        {
@@ -267,6 +467,11 @@ static void* thread_spin(void *const param)
                _sleep(delay);
                if (delay >= 500)
                {
+#ifdef ENABLE_STATS
+                       const long long s_too_lo = g_stat_too_lo, s_too_hi = g_stat_too_hi;
+                       const long long s_too_ac = s_too_lo + s_too_hi;
+                       printf("Too low: %lld (%.2f), too high: %lld (%.2f)\n", s_too_lo, s_too_lo / ((double)s_too_ac), s_too_hi, s_too_hi / ((double)s_too_ac));
+#endif //ENABLE_STATS
                        printf("\b\b\b[%c]", SPINNER[g_spinpos]);
                        g_spinpos = (g_spinpos + 1) % 4;
                }
@@ -394,11 +599,11 @@ int wmain(int argc, wchar_t *argv[])
 {
        pthread_t thread_id[THREAD_COUNT + 1];
        thread_data_t thread_data[THREAD_COUNT];
-       twister_t thread_rand[THREAD_COUNT];
        sem_t stop_flag;
        pthread_mutex_t stop_mutex;
        FILE *file_out = NULL;
        size_t initial_row_index = 0;
+       volatile long minimum;
 
        printf("MHash GenTableXOR [%s]\n\n", __DATE__);
        printf("HashLen: %d, Distance Min/Max: %d/%d, Threads: %d\n\n", HASH_LEN, DISTANCE_MIN, DISTANCE_MAX, THREAD_COUNT);
@@ -407,7 +612,7 @@ int wmain(int argc, wchar_t *argv[])
        {
                crit_exit("FATAL: Hash length must be a multiple of 32 bits!");
        }
-       
+
        if (argc < 2)
        {
                printf("Table file not specified!\n\n");
@@ -426,7 +631,6 @@ int wmain(int argc, wchar_t *argv[])
        for (size_t t = 0; t < THREAD_COUNT; t++)
        {
                memset(&g_thread_buffer[t][0], 0, sizeof(uint8_t) * ROW_LEN);
-               rand_init(&thread_rand[t], make_seed());
        }
 
        SEM_INIT(&stop_flag);
@@ -444,8 +648,12 @@ int wmain(int argc, wchar_t *argv[])
        for (size_t i = initial_row_index; i < ROW_NUM; i++)
        {
                char time_string[64];
-               printf("Row %03u of %03u [%c]", (uint32_t)(i+1U), ROW_NUM, SPINNER[g_spinpos]);
+               printf("\aRow %03u of %03u [%c]", (uint32_t)(i+1U), ROW_NUM, SPINNER[g_spinpos]);
+               minimum = LONG_MAX;
                g_spinpos = (g_spinpos + 1) % 4;
+#ifdef ENABLE_STATS
+               g_stat_too_hi = g_stat_too_lo = 0i64;
+#endif //INCREMENT_STAT
 
                PTHREAD_CREATE(&thread_id[THREAD_COUNT], NULL, thread_spin, &stop_flag);
                for (size_t t = 0; t < THREAD_COUNT; t++)
@@ -454,8 +662,9 @@ int wmain(int argc, wchar_t *argv[])
                        thread_data[t].row_buffer = &g_thread_buffer[t][0];
                        thread_data[t].stop = &stop_flag;
                        thread_data[t].mutex = &stop_mutex;
-                       thread_data[t].rand = &thread_rand[t];
+                       thread_data[t].minimum = &minimum;
                        PTHREAD_CREATE(&thread_id[t], NULL, thread_main, &thread_data[t]);
+                       PTHREAD_SET_PRIORITY(thread_id[t], -15);
                }
 
                for (size_t t = 0; t < THREAD_COUNT; t++)