OSDN Git Service

Do *not* restart the "best" thread + added random-replace optimization.
[mhash384/mhash384.git] / tools / GenTables / src / gen_table_xor.c
index 6093207..016ec33 100644 (file)
@@ -31,6 +31,7 @@
 #include <io.h>
 #include <float.h>
 #include <math.h>
+#include <intrin.h>
 
 //-----------------------------------------------------------------------------
 // Const
@@ -42,7 +43,9 @@
 #define DISTANCE_MAX DISTANCE_MIN + 16U
 
 #define THREAD_COUNT 8U
+
 #undef ENABLE_STATS
+#undef ENABLE_TRACE
 
 #define ROW_NUM (UINT8_MAX+2)           /*total number of rows*/
 #define ROW_LEN (HASH_LEN / CHAR_BIT)   /*number of bits per row*/
 #define DISTANCE_STR _DISTANCE_STR(DISTANCE_MIN)
 
 #define MAGIC_NUMBER 0x3C6058A7C1132CB2ui64
-#define THREAD_ID ((uint32_t)((uintptr_t)pthread_self().p))
-
-#ifdef ENABLE_STATS
-#include <intrin.h>
-#endif
+#define THREAD_ID (pthread_getw32threadid_np(pthread_self()))
 
 //-----------------------------------------------------------------------------
 // Globals
@@ -77,6 +76,12 @@ static volatile long long g_stat_too_lo = 0i64;
 // Utility Functions
 //-----------------------------------------------------------------------------
 
+#ifdef ENABLE_TRACE
+#define TRACE(X, ...) printf("[%04X] " X "\n", THREAD_ID, __VA_ARGS__)
+#else
+#define TRACE(X, ...) __noop()
+#endif
+
 static inline void print_row(uint8_t *const row_buffer)
 {
        for (size_t w = 0; w < ROW_LEN; ++w)
@@ -88,18 +93,10 @@ static inline void print_row(uint8_t *const row_buffer)
 
 static inline void flip_bit_at(uint8_t *const row_buffer, const size_t pos)
 {
-       row_buffer[(pos / CHAR_BIT)] ^= ((uint8_t)(1U << (pos % CHAR_BIT)));
+       row_buffer[pos >> 3] ^= ((uint8_t)(1U << (pos & 0x7)));
 }
 
-static inline void flip_all_bits(uint8_t *const row_buffer, const size_t *const pos_list, const size_t n)
-{
-       for (size_t i = 0; i < n; ++i)
-       {
-               flip_bit_at(row_buffer, pos_list[i]);
-       }
-}
-
-static inline void rand_indices_n(size_t *const indices_out, twister_t *const rand, const uint32_t n)
+static inline void flip_rand_n(uint8_t *const row_buffer, twister_t *const rand, const uint32_t n)
 {
        bool taken[HASH_LEN];
        memset(&taken, 0, sizeof(bool) * HASH_LEN);
@@ -111,7 +108,8 @@ static inline void rand_indices_n(size_t *const indices_out, twister_t *const ra
                        next = next_rand_range(rand, HASH_LEN);
                }
                while (taken[next]);
-               taken[indices_out[i] = next] = true;
+               flip_bit_at(row_buffer, next);
+               taken[next] = true;
        }
 }
 
@@ -166,6 +164,24 @@ static inline uint32_t check_distance_buff(const size_t index, const uint8_t *co
        return error;
 }
 
+static inline bool update_global_minimum(volatile long *const global_minimum, const long value)
+{
+       long old_minimum = LONG_MAX;
+       for (;;)
+       {
+               if (value > old_minimum)
+               {
+                       return false;
+               }
+               const long new_minimum = _InterlockedCompareExchange(global_minimum, value, old_minimum);
+               if (new_minimum == old_minimum)
+               {
+                       return true;
+               }
+               old_minimum = new_minimum;
+       }
+}
+
 static dump_table(FILE *out)
 {
        fputs("uint8_t MHASH_384_TABLE_XOR[UINT8_MAX+2][MHASH_384_LEN] =\n{\n", out);
@@ -195,143 +211,222 @@ typedef struct
        uint8_t *row_buffer;
        sem_t *stop;
        pthread_mutex_t *mutex;
-       twister_t *rand;
+       volatile long *minimum;
 }
 thread_data_t;
 
 static void* thread_main(void *const param)
 {
        const thread_data_t *const data = ((const thread_data_t*)param);
+       twister_t rand;
        bxmller_t bxmller;
        uint8_t temp[ROW_LEN];
-       gaussian_noise_init(&bxmller);
        for(;;)
        {
-               rand_next_bytes(data->rand, data->row_buffer, ROW_LEN);
+               rand_init(&rand, make_seed());
+               gaussian_noise_init(&bxmller);
+               rand_next_bytes(&rand, data->row_buffer, ROW_LEN);
                uint32_t error = check_distance_buff(data->index, data->row_buffer, HASH_LEN);
                if(error > 0U)
                {
-                       for (size_t round = 0; round < 257; ++round)
+                       do
                        {
-                               if (SEM_TRYWAIT(data->stop))
+                               for (int32_t round = 0; round < 4999; ++round)
                                {
-                                       return NULL;
-                               }
-                               for (size_t rand_loop = 0; rand_loop < 99991U; ++rand_loop)
-                               {
-                                       rand_next_bytes(data->rand, temp, ROW_LEN);
-                                       const uint32_t next_error = check_distance_buff(data->index, temp, error);
-                                       if (next_error < error)
+                                       if (!(round & 0x3FF))
                                        {
-                                               round = 0U;
-                                               memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
-                                               if (!((error = next_error) > 0U))
+                                               if (SEM_TRYWAIT(data->stop))
                                                {
-                                                       goto success;
+                                                       return NULL;
                                                }
                                        }
-                               }
-                       }
-                       for (size_t round = 0U; round < 5U; ++round)
-                       {
-                               if (round == 1U)
-                               {
-                                       for (size_t flip_pos_x = 0U; flip_pos_x < HASH_LEN; ++flip_pos_x)
+                                       for (size_t rand_loop = 0; rand_loop < 99991U; ++rand_loop)
                                        {
-                                               if (!(flip_pos_x % 31))
+                                               rand_next_bytes(&rand, temp, ROW_LEN);
+                                               const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                               if (next_error < error)
                                                {
-                                                       if (SEM_TRYWAIT(data->stop))
+                                                       round = -1;
+                                                       memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                       if (!((error = next_error) > 0U))
                                                        {
-                                                               return NULL;
+                                                               TRACE("Success by rand-init");
+                                                               goto success;
                                                        }
                                                }
-                                               flip_bit_at(data->row_buffer, flip_pos_x);
-                                               bool revert_x = true;
-                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
-                                               if (next_error < error)
+                                       }
+                               }
+                               for (int32_t round = 0; round < 743; ++round)
+                               {
+                                       TRACE("Optimizer round %u of 743", round);
+                                       if (!round)
+                                       {
+                                               for (size_t xchg_pos = 0U; xchg_pos < ROW_LEN; ++xchg_pos)
                                                {
-                                                       revert_x = false;
-                                                       round = 0U;
-                                                       if (!((error = next_error) > 0U))
+                                                       uint8_t original = data->row_buffer[xchg_pos];
+                                                       for (uint16_t xchg_val = 0U; xchg_val <= UINT8_MAX; ++xchg_val)
                                                        {
-                                                               goto success;
+                                                               data->row_buffer[xchg_pos] = (uint8_t)xchg_val;
+                                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                               if (next_error < error)
+                                                               {
+                                                                       TRACE("Improved by xchg-byte");
+                                                                       original = (uint8_t)xchg_val;
+                                                                       round = -1;
+                                                                       if (!((error = next_error) > 0U))
+                                                                       {
+                                                                               TRACE("Success by xchg-byte");
+                                                                               goto success;
+                                                                       }
+                                                               }
                                                        }
+                                                       data->row_buffer[xchg_pos] = original;
                                                }
-                                               for (size_t flip_pos_y = flip_pos_x + 1U; flip_pos_y < HASH_LEN; ++flip_pos_y)
+                                               for (size_t flip_pos_w = 0U; flip_pos_w < HASH_LEN; ++flip_pos_w)
                                                {
-                                                       flip_bit_at(data->row_buffer, flip_pos_y);
-                                                       bool revert_y = true;
+                                                       if (SEM_TRYWAIT(data->stop))
+                                                       {
+                                                               return NULL;
+                                                       }
+                                                       flip_bit_at(data->row_buffer, flip_pos_w);
+                                                       bool revert_w = true;
                                                        const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
                                                        if (next_error < error)
                                                        {
-                                                               revert_x = revert_y = false;
-                                                               round = 0U;
+                                                               TRACE("Improved by flip-1");
+                                                               revert_w = false;
+                                                               round = -1;
                                                                if (!((error = next_error) > 0U))
                                                                {
+                                                                       TRACE("Sucess by flip-1");
                                                                        goto success;
                                                                }
                                                        }
-                                                       for (size_t flip_pos_z = flip_pos_y + 1U; flip_pos_z < HASH_LEN; ++flip_pos_z)
+                                                       for (size_t flip_pos_x = flip_pos_w + 1U; flip_pos_x < HASH_LEN; ++flip_pos_x)
                                                        {
-                                                               flip_bit_at(data->row_buffer, flip_pos_z);
+                                                               flip_bit_at(data->row_buffer, flip_pos_x);
+                                                               bool revert_x = true;
                                                                const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
                                                                if (next_error < error)
                                                                {
-                                                                       revert_x = revert_y = false;
-                                                                       round = 0U;
+                                                                       TRACE("Improved by flip-2");
+                                                                       revert_w = false;
+                                                                       revert_x = false;
+                                                                       round = -1;
                                                                        if (!((error = next_error) > 0U))
                                                                        {
+                                                                               TRACE("Sucess by flip-2");
                                                                                goto success;
                                                                        }
                                                                }
-                                                               else
+                                                               for (size_t flip_pos_y = flip_pos_x + 1U; flip_pos_y < HASH_LEN; ++flip_pos_y)
                                                                {
-                                                                       flip_bit_at(data->row_buffer, flip_pos_z);
+                                                                       flip_bit_at(data->row_buffer, flip_pos_y);
+                                                                       bool revert_y = true;
+                                                                       const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                                       if (next_error < error)
+                                                                       {
+                                                                               TRACE("Improved by flip-3");
+                                                                               revert_w = false;
+                                                                               revert_x = false;
+                                                                               revert_y = false;
+                                                                               round = -1;
+                                                                               if (!((error = next_error) > 0U))
+                                                                               {
+                                                                                       TRACE("Sucess by flip-3");
+                                                                                       goto success;
+                                                                               }
+                                                                       }
+                                                                       for (size_t flip_pos_z = flip_pos_y + 1U; flip_pos_z < HASH_LEN; ++flip_pos_z)
+                                                                       {
+                                                                               flip_bit_at(data->row_buffer, flip_pos_z);
+                                                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
+                                                                               if (next_error < error)
+                                                                               {
+                                                                                       TRACE("Improved by flip-4");
+                                                                                       revert_w = false;
+                                                                                       revert_x = false;
+                                                                                       revert_y = false;
+                                                                                       round = -1;
+                                                                                       if (!((error = next_error) > 0U))
+                                                                                       {
+                                                                                               TRACE("Sucess by flip-4");
+                                                                                               goto success;
+                                                                                       }
+                                                                               }
+                                                                               else
+                                                                               {
+                                                                                       flip_bit_at(data->row_buffer, flip_pos_z);
+                                                                               }
+                                                                       }
+                                                                       if (revert_y)
+                                                                       {
+                                                                               flip_bit_at(data->row_buffer, flip_pos_y);
+                                                                       }
+                                                               }
+                                                               if (revert_x)
+                                                               {
+                                                                       flip_bit_at(data->row_buffer, flip_pos_x);
                                                                }
                                                        }
-                                                       if (revert_y)
+                                                       if (revert_w)
                                                        {
-                                                               flip_bit_at(data->row_buffer, flip_pos_y);
+                                                               flip_bit_at(data->row_buffer, flip_pos_w);
                                                        }
                                                }
-                                               if (revert_x)
-                                               {
-                                                       flip_bit_at(data->row_buffer, flip_pos_x);
-                                               }
                                        }
-                               }
-                               for (size_t refine_loop = 0; refine_loop < 99991U; ++refine_loop)
-                               {
-                                       size_t flip_indices[HASH_LEN];
-                                       if (!(refine_loop % 97))
+                                       for (size_t rand_mode = 0; rand_mode < 2U; ++rand_mode)
                                        {
-                                               if (SEM_TRYWAIT(data->stop))
+                                               memcpy(temp, &data->row_buffer, sizeof(uint8_t) * ROW_LEN);
+                                               for (size_t rand_loop = 0; rand_loop < 99991U; ++rand_loop)
                                                {
-                                                       return NULL;
+                                                       rand_next_bytes(&rand, &temp[rand_mode ? (ROW_LEN / 2U) : 0U], ROW_LEN / 2U);
+                                                       const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                                       if (next_error < error)
+                                                       {
+                                                               TRACE("Improved by rand-replace #%zu", rand_mode);
+                                                               round = -1;
+                                                               memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                               if (!((error = next_error) > 0U))
+                                                               {
+                                                                       TRACE("Success by rand-replace");
+                                                                       goto success;
+                                                               }
+                                                       }
                                                }
                                        }
-                                       const uint32_t flip_count = gaussian_noise_next(data->rand, &bxmller, 11.0, 4U, HASH_LEN);
-                                       for (size_t refine_step = 0; refine_step < 997U; ++refine_step)
+                                       for (size_t refine_loop = 0; refine_loop < 9973U; ++refine_loop)
                                        {
-                                               rand_indices_n(flip_indices, data->rand, flip_count);
-                                               flip_all_bits(data->row_buffer, flip_indices, flip_count);
-                                               const uint32_t next_error = check_distance_buff(data->index, data->row_buffer, error);
-                                               if (next_error >= error)
+                                               if (!(refine_loop & 0x3FF))
                                                {
-                                                       flip_all_bits(data->row_buffer, flip_indices, flip_count); /*revert*/
-                                                       continue;
+                                                       if (SEM_TRYWAIT(data->stop))
+                                                       {
+                                                               return NULL;
+                                                       }
                                                }
-                                               else
+                                               const uint32_t flip_count = gaussian_noise_next(&rand, &bxmller, 3.14159, 5U, HASH_LEN);
+                                               for (size_t refine_step = 0; refine_step < 997U; ++refine_step)
                                                {
-                                                       round = 0U;
-                                                       if (!((error = next_error) > 0U))
+                                                       memcpy(temp, data->row_buffer, sizeof(uint8_t) * ROW_LEN);
+                                                       flip_rand_n(temp, &rand, flip_count);
+                                                       const uint32_t next_error = check_distance_buff(data->index, temp, error);
+                                                       if (next_error < error)
                                                        {
-                                                               goto success;
+                                                               TRACE("Improved by rand-flip (%u)", flip_count);
+                                                               round = -1;
+                                                               memcpy(data->row_buffer, temp, sizeof(uint8_t) * ROW_LEN);
+                                                               if (!((error = next_error) > 0U))
+                                                               {
+                                                                       TRACE("Success by rand-flip (%u)", flip_count);
+                                                                       goto success;
+                                                               }
                                                        }
                                                }
                                        }
                                }
                        }
+                       while (update_global_minimum(data->minimum, (long)error));
+                       TRACE("Restarting");
                }
                else
                {
@@ -340,6 +435,12 @@ static void* thread_main(void *const param)
        }
 
 success:
+       if (check_distance_buff(data->index, data->row_buffer, HASH_LEN))
+       {
+               fprintf(stderr, "ERROR MISCOMPARE!\n");
+               abort();
+       }
+
        MUTEX_LOCK(data->mutex);
        if (SEM_TRYWAIT(data->stop))
        {
@@ -498,11 +599,11 @@ int wmain(int argc, wchar_t *argv[])
 {
        pthread_t thread_id[THREAD_COUNT + 1];
        thread_data_t thread_data[THREAD_COUNT];
-       twister_t thread_rand[THREAD_COUNT];
        sem_t stop_flag;
        pthread_mutex_t stop_mutex;
        FILE *file_out = NULL;
        size_t initial_row_index = 0;
+       volatile long minimum;
 
        printf("MHash GenTableXOR [%s]\n\n", __DATE__);
        printf("HashLen: %d, Distance Min/Max: %d/%d, Threads: %d\n\n", HASH_LEN, DISTANCE_MIN, DISTANCE_MAX, THREAD_COUNT);
@@ -530,7 +631,6 @@ int wmain(int argc, wchar_t *argv[])
        for (size_t t = 0; t < THREAD_COUNT; t++)
        {
                memset(&g_thread_buffer[t][0], 0, sizeof(uint8_t) * ROW_LEN);
-               rand_init(&thread_rand[t], make_seed());
        }
 
        SEM_INIT(&stop_flag);
@@ -548,11 +648,13 @@ int wmain(int argc, wchar_t *argv[])
        for (size_t i = initial_row_index; i < ROW_NUM; i++)
        {
                char time_string[64];
-               printf("Row %03u of %03u [%c]", (uint32_t)(i+1U), ROW_NUM, SPINNER[g_spinpos]);
+               printf("\aRow %03u of %03u [%c]", (uint32_t)(i+1U), ROW_NUM, SPINNER[g_spinpos]);
+               minimum = LONG_MAX;
                g_spinpos = (g_spinpos + 1) % 4;
 #ifdef ENABLE_STATS
                g_stat_too_hi = g_stat_too_lo = 0i64;
 #endif //INCREMENT_STAT
+
                PTHREAD_CREATE(&thread_id[THREAD_COUNT], NULL, thread_spin, &stop_flag);
                for (size_t t = 0; t < THREAD_COUNT; t++)
                {
@@ -560,7 +662,7 @@ int wmain(int argc, wchar_t *argv[])
                        thread_data[t].row_buffer = &g_thread_buffer[t][0];
                        thread_data[t].stop = &stop_flag;
                        thread_data[t].mutex = &stop_mutex;
-                       thread_data[t].rand = &thread_rand[t];
+                       thread_data[t].minimum = &minimum;
                        PTHREAD_CREATE(&thread_id[t], NULL, thread_main, &thread_data[t]);
                        PTHREAD_SET_PRIORITY(thread_id[t], -15);
                }