OSDN Git Service

Import unkotim217 unkotim217
author Starg <starg@users.osdn.me>
Sat, 12 May 2018 05:59:41 +0000 (14:59 +0900)
committer Starg <starg@users.osdn.me>
Sat, 12 May 2018 05:59:41 +0000 (14:59 +0900)
57 files changed:
interface/ncurs_c.c
interface/npsyn_c.c
interface/portmidisyn_c.c
interface/w32g.h
interface/w32g_c.c
interface/w32g_i.c
interface/w32g_playlist.c
interface/w32g_pref.c
interface/w32g_subwin.h
interface/w32g_subwin3.c
interface/w32g_syn.c
interface/w32g_ut2.h
interface/w32g_utl.c
interface/winsyn_c.c
interface/wrdt_w32g.c
pdcurses/win32/pdcwin.h
portmidi/pm_win/pmwinmm.c
portmidi/porttime/ptwinmm.c
speex/config.h
speex/speex.h
timidity/au_a.c
timidity/common.h
timidity/controls.c
timidity/effect.c
timidity/flac_a.c
timidity/flac_a.h [new file with mode: 0644]
timidity/instrum.c
timidity/instrum.h
timidity/int_synth.c
timidity/int_synth.h
timidity/mix.c
timidity/mod2midi.c
timidity/myini.h
timidity/optcode.c
timidity/optcode.h
timidity/output.c
timidity/playmidi.c
timidity/playmidi.h
timidity/portaudio_a.c
timidity/readmidi.h
timidity/resample.c
timidity/sfz.cpp [new file with mode: 0644]
timidity/sfz.h [new file with mode: 0644]
timidity/smplfile.c
timidity/sndfont.c
timidity/sndfontini.c
timidity/sysdep.h
timidity/thread_effect.c
timidity/thread_mix.c
timidity/timidity.c
timidity/voice_effect.c
timidity/w32_a.c
timidity/w32_libFLAC_dll_g.h
timidity/w32g_ogg_dll.c
timidity/wasapi_a.c
timidity/wdmks_a.c
utils/tmdy_getopt.h

index fee22ac..42e6a99 100644 (file)
@@ -685,6 +685,18 @@ static void init_trace_window_chan(int ch)
                        wprintw(dftwin, "(%s)", prog->comment);
                }
 #endif
+#ifdef ENABLE_SFZ
+               else if(type == INST_SFZ)
+               {
+                       if (prog->name)
+                       {
+                               waddch(dftwin, ' ');
+                               waddstr(dftwin, prog->name);
+                       }
+                       if (prog->comment != NULL)
+                               wprintw(dftwin, "(%s)", prog->comment);
+               }
+#endif
     }
     }
     }
index 7c4dc8b..68c56eb 100755 (executable)
 #include "wsgtk_main.h"
 #endif
 
-#ifndef __W32__
 #include <stdio.h>
+#ifndef __W32__
 #include <termios.h>
 //#include <term.h>
 #include <unistd.h>
 #endif
 
-#ifdef __GNUC__
-#include <termios.h>
-#endif
-
-#if defined(__W32__) && !defined(__GNUC__)
+#if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
 #define HAVE_DOS_KEYBOARD 1
 #endif
 
@@ -228,7 +226,7 @@ static int ctl_pass_playing_list(int n, char *args[])
 static int ctl_pass_playing_list(int n, char *args[])
 #else
 // 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
 #endif
 {
   TIMECAPS tcaps;
@@ -434,43 +432,6 @@ static void doit(void)
 
 #endif /* !IA_W32G_SYN */
 
-
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-
-void winplaymidi(void) {
-
-  if (winplaymidi_sleep_level < 1) {
-    winplaymidi_sleep_level = 1;
-  }
-  if (0 != rtsyn_buf_check()) {
-      winplaymidi_sleep_level =0;
-  }
-  rtsyn_np_play_some_data();
-  if (winplaymidi_sleep_level == 1) {
-    DWORD ct = GetCurrentTime();
-    if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) {
-      winplaymidi_active_start_time = ct;
-    } else if (ct - winplaymidi_active_start_time > 60000) {
-      winplaymidi_sleep_level = 2;
-    }
-  } else if (winplaymidi_sleep_level == 0) {
-    winplaymidi_active_start_time = 0;
-  }
-
-  rtsyn_play_calculate();
-
-  if (winplaymidi_sleep_level >= 2) {
-    Sleep(100);
-  } else if (winplaymidi_sleep_level > 0) {
-    Sleep(1);
-  }
-}
-#endif /* IA_W32G_SYN */
-
-
 /*
  * interface_<id>_loader();
  */
index e9a4851..b638494 100755 (executable)
 #include "wsgtk_main.h"
 #endif
 
-#ifndef __W32__
 #include <stdio.h>
+#ifndef __W32__
 #include <termios.h>
 //#include <term.h>
 #include <unistd.h>
 #endif
 
+#if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
+#define HAVE_DOS_KEYBOARD 1
+#endif
 
 #ifndef __W32__
 static struct termios initial_settings, new_settings;
@@ -228,7 +233,7 @@ static int ctl_pass_playing_list(int n, char *args[])
 static int ctl_pass_playing_list(int n, char *args[])
 #else
 // 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
 #endif
 {
        int i, j,devnum,devok;
@@ -469,43 +474,6 @@ static void doit(void)
 
 #endif /* !IA_W32G_SYN */
 
-
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-
-void winplaymidi(void){
-
-       if ( winplaymidi_sleep_level < 1 ) {
-               winplaymidi_sleep_level = 1;
-       }
-       if( 0 != rtsyn_buf_check() ){
-                       winplaymidi_sleep_level =0;
-       }
-       rtsyn_play_some_data();
-       if ( winplaymidi_sleep_level == 1 ) {
-               DWORD ct = GetCurrentTime ();
-               if ( winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time ) {
-                       winplaymidi_active_start_time = ct;
-               } else if ( ct - winplaymidi_active_start_time > 60000 ) {
-                       winplaymidi_sleep_level = 2;
-               }
-       } else if ( winplaymidi_sleep_level == 0 ) {
-               winplaymidi_active_start_time = 0;
-       }
-       
-       rtsyn_play_calculate();
-       
-       if ( winplaymidi_sleep_level >= 2) {
-               Sleep ( 100 );
-       } else if ( winplaymidi_sleep_level > 0 ) {
-               Sleep ( 1 );
-       }
-}
-#endif
-               
-
 /*
  * interface_<id>_loader();
  */
index c44f410..e1424aa 100644 (file)
@@ -28,6 +28,8 @@
 #undef RC_NONE
 #endif
 #include <windows.h>
+#include <commdlg.h>
+#include <shellapi.h>
 #ifdef RC_NONE
 #undef RC_NONE
 #endif
@@ -75,7 +77,7 @@ typedef LPTHREAD_START_ROUTINE BCC_BEGINTHREAD_START_ADDRESS;
 // (HANDLE)crt_beginthreadex(LPSECURITY_ATTRIBUTES security, DWORD stack_size, LPTHREAD_START_ROUTINE start_address, LPVOID arglist, DWORD initflag, LPDWORD thrdaddr );
 #if defined(_MSC_VER) || defined(__WATCOMC__)
 #define crt_beginthreadex(security,stack_size,start_address,arglist,initflag,thrdaddr ) \
-(HANDLE)_beginthreadex((void *)security,(unsigned)stack_size,(MSVC_BEGINTHREAD_START_ADDRESS)start_address,(void *)arglist,(unsigned)initflag,(unsigned *)thrdaddr)
+(HANDLE)_beginthreadex((void *)security,(unsigned)stack_size,start_address,(void *)arglist,(unsigned)initflag,(unsigned *)thrdaddr)
 #elif defined(_BORLANDC_)
 #define crt_beginthreadex(security,stack_size,start_address,arglist,initflag,thrdaddr ) \
 (HANDLE)_beginthreadNT((BCC_BEGINTHREAD_START_ADDRESS)start_address,(unsigned)stack_size,(void *)arglist,(void *)security_attrib,(unsigned long)create_flags,(unsigned long *)thread_id)
index a5b3b85..8153e0c 100644 (file)
@@ -102,7 +102,7 @@ static int rc_thread = 0;
 static ptr_size_t value_thread = 0;
 static void w32g_ext_control_sub_thread(int rc, ptr_size_t value);
 
-static void w32g_ext_control_thread(void)
+static unsigned __stdcall w32g_ext_control_thread(void *param)
 {
        for(;;){                
                WaitForSingleObject(hEventTcv, INFINITE); // \83X\83\8c\83b\83h\8aJ\8en\83C\83x\83\93\83g\91Ò\8b@
@@ -111,7 +111,8 @@ static void w32g_ext_control_thread(void)
                ResetEvent(hEventTcv); // \83X\83\8c\83b\83h\8aJ\8en\83C\83x\83\93\83g\83\8a\83Z\83b\83g
                thread_finish = 1; // \83X\83\8c\83b\83h\8fI\97¹\83t\83\89\83O\83Z\83b\83g
        }
-       crt_endthread();
+
+    return 0;
 }
 
 static void w32g_uninit_ext_control_thread(void)
@@ -155,7 +156,7 @@ static void w32g_init_ext_control_thread(void)
        hEventTcv = CreateEvent(NULL,FALSE,FALSE,NULL); // reset manual
        if(hEventTcv == NULL)
                return;
-       hThread = crt_beginthreadex(NULL, 0, (LPTHREAD_START_ROUTINE)w32g_ext_control_thread, 0, 0, &ThreadID);
+       hThread = crt_beginthreadex(NULL, 0, w32g_ext_control_thread, 0, 0, &ThreadID);
        if(hThread == NULL)
                return;
 }
index 53795d1..09f76d3 100644 (file)
@@ -29,6 +29,7 @@
 #include <process.h>
 #include <stddef.h>
 #include <windows.h>
+#include <mmsystem.h>
 #undef RC_NONE
 #include <shlobj.h>
 // #include <prsht.h>
@@ -1750,7 +1751,7 @@ void OnHide(void)
 }
 
 #ifdef W32GUI_DEBUG
-void DebugThread(void *args)
+void WINAPI DebugThread(void *args)
 {
        MSG msg;
        DebugThreadExit = 0;
@@ -1773,7 +1774,7 @@ void DebugThreadInit(void)
        DWORD dwThreadID;
        if(!DebugThreadExit)
        return;
-       hDebugThread = crt_beginthreadex(NULL,0,DebugThread,0,0,&dwThreadID);
+       hDebugThread = crt_beginthreadex(NULL,0,(LPTHREAD_START_ROUTINE)DebugThread,0,0,&dwThreadID);   
 }
 #endif
 
@@ -4557,21 +4558,14 @@ static void VersionWnd(HWND hParentWnd)
 {
        char VersionText[2024];
   sprintf(VersionText,
-"TiMidity++ %s%s%s" NLS NLS
+"TiMidity++ %s%s %s" NLS NLS
 "TiMidity-0.2i by Tuukka Toivonen <tt@cgs.fi>." NLS
 "TiMidity Win32 version by Davide Moretti <dave@rimini.com>." NLS
 "TiMidity Windows 95 port by Nicolas Witczak." NLS
 "TiMidity Win32 GUI by Daisuke Aoki <dai@y7.net>." NLS
 " Japanese menu, dialog, etc by Saito <timidity@flashmail.com>." NLS
 "TiMidity++ by Masanao Izumo <mo@goice.co.jp>." NLS
-,(strcmp(timidity_version, "current")) ? "version " : "", timidity_version,
-#if defined(_M_X64) || defined(__x86_64__)
-" [x64]"
-#elif defined(_M_IX86) || defined(__i386__)
-" [x86]"
-#else
-""
-#endif
+,(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
 );
        MessageBox(hParentWnd, VersionText, "Version", MB_OK);
 }
@@ -4580,7 +4574,7 @@ static void TiMidityWnd(HWND hParentWnd)
 {
        char TiMidityText[2024];
   sprintf(TiMidityText,
-" TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+" TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
 " Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>" NLS
 " Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
 NLS
@@ -4602,7 +4596,7 @@ NLS
 " along with this program; if not, write to the Free Software" NLS
 " Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA" NLS
 ,
-(strcmp(timidity_version, "current")) ? "version " : "", timidity_version
+(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
        );
        MessageBox(hParentWnd, TiMidityText, "TiMidity++", MB_OK);
 }
@@ -5334,8 +5328,12 @@ void VprintfEditCtlWnd(HWND hwnd, char *fmt, va_list argList)
                  in++;
                  i++;
         }
-        Edit_SetSel(hwnd,-1,-1);
-        Edit_ReplaceSel(hwnd,out);
+
+     {
+         int len = GetWindowTextLength(hwnd);
+         Edit_SetSel(hwnd, len, len);
+         Edit_ReplaceSel(hwnd, out);
+     }
 }
 
 void PrintfEditCtlWnd(HWND hwnd, char *fmt, ...)
@@ -5370,9 +5368,8 @@ void PutsEditCtlWnd(HWND hwnd, char *str)
     i++;
   }
        if(IsWindow(hwnd)){
-               SendMessage(hwnd, WM_SETREDRAW, 0, 0);
-               Edit_SetSel(hwnd,-1,-1);
-               SendMessage(hwnd, WM_SETREDRAW, 1, 0);
+        int len = GetWindowTextLength(hwnd);
+               Edit_SetSel(hwnd, len, len);
                Edit_ReplaceSel(hwnd,out);
        }
 }
index 3cc1f45..ef4d552 100644 (file)
@@ -606,8 +606,7 @@ void w32g_update_playlist_pos(int pos)
                w32g_get_midi_file_info_post(entry);
                {
                        char *title;
-                       volatile LVITEM lvi0;           
-                       int len;
+                       volatile LVITEM lvi0;
                        lvi0.iItem = i;
                        lvi0.iSubItem = 0;
                        lvi0.mask = LVIF_TEXT;
@@ -1258,7 +1257,7 @@ char *w32g_get_playlist_play(int idx)
 #ifdef LISTVIEW_PLAYLIST
 void w32g_copy_playlist(void)
 {
-    int i, num, pos, selnum = 0, next;
+    int i, num, pos, selnum = 0;
     HWND hList;
        PlayListEntry *entry;
 
@@ -1330,11 +1329,10 @@ void w32g_copy_playlist(void)
 
 void w32g_paste_playlist(int uniq, int refine)
 {
-    int i, num, pos, select = 0, selnum = 0, skip = 0;
+    int i, num, pos, select = 0, skip = 0;
     HWND hList;
 
     PlayListEntry *entry;
-    char *title;
     struct midi_file_info *info;
        
     if(!(hList = playlist_box()))
index dbbcd65..78abf6b 100644 (file)
 #include "resample.h"
 #include "mix.h"
 #include "thread.h"
+#include "sfz.h"
 
 #include <tchar.h>
 #include "w32g.h"
 #include "w32g_res.h"
 #include "w32g_utl.h"
+#include "w32g_ut2.h"
 #include "w32g_pref.h"
 ///r
 #ifdef AU_W32
 #include "gogo_a.h"
 #endif
 
+#ifdef AU_FLAC
+#include "flac_a.h"
+#endif
+
 
 /*****************************************************************************************************************************/
 
@@ -153,9 +159,8 @@ static int get_winver(void)
 
 /* TiMidity Win32GUI preference / PropertySheet */
 
-#if !defined(IA_W32G_SYN)
 extern void w32g_restart(void);
-#endif
+
 extern void set_gogo_opts_use_commandline_options(char *commandline);
 
 extern void restore_voices(int save_voices);
@@ -192,7 +197,6 @@ static int DlgOpenOutputDir(char *Dirname, HWND hwnd);
 
 static int vorbisCofigDialog(void);
 static int gogoCofigDialog(void);
-static int flacConfigDialog(void);
 
 static int w32_reset_exe_directory(void)
 {
@@ -660,6 +664,7 @@ extern void TracerWndApplyQuietChannel( ChannelBitMask quietchannels_ );
  * 注意: MainThread からの呼び出し禁止、危険！
  */
 extern void OnQuit(void);
+extern void timidity_init_player(void); /* timidity.c */
 
 void PrefSettingApplyReally(void)
 {
@@ -708,6 +713,9 @@ void PrefSettingApplyReally(void)
 #ifdef INT_SYNTH
        init_int_synth();
 #endif // INT_SYNTH
+#ifdef ENABLE_SFZ
+       init_sfz();
+#endif
        initialize_resampler_coeffs();
     timidity_init_player();
        restore_voices(1);
@@ -831,6 +839,9 @@ void reload_cfg(void)
     free_special_patch(-1);
     tmdy_free_config();
     free_soundfonts();
+#ifdef ENABLE_SFZ
+       free_sfz();
+#endif
 #ifdef INT_SYNTH
        free_int_synth();
 #endif // INT_SYNTH
@@ -3456,7 +3467,6 @@ PrefTiMidity2DialogProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam)
        static int pref_channel_page;
        static ChannelBitMask channelbitmask;
        int i, j, tmp;
-       const TCHAR **cb_info;
        switch (uMess){
        case WM_INITDIALOG:
                // BANK
index 2ae588e..fe56ec8 100644 (file)
@@ -57,6 +57,5 @@ void InitSoundSpecWnd(HWND hParentWnd);
 void w32g_setup_doc(int idx);
 void w32g_open_doc(int close_if_no_doc);
 void w32g_free_doc(void);
-extern void w32g_free_doc(void);
 
 #endif /* ___W32G_SUBWIN_H_ */
index 024eb16..4cb060b 100755 (executable)
@@ -653,7 +653,7 @@ void TracerWndClear(int lockflag)
        HPEN hPen;
        HBRUSH hBrush;
        HGDIOBJ hgdiobj_hpen, hgdiobj_hbrush;
-       RECT rc;
+       //RECT rc;
 
        if (lockflag) TRACER_LOCK();
        hPen = CreatePen(PS_SOLID, 1, C_BACK);
index c854871..43fc5dc 100755 (executable)
@@ -64,6 +64,7 @@
 #include "readmidi.h"
 #include "output.h"
 #include "controls.h"
+#include "rtsyn.h"
 
 #ifdef WIN32GCC
 WINAPI void InitCommonControls(void);
@@ -73,6 +74,7 @@ WINAPI void InitCommonControls(void);
 #include "w32g_utl.h"
 #include "w32g_pref.h"
 #include "w32g_res.h"
+#include "w32g_int_synth_editor.h"
 
 #ifdef IA_W32G_SYN
 
@@ -633,7 +635,7 @@ static const TCHAR *syn_thread_priority_name_jp[] = {
     TEXT("\83^\83C\83\80\83N\83\8a\83e\83B\83J\83\8b")
 };
 static const TCHAR *syn_thread_priority_name_en[] = {
-       TEXT("idle")
+       TEXT("idle"),
     TEXT("lowest"),
     TEXT("below normal"),
     TEXT("normal"),
@@ -1530,7 +1532,7 @@ int w32g_syn_ctl_pass_playing_list(int n_, char *args_[])
 #endif /* !TWSYNSRV */
                                SetPriorityClass(GetCurrentProcess(), processPriority);
                                SetThreadPriority(w32g_syn.syn_hThread, syn_ThreadPriority);
-                               result = ctl_pass_playing_list2(w32g_syn_port_num, args);
+                               result = ctl->pass_playing_list(w32g_syn_port_num, args);
                                SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
                                SetThreadPriority(w32g_syn.syn_hThread, THREAD_PRIORITY_NORMAL);
                                if (result == 2) {
@@ -1711,7 +1713,7 @@ static void VersionWnd(HWND hParentWnd)
 {
        char VersionText[2024];
   sprintf(VersionText,
-"TiMidity++ %s%s" NLS NLS
+"TiMidity++ %s%s %s" NLS NLS
 "TiMidity-0.2i by Tuukka Toivonen <tt@cgs.fi>." NLS
 "TiMidity Win32 version by Davide Moretti <dave@rimini.com>." NLS
 "TiMidity Windows 95 port by Nicolas Witczak." NLS
@@ -1719,7 +1721,7 @@ static void VersionWnd(HWND hParentWnd)
 "Twsynth GUI by Daisuke Aoki <dai@y7.net>." NLS
 " Japanese menu, dialog, etc by Saito <timidity@flashmail.com>." NLS
 "TiMidity++ by Masanao Izumo <mo@goice.co.jp>." NLS
-, (strcmp(timidity_version, "current")) ? "version " : "", timidity_version);
+, (strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string);
        MessageBoxA(hParentWnd, VersionText, "Version", MB_OK);
 }
 
@@ -1727,7 +1729,7 @@ static void TiMidityWnd(HWND hParentWnd)
 {
        char TiMidityText[2024];
   sprintf(TiMidityText,
-" TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+" TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
 " Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>" NLS
 " Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
 NLS
@@ -1749,7 +1751,7 @@ NLS
 " along with this program; if not, write to the Free Software" NLS
 " Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA" NLS
 ,
-(strcmp(timidity_version, "current")) ? "version " : "", timidity_version
+(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
        );
        MessageBoxA(hParentWnd, TiMidityText, "TiMidity", MB_OK);
 }
@@ -2061,6 +2063,44 @@ static void ConsoleWndVerbosityApplyIncDec(int num)
 
 #endif /* HAVE_SYN_CONSOLE */
 
+#ifdef IA_W32G_SYN
+static int winplaymidi_sleep_level = 2;
+static DWORD winplaymidi_active_start_time = 0;
+
+void winplaymidi(void) {
+
+    if (winplaymidi_sleep_level < 1) {
+        winplaymidi_sleep_level = 1;
+    }
+    if (0 != rtsyn_buf_check()) {
+        winplaymidi_sleep_level = 0;
+    }
+    rtsyn_play_some_data();
+    if (winplaymidi_sleep_level == 1) {
+        DWORD ct = GetCurrentTime();
+        if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) {
+            winplaymidi_active_start_time = ct;
+        }
+        else if (ct - winplaymidi_active_start_time > 60000) {
+            winplaymidi_sleep_level = 2;
+        }
+    }
+    else if (winplaymidi_sleep_level == 0) {
+        winplaymidi_active_start_time = 0;
+    }
+
+    rtsyn_play_calculate();
+
+    if (winplaymidi_sleep_level >= 2) {
+        Sleep(100);
+    }
+    else if (winplaymidi_sleep_level > 0) {
+        Sleep(1);
+    }
+}
+#endif /* IA_W32G_SYN */
+
+
 #ifdef HAVE_SYN_SOUNDSPEC
 
 // ***************************************************************************
index dcf8211..7d236fb 100644 (file)
@@ -156,4 +156,6 @@ extern int INILoadConsoleWnd(void);
 extern int INISaveSoundSpecWnd(void);
 extern int INILoadSoundSpecWnd(void);
 
+extern int PauseOldTiMidity(void);
+
 #endif /* __W32G_UT2_H__ */
index 07c494c..0305121 100644 (file)
@@ -53,6 +53,7 @@
 #endif /* SUPPORT_SOUNDSPEC */
 #include "wrd.h"
 #include "w32g.h"
+#include "w32g_pref.h"
 #include "w32g_utl.h"
 #ifdef HAVE_SYS_STAT_H
 #include <sys/stat.h>
@@ -112,10 +113,6 @@ extern int AutoloadPlaylist;
 extern int AutosavePlaylist;
 extern int PosSizeSave;
 extern unsigned char opt_normal_chorus_plus; 
-#ifdef AU_LAME
-extern void lame_ConfigDialogInfoLoadINI();
-extern void lame_ConfigDialogInfoSaveINI();
-#endif
 
 ///r
 //char DefaultPlaylistName[PLAYLIST_MAX][] = {"default.pls"};
index 4f1ab28..75372a4 100755 (executable)
 #include "wsgtk_main.h"
 #endif /* USE_GTK_GUI */
 
-#ifndef __W32__
 #include <stdio.h>
+#ifndef __W32__
 #include <termios.h>
 //#include <term.h>
 #include <unistd.h>
 #endif /* __W32__ */
 
 #if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
 #define HAVE_DOS_KEYBOARD 1
 #endif /* __W32__ */
 
@@ -280,7 +282,7 @@ static int ctl_pass_playing_list(int n, char *args[])
 static int ctl_pass_playing_list(int n, char *args[])
 #else
 // 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
 #endif
 {
   int i, j, devnum, devok;
@@ -546,41 +548,6 @@ static void doit(void)
 #endif /* !IA_W32G_SYN */
 
 
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-void winplaymidi(void) {
-
-  if (winplaymidi_sleep_level < 1) {
-    winplaymidi_sleep_level = 1;
-  }
-  if (0 != rtsyn_buf_check()) {
-      winplaymidi_sleep_level = 0;
-  }
-  rtsyn_play_some_data();
-  if (winplaymidi_sleep_level == 1) {
-    DWORD ct = GetCurrentTime();
-    if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) {
-      winplaymidi_active_start_time = ct;
-    } else if (ct - winplaymidi_active_start_time > 60000) {
-      winplaymidi_sleep_level = 2;
-    }
-  } else if (winplaymidi_sleep_level == 0) {
-    winplaymidi_active_start_time = 0;
-  }
-
-  rtsyn_play_calculate();
-
-  if (winplaymidi_sleep_level >= 2) {
-    Sleep(100);
-  } else if (winplaymidi_sleep_level > 0) {
-    Sleep(1);
-  }
-}
-#endif /* IA_W32G_SYN */
-
-
 /*
  * interface_<id>_loader();
  */
index 916c877..fe82fd6 100644 (file)
@@ -62,6 +62,7 @@
 #include <windows.h>
 #endif
 #include "w32g_dib.h"
+#include "w32g_mag.h"
 #include "w32g_wrd.h"
 
 static int wrdt_open(char *dummy);
index bb3396f..2b05687 100644 (file)
@@ -6,6 +6,8 @@
 # define UNICODE
 #endif
 
+#include <stdio.h>
+#include <stdlib.h>
 #include <windows.h>
 #undef MOUSE_MOVED
 #include <curspriv.h>
index defc126..2de8109 100644 (file)
 
 /* callback routines */
 static void CALLBACK winmm_in_callback(HMIDIIN hMidiIn,
-                                       WORD wMsg, DWORD_PTR dwInstance, 
-                                       DWORD_PTR dwParam1, DWORD_PTR dwParam2);
+                                       WORD wMsg, DWORD dwInstance, 
+                                       DWORD dwParam1, DWORD dwParam2);
 static void CALLBACK winmm_streamout_callback(HMIDIOUT hmo, UINT wMsg,
-                                              DWORD_PTR dwInstance, DWORD_PTR dwParam1, 
-                                              DWORD_PTR dwParam2);
+                                              DWORD dwInstance, DWORD dwParam1, 
+                                              DWORD dwParam2);
 #ifdef USE_SYSEX_BUFFERS
 static void CALLBACK winmm_out_callback(HMIDIOUT hmo, UINT wMsg,
-                                        DWORD_PTR dwInstance, DWORD_PTR dwParam1, 
-                                        DWORD_PTR dwParam2);
+                                        DWORD dwInstance, DWORD dwParam1, 
+                                        DWORD dwParam2);
 #endif
 
 extern pm_fns_node pm_winmm_in_dictionary;
index 17675bd..1c0c65e 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "porttime.h"
 #include "windows.h"
+#include <mmsystem.h>
 #include "time.h"
 
 
index abd35f0..f50aeb4 100644 (file)
@@ -6,6 +6,7 @@
 // SSE is enabled.
 #ifndef _USE_SSE
 #  define USE_ALLOCA
+#  define alloca _alloca
 #endif
 
 /* Default to floating point */
index 82ba016..968b2f1 100644 (file)
@@ -411,7 +411,7 @@ extern const SpeexMode * const speex_mode_list[SPEEX_NB_MODES];
 /** Obtain one of the modes available */
 const SpeexMode * speex_lib_get_mode (int mode);
 
-#ifndef WIN32
+#ifndef _WIN32
 /* We actually override the function in the narrowband case so that we can avoid linking in the wideband stuff */
 #define speex_lib_get_mode(mode) ((mode)==SPEEX_MODEID_NB ? &speex_nb_mode : speex_lib_get_mode (mode))
 #endif
index 25b304c..e10b02e 100644 (file)
@@ -193,30 +193,12 @@ static int au_output_open(const char *fname, const char *comment)
 
 static int auto_au_output_open(const char *input_filename)
 {
-  char *output_filename = (char *)safe_malloc(strlen(input_filename) + 5);
-  char *ext, *p;
-
-  strcpy(output_filename, input_filename);
-  if((ext = strrchr(output_filename, '.')) == NULL)
-    ext = output_filename + strlen(output_filename);
-  else {
-    /* strip ".gz" */
-    if(strcasecmp(ext, ".gz") == 0) {
-      *ext = '\0';
-      if((ext = strrchr(output_filename, '.')) == NULL)
-       ext = output_filename + strlen(output_filename);
-    }
-  }
+  char *output_filename = create_auto_output_name(input_filename, ".au", NULL, 0);
 
-  /* replace '.' and '#' before ext */
-  for(p = output_filename; p < ext; p++)
-    if(*p == '.' || *p == '#')
-      *p = '_';
+  if (!output_filename) {
+    return -1;
+  }
 
-  if(*ext && isupper(*(ext + 1)))
-    strcpy(ext, ".AU");
-  else
-    strcpy(ext, ".au");
   if(au_output_open(output_filename, input_filename) == -1) {
     free(output_filename);
     return -1;
index 55783a7..15df714 100644 (file)
@@ -177,6 +177,7 @@ extern void code_convert(char *in, char *out, size_t outsiz,
 extern void safe_exit(int status);
 
 extern const char *timidity_version;
+extern const char *arch_string; /* optcode.c */
 extern MBlockList tmpbuffer;
 extern char *output_text_code;
 
index baa1a5e..5c0b526 100644 (file)
@@ -39,7 +39,7 @@ extern ControlMode mac_control_mode;
 #elif defined(IA_W32GUI)
 extern ControlMode w32gui_control_mode;
 #define DEFAULT_CONTROL_MODE &w32gui_control_mode
-#elif defined(IA_W32G_SYN)
+#elif defined(IA_W32G_SYN) || defined(IA_WINSYN)
 extern ControlMode winsyn_control_mode;
 #define DEFAULT_CONTROL_MODE &winsyn_control_mode
 #else
@@ -209,7 +209,7 @@ ControlMode *ctl_list[]={
 #ifdef IA_W32G_SYN
   &winsyn_control_mode,
 #endif /* IA_W32GUI */
-#if !defined(__MACOS__)  && !defined(IA_W32GUI) && !defined(IA_W32G_SYN)
+#if !defined(__MACOS__)  && !defined(IA_W32GUI) && !defined(IA_W32G_SYN) && !defined(IA_WINSYN)
        &dumb_control_mode,
 #endif
 #ifdef IA_PLUGIN
index 00f6160..5b5e6b1 100644 (file)
@@ -7028,8 +7028,8 @@ static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
                _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)       
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
@@ -7045,10 +7045,10 @@ static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -7151,8 +7151,8 @@ static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
                _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)       
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
@@ -7168,10 +7168,10 @@ static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -7278,8 +7278,8 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)       
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
@@ -7295,10 +7295,10 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -7368,8 +7368,8 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                _mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(count)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(cuont)    
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(cuont)  
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
@@ -7385,10 +7385,10 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -7466,8 +7466,8 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)       
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
@@ -7483,10 +7483,10 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -7555,8 +7555,8 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                _mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
                vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
                vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
-               vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(count)
-               vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(cuont)    
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(cuont)  
                vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
                vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
                vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
@@ -7572,10 +7572,10 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
                vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
                vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
@@ -11827,10 +11827,10 @@ static inline void do_pitch_shifter_core(DATA_T *buf, InfoPitchShifter_core *inf
                vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
                vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
 #endif
-               vin[0] = _mm_loadu_pd(&info->ptr[vindex[0].m128i_i32[0]]); // v1v2
-               vin[1] = _mm_loadu_pd(&info->ptr[vindex[0].m128i_i32[1]]); // v1v2
-               vin[2] = _mm_loadu_pd(&info->ptr[vindex[1].m128i_i32[0]]); // v1v2
-               vin[3] = _mm_loadu_pd(&info->ptr[vindex[1].m128i_i32[1]]); // v1v2
+               vin[0] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vin[1] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vin[2] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vin[3] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[1],1)]); // v1v2
                vv1[0] = _mm_shuffle_pd(vin[0], vin[1], 0x0); // v1v1
                vv1[1] = _mm_shuffle_pd(vin[2], vin[3], 0x0); // v1v1
                vv2[0] = _mm_shuffle_pd(vin[0], vin[1], 0x3); // v2v2
index d8b57ce..a212f56 100644 (file)
 #include "playmidi.h"
 #include "readmidi.h"
 #include "miditrace.h"
+#include "flac_a.h"
 
 #ifdef __W32G__
 #include "w32g.h"
+#include "w32g_pref.h"
 #endif /* __W32G__ */
 
 static int open_output(void); /* 0=success, 1=warning, -1=fatal error */
@@ -868,7 +870,7 @@ static int open_output(void)
   int include_enc, exclude_enc;
 
 #ifdef AU_FLAC_DLL
-       if (g_load_libFLAC_dll("libFLAC.dll")) {
+       if (g_load_libFLAC_dll()) {
                ctl->cmsg(CMSG_ERROR, VERB_NORMAL,
                          "DLL load failed: %s", "libFLAC.dll, ogg.dll");
                return -1;
diff --git a/timidity/flac_a.h b/timidity/flac_a.h
new file mode 100644 (file)
index 0000000..1dc0fc9
--- /dev/null
@@ -0,0 +1,22 @@
+
+#pragma once
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "timidity.h"
+#include "common.h"
+
+#ifdef AU_FLAC
+/* Setters for the FLAC output options; declared only when AU_FLAC is defined. */
+extern void flac_set_option_verify(int);
+extern void flac_set_option_padding(int);
+extern void flac_set_compression_level(int compression_level);
+
+/* The Ogg FLAC switch additionally requires AU_OGGFLAC. */
+#ifdef AU_OGGFLAC
+extern void flac_set_option_oggflac(int isogg);
+#endif
+
+#endif /* AU_FLAC */
index e0f3e3f..d73d885 100644 (file)
@@ -50,6 +50,7 @@
 #include "quantity.h"
 #include "freq.h"
 #include "support.h"
+#include "sfz.h"
 
 #define INSTRUMENT_HASH_SIZE 128
 struct InstrumentCache
@@ -142,6 +143,9 @@ void free_instrument(Instrument *ip)
 #ifdef INT_SYNTH
        extern void free_int_synth_file(Instrument *ip);
 #endif
+#ifdef ENABLE_SFZ
+       extern void free_sfz_file(Instrument *ip);
+#endif
 
        if (!ip) return;
 
@@ -166,6 +170,11 @@ void free_instrument(Instrument *ip)
                free_int_synth_file(ip);
                break;
 #endif
+#ifdef ENABLE_SFZ
+       case INST_SFZ:
+               free_sfz_file(ip);
+               break;
+#endif
        }
        safe_free(ip);
 }
@@ -1656,6 +1665,11 @@ Instrument *load_instrument(int dr, int b, int prog, int elm)
                ip = extract_scc_file(bank->tone[prog][elm]->name, bank->tone[prog][elm]->is_preset);
                break;
 #endif
+#ifdef ENABLE_SFZ
+       case 5: /* sfz extension */
+               ip = extract_sfz_file(bank->tone[prog][elm]->name);
+               break;
+#endif
        default:
                goto TONEBANK_INSTRUMENT_NULL;
                break;
@@ -2213,7 +2227,7 @@ void free_instruments(int reload_default_inst)
                                if(bank->tone[j][elm] == NULL)
                                        continue;
                                ip = bank->tone[j][elm]->instrument;
-                               if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC) &&
+                               if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC || ip->type == INST_SFZ) &&
                                        (i == 0 || !tonebank[0]->tone[j][elm] || ip != tonebank[0]->tone[j][elm]->instrument) )
                                                free_instrument(ip);
                                bank->tone[j][elm]->instrument = NULL;
@@ -2225,7 +2239,7 @@ void free_instruments(int reload_default_inst)
                                if(bank->tone[j][elm] == NULL)
                                        continue;
                                ip = bank->tone[j][elm]->instrument;
-                               if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC) &&
+                               if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC || ip->type == INST_SFZ) &&
                                   (i == 0 || !drumset[0]->tone[j][elm] || ip != drumset[0]->tone[j][elm]->instrument) )
                                        free_instrument(ip);
                                bank->tone[j][elm]->instrument = NULL;
index 2168585..2c936ea 100644 (file)
@@ -109,6 +109,7 @@ enum {
 ///r
 #define INST_MMS       4       /* %mms */
 #define INST_SCC       5       /* %scc */
+#define INST_SFZ       6       /* %sfz */
 
 /* sfSampleType */
 #define SF_SAMPLETYPE_MONO 1
@@ -141,7 +142,8 @@ typedef struct {
        2: %sample // wav,aiff
        3: %mms
        4: %scc
-       5-255: reserved
+       5: %sfz
+       6-255: reserved
 */
        int16 amp;
        int8 amp_normalize;
index 5e4c0c3..b7ed23a 100644 (file)
@@ -262,36 +262,19 @@ static inline void is_resample_core(Info_Resample *rs, DATA_T *is_buf, IS_RS_DAT
        vofsi = _mm_srli_epi32(vofs, FRACTION_BITS);
        vofsf = _mm_and_si128(vofs, vfmask);
        vfp = _mm_mul_ps(_mm_cvtepi32_ps(vofsf), vec_divf); // int32 to float // calc fp
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-       ofsp1 = (int32 *)vofsi;
 #if defined(IS_RS_DATA_T_DOUBLE)
-       tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[0]])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[1]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[2]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[3]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶     
+       tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,0)])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+       tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,1)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,2)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,3)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à  
        tmp1 = _mm_shuffle_ps(tmp1, tmp2, 0x44);
        tmp3 = _mm_shuffle_ps(tmp3, tmp4, 0x44);
 #else // defined(IS_RS_DATA_T_FLOAT)
-       tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[ofsp1[0]]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[ofsp1[1]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[ofsp1[2]]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[ofsp1[3]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,0)]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+       tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,1)]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,2)]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,3)]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
 #endif
-#else
-#if defined(IS_RS_DATA_T_DOUBLE)
-       tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[0]])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[1]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[2]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[3]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à       
-       tmp1 = _mm_shuffle_ps(tmp1, tmp2, 0x44);
-       tmp3 = _mm_shuffle_ps(tmp3, tmp4, 0x44);
-#else // defined(IS_RS_DATA_T_FLOAT)
-       tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[vofsi.m128i_i32[0]]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[vofsi.m128i_i32[1]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[vofsi.m128i_i32[2]]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[vofsi.m128i_i32[3]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-#endif
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))     
        vv1 = _mm_shuffle_ps(tmp1, tmp3, 0x88); // v1[0,1,2,3]  // ofsi\82Ív1\82É
        vv2 = _mm_shuffle_ps(tmp1, tmp3, 0xdd); // v2[0,1,2,3]  // ofsi+1\82Ív2\82É\88Ú\93®
        vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1);    
@@ -328,18 +311,10 @@ static inline void is_resample_core(Info_Resample *rs, DATA_T *is_buf, IS_RS_DAT
        vofsf = _mm_and_si128(vofs, vfmask);
        vfp1 = _mm_mul_pd(_mm_cvtepi32_pd(vofsf), vec_divf); // int32 to double // calc fp
        vfp2 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_shuffle_epi32(vofsf, 0x4E)), vec_divf); // int32 to double // calc fp
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-       ofsp1 = (int32 *)vofsi;
-       tmp1 = _mm_loadu_pd(&rs_buf[ofsp1[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp2 = _mm_loadu_pd(&rs_buf[ofsp1[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadu_pd(&rs_buf[ofsp1[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp4 = _mm_loadu_pd(&rs_buf[ofsp1[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶   
-#else
-       tmp1 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       tmp2 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp3 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       tmp4 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à     
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))     
+       tmp1 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+       tmp2 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp3 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       tmp4 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à        
        vv11 = _mm_shuffle_pd(tmp1, tmp2, 0x00); // v1[0,1] // ofsi\82Ív1\82É
        vv21 = _mm_shuffle_pd(tmp1, tmp2, 0x03); // v2[0,1] // ofsi+1\82Ív2\82É\88Ú\93®
        vv12 = _mm_shuffle_pd(tmp3, tmp4, 0x00); // v1[2,3] // ofsi\82Ív1\82É
index e564a02..ef01ef0 100644 (file)
@@ -303,6 +303,7 @@ extern int scc_editor_override;
 extern int mms_editor_override; 
 extern const char *scc_data_editor_load_name(int num);
 extern void scc_data_editor_store_name(int num, const char *name);
+extern void scc_data_editor_clear_param(void);
 extern void scc_data_editor_set_default_param(int set_num);
 extern int scc_data_editor_get_param(int num);
 extern void scc_data_editor_set_param(int num, int val);
index 5698812..a136263 100644 (file)
@@ -559,6 +559,9 @@ void mix_voice(DATA_T *buf, int v, int32 c)
        case INST_SF2:
        case INST_MOD:
        case INST_PCM:
+#ifdef ENABLE_SFZ
+       case INST_SFZ:
+#endif
                if(opt_resample_over_sampling){
                        int32 c2 = c * opt_resample_over_sampling;
                        resample_voice(v, sp, c2);
index 45253bf..b91e36d 100644 (file)
@@ -600,8 +600,8 @@ void shrink_huge_sample (Sample *sp)
     safe_free(sp->data);
     sp->data = new_data;
     sp->sample_rate = new_rate;
-
-    sp->data_length = new_data_length << FRACTION_BITS;
+       
+    sp->data_length = (splen_t)new_data_length << FRACTION_BITS;
     sp->loop_start = loop_start * (1 << FRACTION_BITS);
     sp->loop_end = loop_end * (1 << FRACTION_BITS);
 }
index 3e500b2..f6c19d0 100644 (file)
@@ -6,6 +6,10 @@
 
 
 #ifdef MYINI_LIBRARY_DEFIND_VAR
+#ifdef _WIN32
+#include <windows.h>
+#else
+
 #ifndef INT8
 typedef char INT8;
 #endif
@@ -51,6 +55,7 @@ typedef unsigned long long UINT64;
 #endif
 
 #endif
+#endif
 
 
 #ifndef _TCHAR_DEFINED
index 56ae5cf..41e518a 100755 (executable)
 #endif /* stdc */
 
 #include "timidity.h"
+#include "common.h"
+
+const char *arch_string = /* build-time CPU/SIMD tag, fixed by the preprocessor ladder below */
+#ifdef IX64CPU /* x64 build: suffix chosen by the USE_X64_EXT_INTRIN level (9 = AVX2 ... 1 = MMX) */
+       #if USE_X64_EXT_INTRIN == 9
+               "[x64 AVX2]"
+       #elif USE_X64_EXT_INTRIN == 8
+               "[x64 AVX]"
+       #elif USE_X64_EXT_INTRIN == 7
+               "[x64 SSE4.2]"
+       #elif USE_X64_EXT_INTRIN == 6
+               "[x64 SSE4.1]"
+       #elif USE_X64_EXT_INTRIN == 5
+               "[x64 SSSE3]"
+       #elif USE_X64_EXT_INTRIN == 4
+               "[x64 SSE3]"
+       #elif USE_X64_EXT_INTRIN == 3
+               "[x64 SSE2]"
+       #elif USE_X64_EXT_INTRIN == 2
+               "[x64 SSE]"
+       #elif USE_X64_EXT_INTRIN == 1
+               "[x64 MMX]"
+       #else /* level is 0, undefined, or outside 1..9: architecture name only */
+               "[x64]"
+       #endif
+#elif defined(IX86CPU) /* x86 build: same ladder keyed on USE_X86_EXT_INTRIN */
+       #if USE_X86_EXT_INTRIN == 9
+               "[x86 AVX2]"
+       #elif USE_X86_EXT_INTRIN == 8
+               "[x86 AVX]"
+       #elif USE_X86_EXT_INTRIN == 7
+               "[x86 SSE4.2]"
+       #elif USE_X86_EXT_INTRIN == 6
+               "[x86 SSE4.1]"
+       #elif USE_X86_EXT_INTRIN == 5
+               "[x86 SSSE3]"
+       #elif USE_X86_EXT_INTRIN == 4
+               "[x86 SSE3]"
+       #elif USE_X86_EXT_INTRIN == 3
+               "[x86 SSE2]"
+       #elif USE_X86_EXT_INTRIN == 2
+               "[x86 SSE]"
+       #elif USE_X86_EXT_INTRIN == 1
+               "[x86 MMX]"
+       #else /* level is 0, undefined, or outside 1..9: architecture name only */
+               "[x86]"
+       #endif
+#else /* neither IX64CPU nor IX86CPU is defined: empty string */
+       ""
+#endif
+;
 
 
 /*****************************************************************************/
@@ -170,7 +221,33 @@ int32 imuldiv28(int32 a, int32 b) {
 
 /*****************************************************************************/
 #if (USE_X86_EXT_ASM || USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_ASM || USE_X86_AMD_EXT_INTRIN)
-
+#ifdef __GNUC__
+/* CPUID leaf `eax`, sub-leaf 0 -> regs[0..3] = EAX,EBX,ECX,EDX (as MSVC __cpuid).
+ * static: a plain C99 `inline` emits no out-of-line copy for non-inlined calls.
+ * EBX goes through a scratch register (xchg), so the stack is never touched. */
+static inline void CPUID(int32 *regs, uint32 eax)
+{
+       uint32 ebx, ecx = 0, edx; /* ECX selects the sub-leaf; always 0 here */
+       __asm__ __volatile__ (
+#ifdef __x86_64__
+               "xchg           %%rbx, %q1      \n\t"
+               "cpuid                                  \n\t"
+               "xchg           %%rbx, %q1      \n\t"
+#else
+               "xchg           %%ebx, %1       \n\t"
+               "cpuid                                  \n\t"
+               "xchg           %%ebx, %1       \n\t"
+#endif
+               : "+a"(eax), "=&r"(ebx), "+c"(ecx), "=d"(edx)
+       );
+       regs[0] = eax;
+       regs[1] = ebx;
+       regs[2] = ecx;
+       regs[3] = edx;
+}
+#else
+#define CPUID __cpuid
+#endif
 enum{
        X86_VENDER_INTEL=0,
        X86_VENDER_AMD,
@@ -180,13 +257,22 @@ enum{
 static const char* x86_vendors[] = 
 {
        "GenuineIntel",
-       "AuthenticAMD"
+       "AuthenticAMD",
        "Unknown     ",
 };
 
 // 拡張フラグ取得 (get the extended feature flags)
 static inline int64    xgetbv(int index)
 {
+#if defined(__GNUC__)
+       unsigned int eax, edx;
+       __asm__ __volatile__ (
+               "xgetbv         \n\t"
+               : "=a"(eax), "=d"(edx)
+               : "c"(index)
+       );
+       return (uint64)eax|((uint64)edx<<32);
+#else
 #if (USE_X86_EXT_ASM || USE_X86_AMD_EXT_ASM)
        uint64 flg = 0;
        //_asm {
@@ -197,9 +283,13 @@ static inline int64        xgetbv(int index)
        //return flg;
        return 0xFFFFFFFFFFFFFFFF; // asm\82Åxgetbv index \82Ç\82±\81E\81E\82í\82©\82ç\82ñ\82Ì\82Å\83X\83\8b\81[
 #elif (USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_INTRIN)
+#if _MSC_VER < 1600 // VC2010 (immintrin.h _xgetbv()
+       return 0xFFFFFFFFFFFFFFFF;
+#else
        return _xgetbv(index);
+#endif /* _MSC_VER */
+#endif
 #endif
-
 }
 
 
@@ -219,7 +309,7 @@ int is_x86ext_available(void)
        uint32 flg4; // extended feature flg pg2
 
        memset(vendor, 0, sizeof(vendor));
-       __cpuid(reg, 0);
+       CPUID(reg,0);
        cmd = reg[0];
        ((uint32*)vendor)[0] = reg[1];
        ((uint32*)vendor)[1] = reg[3];
@@ -229,14 +319,14 @@ int is_x86ext_available(void)
                        break;
        }
        if(cmd >= 0x00000001){
-               __cpuid(reg, 0x00000001);
+               CPUID(reg,0x00000001);
                flg1 = reg[3];
                flg2 = reg[2];
        }
-       __cpuid(reg, 0x80000000);
+       CPUID(reg,0x80000000);
        cmd = reg[ 0 ];
        if(cmd >= 0x80000001){
-               __cpuid(reg, 0x80000001);
+               CPUID(reg,0x80000001);
                flg4 = reg[2];
                flg3 = reg[3];
        }
index b1313ac..f59f820 100755 (executable)
 #ifndef OPTCODE_H_INCLUDED
 #define OPTCODE_H_INCLUDED 1
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmacro-redefined"
+#endif
+
 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__X86__) || defined(__I86__)
 #define IX86CPU 1
 #endif
 
-#if defined(_M_X64) || defined(_AMD64_) || defined(_X64_) || defined(__X64__)
+#if defined(_M_X64) || defined(_AMD64_) || defined(_X64_) || defined(__X64__) || defined(__x86_64__)
 #define IX64CPU 1
 #undef IX86CPU
 #undef IA64CPU
@@ -227,7 +232,7 @@ enum{
 
 #if defined(USE_SSE5) // _MSC_VER >= 1700 VC2012?
 #define USE_X86_AMD_EXT_INTRIN  6
-#eiif defined(USE_SSE4A) // _MSC_VER >= 1600 VC2010?
+#elif defined(USE_SSE4A) // _MSC_VER >= 1600 VC2010?
 #define USE_X86_AMD_EXT_INTRIN  5
 #elif defined(USE_3DNOW_PRO)
 #define USE_X86_AMD_EXT_INTRIN  4
@@ -300,35 +305,30 @@ enum{
 #endif
 
 /* asm/intrin\95s\89Â\8fð\8c\8f \91¼\82É\82 \82ê\82Î\92Ç\89Á */
-#if !defined(IX86CPU)
-#define USE_X86_EXT_ASM      0
-#define USE_X86_AMD_EXT_ASM  0
-#endif
 #if !defined(IX64CPU)
-#define USE_X64_EXT_ASM      0
-#define USE_X64_AMD_EXT_ASM  0
+#undef USE_X64_EXT_INTRIN
 #define USE_X64_EXT_INTRIN   0
+#undef USE_X64_AMD_EXT_INTRIN
 #define USE_X64_AMD_EXT_INTRIN  0
 #endif
 #if !defined(IX86CPU) && !defined(IX64CPU)
+#undef USE_X86_EXT_INTRIN
 #define USE_X86_EXT_INTRIN      0
+#undef USE_X86_AMD_EXT_INTRIN
 #define USE_X86_AMD_EXT_INTRIN  0
 #endif
 
-#if defined(__GNUC__)
-#define USE_X86_EXT_INTRIN  0
-#define USE_X86_AMD_EXT_INTRIN  0
-#define USE_X64_EXT_INTRIN      0
-#define USE_X64_AMD_EXT_INTRIN  0
-#endif
-#if defined(__GNUC__)
-#define USE_X86_EXT_INTRIN  0
-#define USE_X86_AMD_EXT_INTRIN  0
-#define USE_X64_EXT_INTRIN      0
-#define USE_X64_AMD_EXT_INTRIN  0
-#endif
-
+/* Always disable inline asm */
+#undef USE_X86_EXT_ASM
+#define USE_X86_EXT_ASM      0
+#undef USE_X86_AMD_EXT_ASM
+#define USE_X86_AMD_EXT_ASM  0
+#undef USE_X64_EXT_ASM
+#define USE_X64_EXT_ASM      0
+#undef USE_X64_AMD_EXT_ASM
+#define USE_X64_AMD_EXT_ASM  0
 
+#undef SUPPORT_ASM_INTEL
 
 /*****************************************************************************/
 /* PowerPC's AltiVec enhancement */
@@ -361,7 +361,7 @@ enum{
 #endif
 
 /*****************************************************************************/
-#if OPT_MODE == 1
+#if OPT_MODE == 1 && USE_X86_EXT_ASM > 0
 
 #ifdef LITTLE_ENDIAN
 #define iman_ 0
@@ -619,28 +619,7 @@ static inline int32 signlong(int32 a)
 
 #if (USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_INTRIN)
 #ifdef __GNUC__
-//#if defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 8)
-//#include <avxintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 7)
-//#include <nmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 6)
-//#include <smmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 5)
-//#include <tmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 4)
-//#include <pmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 3)
-//#include <emmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 2)
-//#include <xmmintrin.h>
-//#else
-//#include <mmintrin.h>
-//#endif
-//#if defined(USE_X86_AMD_EXT_INTRIN) && (USE_X86_AMD_EXT_INTRIN >= 2)
-//#include <mm3dnow.h>
-//#endif
-#include <immintrin.h>
-
+#include <x86intrin.h>
 #elif (_MSC_VER >= 1600) // VC2010(VC10)
 #include <intrin.h>
 #else // VC2003(VC7) VC2005(VC8) VC2008(VC9)
@@ -764,8 +743,8 @@ LSU : Unalignment (use loadu/storeu
 #define MM_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm_fmadd_pd(v20, v21, _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
 #define MM_FMA4_PD(v00, v01, v10, v11, v20, v21, v30, v31) _mm_add_pd(\
        _mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21)), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
-#define MM_FMA5_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41) _mm_fmadd_pd(v40, v41, \
-       _mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21)), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
+#define MM_FMA5_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41) _mm_add_pd(_mm_fmadd_pd(v40, v41, \
+       _mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21))), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
 #define MM_FMA6_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41, v50, v51) _mm_add_pd(\
        _mm_fmadd_pd(v50, v51, _mm_fmadd_pd(v40, v41, _mm_mul_pd(v30, v31))), \
        _mm_fmadd_pd(v20, v21, _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01))) )
@@ -848,6 +827,184 @@ LSU : Unalignment (use loadu/storeu
 #define MM_LSU_MUL_PS(ptr, vec_a) _mm_storeu_ps(ptr, _mm_mul_ps(_mm_loadu_ps(ptr), vec_a))
 #endif
 
+/*
+ * Single-lane extraction helpers.
+ *
+ *   MM_EXTRACT_F32(reg, idx)    - float  lane idx (0..3) of an __m128
+ *   MM_EXTRACT_F64(reg, idx)    - double lane idx (0..1) of an __m128d
+ *   MM_EXTRACT_I32(reg, idx)    - int32  lane idx (0..3) of an __m128i
+ *   MM256_EXTRACT_I32(reg, idx) - int32  lane idx (0..7) of an __m256i
+ *
+ * Outside MSVC the 128-bit variants shuffle the requested lane down to
+ * lane 0 and read it with a cvt intrinsic; idx is used as the shuffle
+ * immediate, so it must be a compile-time constant.  reg is expanded twice
+ * there, so pass a plain variable rather than an expression with side
+ * effects.  The MSVC variants read the vector's union members directly.
+ */
+#if (USE_X86_EXT_INTRIN >= 1)
+#if !(defined(_MSC_VER) || defined(MSC_VER))
+#define MM_EXTRACT_F32(reg,idx) _mm_cvtss_f32(_mm_shuffle_ps(reg,reg,idx))
+#define MM_EXTRACT_F64(reg,idx) _mm_cvtsd_f64(_mm_shuffle_pd(reg,reg,idx))
+#define MM_EXTRACT_I32(reg,idx) _mm_cvtsi128_si32(_mm_shuffle_epi32(reg,idx))
+#define MM256_EXTRACT_I32(reg,idx) _mm256_extract_epi32(reg,idx)
+#else
+#define MM_EXTRACT_F32(reg,idx) ((reg).m128_f32[idx])
+#define MM_EXTRACT_F64(reg,idx) ((reg).m128d_f64[idx])
+#define MM_EXTRACT_I32(reg,idx) ((reg).m128i_i32[idx])
+#define MM256_EXTRACT_I32(reg,idx) ((reg).m256i_i32[idx])
+#endif
+#endif // (USE_X86_EXT_INTRIN >= 1)
+
+/*
+       gather and scatter
+*/
+
+#if (USE_X86_EXT_INTRIN >= 9)
+#if (USE_X86_EXT_INTRIN >= 9)
+#define MM256_I32GATHER_I32(base, offset, scale) _mm256_i32gather_epi32(base, offset, scale)
+#else
+
+/*
+ * Software stand-in for _mm256_i32gather_epi32(): loads eight int32 values
+ * from the byte addresses base + offset[k] * scale and returns them packed
+ * in lane order.
+ *
+ *   base   - address the offsets are relative to
+ *   offset - eight 32-bit element indices
+ *   scale  - byte multiplier applied to every index
+ *
+ * On IX64CPU builds each scaled offset is sign-extended to 64 bits before it
+ * is added to base, so a negative index addresses memory below base (the
+ * hardware gather sign-extends its indices as well).
+ */
+static TIMIDITY_FORCEINLINE __m256i mm256_i32gather_i32_impl(const int *base, __m256i offset, int scale)
+{
+       ALIGN32 int32 buf[8];
+       __m256i byte_offset = _mm256_mullo_epi32(offset, _mm256_set1_epi32(scale));
+#ifdef IX64CPU
+       /* all-ones where the offset is negative: upper half of the 64-bit sign extension */
+       __m256i sign = _mm256_srai_epi32(byte_offset, 31);
+       __m256i vbase = _mm256_set1_epi64x((int64)base);
+       /* unpacklo/unpackhi interleave per 128-bit half, hence lanes 0,1,4,5 and 2,3,6,7 */
+       __m256i vptr0145 = _mm256_add_epi64(vbase, _mm256_unpacklo_epi32(byte_offset, sign));
+       __m256i vptr2367 = _mm256_add_epi64(vbase, _mm256_unpackhi_epi32(byte_offset, sign));
+       /* one 256-bit store fills exactly four 64-bit pointers */
+       ALIGN32 const int32 *ptr0145[4];
+       ALIGN32 const int32 *ptr2367[4];
+       _mm256_store_si256((__m256i *)ptr0145, vptr0145);
+       _mm256_store_si256((__m256i *)ptr2367, vptr2367);
+
+       buf[0] = *ptr0145[0];
+       buf[1] = *ptr0145[1];
+       buf[2] = *ptr2367[0];
+       buf[3] = *ptr2367[1];
+       buf[4] = *ptr0145[2];
+       buf[5] = *ptr0145[3];
+       buf[6] = *ptr2367[2];
+       buf[7] = *ptr2367[3];
+#else
+       int i;
+       /* 32-bit pointers: the addresses fit in buf itself, then get replaced by the loaded values */
+       __m256i pointers = _mm256_add_epi32(_mm256_set1_epi32((int32)base), byte_offset);
+       _mm256_store_si256((__m256i *)buf, pointers);
+
+       for (i = 0; i < 8; i++) {
+               buf[i] = *(const int *)buf[i];
+       }
+#endif
+
+       return _mm256_load_si256((const __m256i *)buf);
+}
+
+#define MM256_I32GATHER_I32(base, offset, scale) mm256_i32gather_i32_impl(base, offset, scale)
+#endif // (USE_X86_EXT_INTRIN >= 9)
+
+/*
+ * Software scatter of eight int32 lanes: writes val[k] to the byte address
+ * base + offset[k] * scale for k = 0..7, in ascending lane order (so when
+ * two lanes target the same address the higher lane's value remains).
+ *
+ *   base   - address the offsets are relative to
+ *   offset - eight 32-bit element indices
+ *   val    - eight int32 values to store
+ *   scale  - byte multiplier applied to every index
+ *
+ * On IX64CPU builds each scaled offset is sign-extended to 64 bits before it
+ * is added to base, so a negative index addresses memory below base.
+ * Declarations are kept at the top of each block, matching
+ * mm_i32scatter_i32_impl().
+ */
+static TIMIDITY_FORCEINLINE void mm256_i32scatter_i32_impl(void *base, __m256i offset, __m256i val, int scale)
+{
+       ALIGN32 int32 buf[8];
+       __m256i byte_offset;
+
+       _mm256_store_si256((__m256i *)buf, val);
+       byte_offset = _mm256_mullo_epi32(offset, _mm256_set1_epi32(scale));
+#ifdef IX64CPU
+       {
+               /* all-ones where the offset is negative: upper half of the 64-bit sign extension */
+               __m256i sign = _mm256_srai_epi32(byte_offset, 31);
+               __m256i vbase = _mm256_set1_epi64x((int64)base);
+               /* unpacklo/unpackhi interleave per 128-bit half, hence lanes 0,1,4,5 and 2,3,6,7 */
+               __m256i vptr0145 = _mm256_add_epi64(vbase, _mm256_unpacklo_epi32(byte_offset, sign));
+               __m256i vptr2367 = _mm256_add_epi64(vbase, _mm256_unpackhi_epi32(byte_offset, sign));
+               ALIGN32 int32 *ptr0145[4];
+               ALIGN32 int32 *ptr2367[4];
+               _mm256_store_si256((__m256i *)ptr0145, vptr0145);
+               _mm256_store_si256((__m256i *)ptr2367, vptr2367);
+
+               *ptr0145[0] = buf[0];
+               *ptr0145[1] = buf[1];
+               *ptr2367[0] = buf[2];
+               *ptr2367[1] = buf[3];
+               *ptr0145[2] = buf[4];
+               *ptr0145[3] = buf[5];
+               *ptr2367[2] = buf[6];
+               *ptr2367[3] = buf[7];
+       }
+#else
+       {
+               int i;
+               __m256i vptr = _mm256_add_epi32(_mm256_set1_epi32((int32)base), byte_offset);
+               ALIGN32 int32 *ptr[8];
+               _mm256_store_si256((__m256i *)ptr, vptr);
+
+               for (i = 0; i < 8; i++) {
+                       *ptr[i] = buf[i];
+               }
+       }
+#endif
+}
+
+#define MM256_I32SCATTER_I32(base, offset, val, scale) mm256_i32scatter_i32_impl(base, offset, val, scale)
+
+#endif // (USE_X86_EXT_INTRIN >= 9)
+
+#if (USE_X86_EXT_INTRIN >= 1)
+#if (USE_X86_EXT_INTRIN >= 9)
+#define MM_I32GATHER_I32(base, offset, scale) _mm_i32gather_epi32(base, offset, scale)
+#elif (USE_X86_EXT_INTRIN >= 6)
+
+/*
+ * Software stand-in for _mm_i32gather_epi32(): loads four int32 values from
+ * the byte addresses base + offset[k] * scale and returns them packed in
+ * lane order.
+ *
+ *   base   - address the offsets are relative to
+ *   offset - four 32-bit element indices
+ *   scale  - byte multiplier applied to every index
+ *
+ * On IX64CPU builds each scaled offset is sign-extended to 64 bits before it
+ * is added to base, so a negative index addresses memory below base (the
+ * hardware gather sign-extends its indices as well).
+ */
+static TIMIDITY_FORCEINLINE __m128i mm_i32gather_i32_impl(const int *base, __m128i offset, int scale)
+{
+       ALIGN16 int32 buf[4];
+       __m128i byte_offset = _mm_mullo_epi32(offset, _mm_set1_epi32(scale));
+#ifdef IX64CPU
+       /* all-ones where the offset is negative: upper half of the 64-bit sign extension */
+       __m128i sign = _mm_srai_epi32(byte_offset, 31);
+       __m128i vbase = _mm_set1_epi64x((int64)base);
+       __m128i vptr01 = _mm_add_epi64(vbase, _mm_unpacklo_epi32(byte_offset, sign));
+       __m128i vptr23 = _mm_add_epi64(vbase, _mm_unpackhi_epi32(byte_offset, sign));
+       ALIGN16 const int32 *ptr01[2];
+       ALIGN16 const int32 *ptr23[2];
+       _mm_store_si128((__m128i *)ptr01, vptr01);
+       _mm_store_si128((__m128i *)ptr23, vptr23);
+
+       buf[0] = *ptr01[0];
+       buf[1] = *ptr01[1];
+       buf[2] = *ptr23[0];
+       buf[3] = *ptr23[1];
+#else
+       int i;
+       /* 32-bit pointers: the addresses fit in buf itself, then get replaced by the loaded values */
+       __m128i pointers = _mm_add_epi32(_mm_set1_epi32((int32)base), byte_offset);
+       _mm_store_si128((__m128i *)buf, pointers);
+
+       for (i = 0; i < 4; i++) {
+               buf[i] = *(const int *)buf[i];
+       }
+#endif
+
+       return _mm_load_si128((const __m128i *)buf);
+}
+
+#define MM_I32GATHER_I32(base, offset, scale) mm_i32gather_i32_impl(base, offset, scale)
+#endif // (USE_X86_EXT_INTRIN >= 6)
+#endif // (USE_X86_EXT_INTRIN >= 1)
+
+#if (USE_X86_EXT_INTRIN >= 6)
+
+/*
+ * Software scatter of four int32 lanes: writes val[k] to the byte address
+ * base + offset[k] * scale for k = 0..3, in ascending lane order (so when
+ * two lanes target the same address the higher lane's value remains).
+ *
+ *   base   - address the offsets are relative to
+ *   offset - four 32-bit element indices
+ *   val    - four int32 values to store
+ *   scale  - byte multiplier applied to every index
+ *
+ * On IX64CPU builds each scaled offset is sign-extended to 64 bits before it
+ * is added to base, so a negative index addresses memory below base.
+ */
+static TIMIDITY_FORCEINLINE void mm_i32scatter_i32_impl(void *base, __m128i offset, __m128i val, int scale)
+{
+       ALIGN16 int32 buf[4];
+       __m128i byte_offset;
+
+       _mm_store_si128((__m128i *)buf, val);
+       byte_offset = _mm_mullo_epi32(offset, _mm_set1_epi32(scale));
+#ifdef IX64CPU
+       {
+               /* all-ones where the offset is negative: upper half of the 64-bit sign extension */
+               __m128i sign = _mm_srai_epi32(byte_offset, 31);
+               __m128i vbase = _mm_set1_epi64x((int64)base);
+               __m128i vptr01 = _mm_add_epi64(vbase, _mm_unpacklo_epi32(byte_offset, sign));
+               __m128i vptr23 = _mm_add_epi64(vbase, _mm_unpackhi_epi32(byte_offset, sign));
+               ALIGN16 int32 *ptr01[2];
+               ALIGN16 int32 *ptr23[2];
+               _mm_store_si128((__m128i *)ptr01, vptr01);
+               _mm_store_si128((__m128i *)ptr23, vptr23);
+
+               *ptr01[0] = buf[0];
+               *ptr01[1] = buf[1];
+               *ptr23[0] = buf[2];
+               *ptr23[1] = buf[3];
+       }
+#else
+       {
+               __m128i vptr = _mm_add_epi32(_mm_set1_epi32((int32)base), byte_offset);
+               ALIGN16 int32 *ptr[4];
+               _mm_store_si128((__m128i *)ptr, vptr);
+
+               *ptr[0] = buf[0];
+               *ptr[1] = buf[1];
+               *ptr[2] = buf[2];
+               *ptr[3] = buf[3];
+       }
+#endif
+}
+
+#define MM_I32SCATTER_I32(base, offset, val, scale) mm_i32scatter_i32_impl(base, offset, val, scale)
+
+#endif // (USE_X86_EXT_INTRIN >= 6)
 
 #define IS_ALIGN(ptr) (!((int32)ptr & (ALIGN_SIZE - 1)))
 extern int is_x86ext_available(void);
@@ -923,4 +1080,8 @@ static inline void *switch_memset(void *destp, int c, size_t len)
 #define memset switch_memset
 #endif /* altivec */
 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
 #endif /* OPTCODE_H_INCLUDED */
index e7f35c5..df7a0cc 100644 (file)
@@ -123,6 +123,10 @@ extern PlayMode wdmks_play_mode;
 extern PlayMode wasapi_play_mode;
 #endif /* AU_WASAPI */
 
+#ifdef AU_ASIO
+extern PlayMode asio_play_mode;
+#endif /* AU_ASIO */
+
 #ifdef AU_PORTAUDIO
 #ifndef AU_PORTAUDIO_DLL
 extern PlayMode portaudio_play_mode;
@@ -228,6 +232,10 @@ PlayMode *play_mode_list[] = {
   &wasapi_play_mode,
 #endif /* AU_WASAPI */
 
+#if defined(AU_ASIO)
+  &asio_play_mode,
+#endif /* AU_ASIO */
+
 #if defined(AU_PORTAUDIO)
 #ifndef AU_PORTAUDIO_DLL
   &portaudio_play_mode,
@@ -503,20 +511,10 @@ static void CALLINGCONV f64tos8(DATA_T *lp, int32 c)
        __m128 vmul = _mm_set1_ps((float)MAX_8BIT_SIGNED);      
        for(i = 0; i < c; i += 4){
                __m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               cp[i] = (int8)(out[0]);
-               cp[i] = (int8)(out[1]);
-               cp[i] = (int8)(out[2]);
-               cp[i] = (int8)(out[3]); 
-               }
-#else
-               cp[i] = (int8)(vec_f.m128_f32[0]);
-               cp[i] = (int8)(vec_f.m128_f32[1]);
-               cp[i] = (int8)(vec_f.m128_f32[2]);
-               cp[i] = (int8)(vec_f.m128_f32[3]);      
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               cp[i] = (int8)(MM_EXTRACT_F32(vec_f,0));
+               cp[i] = (int8)(MM_EXTRACT_F32(vec_f,1));
+               cp[i] = (int8)(MM_EXTRACT_F32(vec_f,2));
+               cp[i] = (int8)(MM_EXTRACT_F32(vec_f,3));        
        }
 }
 #else
@@ -600,20 +598,10 @@ static void CALLINGCONV f64tou8(DATA_T *lp, int32 c)
        __m128i vex = _mm_set1_epi8(0x80);      
        for(i = 0; i < c; i += 4){
                __m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               cp[i] = 0x80 ^ (uint8)(out[0]);
-               cp[i] = 0x80 ^ (uint8)(out[1]);
-               cp[i] = 0x80 ^ (uint8)(out[2]);
-               cp[i] = 0x80 ^ (uint8)(out[3]); 
-               }
-#else
-               cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[0]);
-               cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[1]);
-               cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[2]);
-               cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               cp[i] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,0));
+               cp[i] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,1));
+               cp[i] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,2));
+               cp[i] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,3));
        }
 }
 #else
@@ -636,20 +624,10 @@ static void CALLINGCONV f64toulaw(DATA_T *lp, int32 c)
        __m256d vmul = _mm256_set1_pd((double)MAX_16BIT_SIGNED);        
        for(i = 0; i < c; i += 4){
                __m128i vec0 = _mm256_cvttpd_epi32(_mm256_mul_pd(D256_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               int32 *out = (int32 *)vec0;
-               up[i] = AUDIO_S2U(out[0]);
-               up[i + 1] = AUDIO_S2U(out[1]);
-               up[i + 2] = AUDIO_S2U(out[2]);
-               up[i + 3] = AUDIO_S2U(out[3]);
-               }
-#else
-               up[i] = AUDIO_S2U(vec0.m128i_i32[0]);
-               up[i + 1] = AUDIO_S2U(vec0.m128i_i32[1]);
-               up[i + 2] = AUDIO_S2U(vec0.m128i_i32[2]);
-               up[i + 3] = AUDIO_S2U(vec0.m128i_i32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec0,0));
+               up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec0,1));
+               up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec0,2));
+               up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec0,3));
        }
 }
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
@@ -664,20 +642,10 @@ static void CALLINGCONV f64toulaw(DATA_T *lp, int32 c)
                __m128 vec_f12 = _mm_cvtpd_ps(_mm_load_pd(&lp[i + 2]));
                __m128 vec_f1 = _mm_shuffle_ps(vec_f11, vec_f12, 0x44);
                __m128i vec_i32 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_MM(vec_f1, gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               int32 *out = (int32 *)vec_i32;
-               up[i] = AUDIO_S2U(out[0]);
-               up[i + 1] = AUDIO_S2U(out[1]);
-               up[i + 2] = AUDIO_S2U(out[2]);
-               up[i + 3] = AUDIO_S2U(out[3]);
-               }
-#else
-               up[i] = AUDIO_S2U(vec_i32.m128i_i32[0]);
-               up[i + 1] = AUDIO_S2U(vec_i32.m128i_i32[1]);
-               up[i + 2] = AUDIO_S2U(vec_i32.m128i_i32[2]);
-               up[i + 3] = AUDIO_S2U(vec_i32.m128i_i32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,0));
+               up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,1));
+               up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,2));
+               up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,3));
        }       
 }
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_FLOAT)
@@ -689,20 +657,10 @@ static void CALLINGCONV f64toulaw(DATA_T *lp, int32 c)
        __m128 vmul = _mm_set1_ps((float)MAX_16BIT_SIGNED);
        for(i = 0; i < c; i += 4){
                __m128i vec0 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               int32 *out = (int32 *)vec0;
-               up[i] = AUDIO_S2U(out[0]);
-               up[i + 1] = AUDIO_S2U(out[1]);
-               up[i + 2] = AUDIO_S2U(out[2]);
-               up[i + 3] = AUDIO_S2U(out[3]);
-               }
-#else
-               up[i] = AUDIO_S2U(vec0.m128i_i32[0]);
-               up[i + 1] = AUDIO_S2U(vec0.m128i_i32[1]);
-               up[i + 2] = AUDIO_S2U(vec0.m128i_i32[2]);
-               up[i + 3] = AUDIO_S2U(vec0.m128i_i32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec0,0));
+               up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec0,1));
+               up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec0,2));
+               up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec0,3));
        }
 }
 #else
@@ -725,20 +683,10 @@ static void CALLINGCONV f64toalaw(DATA_T *lp, int32 c)
        __m256d vmul = _mm256_set1_pd((double)MAX_16BIT_SIGNED);                
        for(i = 0; i < c; i += 4){
                __m128i vec0 = _mm256_cvttpd_epi32(_mm256_mul_pd(D256_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               int32 *out = (int32 *)vec0;
-               up[i] = AUDIO_S2A(out[0]);
-               up[i + 1] = AUDIO_S2A(out[1]);
-               up[i + 2] = AUDIO_S2A(out[2]);
-               up[i + 3] = AUDIO_S2A(out[3]);
-               }
-#else
-               up[i] = AUDIO_S2A(vec0.m128i_i32[0]);
-               up[i + 1] = AUDIO_S2A(vec0.m128i_i32[1]);
-               up[i + 2] = AUDIO_S2A(vec0.m128i_i32[2]);
-               up[i + 3] = AUDIO_S2A(vec0.m128i_i32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               up[i] = AUDIO_S2A(MM_EXTRACT_I32(vec0,0));
+               up[i + 1] = AUDIO_S2A(MM_EXTRACT_I32(vec0,1));
+               up[i + 2] = AUDIO_S2A(MM_EXTRACT_I32(vec0,2));
+               up[i + 3] = AUDIO_S2A(MM_EXTRACT_I32(vec0,3));
        }
 }
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
@@ -753,20 +701,10 @@ static void CALLINGCONV f64toalaw(DATA_T *lp, int32 c)
                __m128 vec_f12 = _mm_cvtpd_ps(_mm_load_pd(&lp[i + 2]));
                __m128 vec_f1 = _mm_shuffle_ps(vec_f11, vec_f12, 0x44);
                __m128i vec_i32 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_MM(vec_f1, gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               int32 *out = (int32 *)vec_i32;
-               up[i] = AUDIO_S2A(out[0]);
-               up[i + 1] = AUDIO_S2A(out[1]);
-               up[i + 2] = AUDIO_S2A(out[2]);
-               up[i + 3] = AUDIO_S2A(out[3]);
-               }
-#else
-               up[i] = AUDIO_S2A(vec_i32.m128i_i32[0]);
-               up[i + 1] = AUDIO_S2A(vec_i32.m128i_i32[1]);
-               up[i + 2] = AUDIO_S2A(vec_i32.m128i_i32[2]);
-               up[i + 3] = AUDIO_S2A(vec_i32.m128i_i32[3]);
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               up[i] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,0));
+               up[i + 1] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,1));
+               up[i + 2] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,2));
+               up[i + 3] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,3));
        }
 }
 #else
@@ -839,20 +777,10 @@ static void CALLINGCONV f64tos16(DATA_T *lp, int32 c)
        __m128 vmul = _mm_set1_ps((float)MAX_16BIT_SIGNED);     
        for(i = 0; i < c; i += 4){
                __m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               sp[i] = (int16)(out[0]);
-               sp[i] = (int16)(out[1]);
-               sp[i] = (int16)(out[2]);
-               sp[i] = (int16)(out[3]);        
-               }
-#else
-               sp[i] = (int16)(vec_f.m128_f32[0]);
-               sp[i] = (int16)(vec_f.m128_f32[1]);
-               sp[i] = (int16)(vec_f.m128_f32[2]);
-               sp[i] = (int16)(vec_f.m128_f32[3]);             
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               sp[i] = (int16)(MM_EXTRACT_F32(vec_f,0));
+               sp[i] = (int16)(MM_EXTRACT_F32(vec_f,1));
+               sp[i] = (int16)(MM_EXTRACT_F32(vec_f,2));
+               sp[i] = (int16)(MM_EXTRACT_F32(vec_f,3));               
        }
 }
 #else
@@ -1061,20 +989,10 @@ static void CALLINGCONV f64tos24(DATA_T *lp, int32 c)
        __m128 vmul = _mm_set1_ps((float)MAX_24BIT_SIGNED);
        for(i = 0; i < c; i += 4){ // 108 inst in loop
                __m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               STORE_S24(cp, (int32)(out[0]));
-               STORE_S24(cp, (int32)(out[1]));
-               STORE_S24(cp, (int32)(out[2]));
-               STORE_S24(cp, (int32)(out[3]));
-               }
-#else
-               STORE_S24(cp, (int32)(vec_f.m128_f32[0]));
-               STORE_S24(cp, (int32)(vec_f.m128_f32[1]));
-               STORE_S24(cp, (int32)(vec_f.m128_f32[2]));
-               STORE_S24(cp, (int32)(vec_f.m128_f32[3]));      
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,0)));
+               STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,1)));
+               STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,2)));
+               STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,3)));        
        }
 }
 #else
@@ -1185,20 +1103,10 @@ static void CALLINGCONV f64tos32(DATA_T *lp, int32 c)
        __m128 vmul = _mm_set1_ps((float)MAX_32BIT_SIGNED);     
        for(i = 0; i < c; i += 4){
                __m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               sp[i] = (int32)(out[0]);
-               sp[i] = (int32)(out[1]);
-               sp[i] = (int32)(out[2]);
-               sp[i] = (int32)(out[3]);        
-               }
-#else
-               sp[i] = (int32)(vec_f.m128_f32[0]);
-               sp[i] = (int32)(vec_f.m128_f32[1]);
-               sp[i] = (int32)(vec_f.m128_f32[2]);
-               sp[i] = (int32)(vec_f.m128_f32[3]);     
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               sp[i] = (int32)(MM_EXTRACT_F32(vec_f,0));
+               sp[i] = (int32)(MM_EXTRACT_F32(vec_f,1));
+               sp[i] = (int32)(MM_EXTRACT_F32(vec_f,2));
+               sp[i] = (int32)(MM_EXTRACT_F32(vec_f,3));       
        }
 }
 #else
@@ -1684,20 +1592,10 @@ static void CALLINGCONV f64tof64(DATA_T *lp, int32 c)
        __m128 gain = _mm_set1_ps((float)INPUT_GAIN);
        for(i = c - 4; i >= 0; i -= 4){
                __m128 vec_f = F128_CLIP_INPUT(&lp[i], gain);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_f;
-               sp[i] = (double)(out[0]);
-               sp[i] = (double)(out[1]);
-               sp[i] = (double)(out[2]);
-               sp[i] = (double)(out[3]);       
-               }
-#else
-               sp[i] = (double)(vec_f.m128_f32[0]);
-               sp[i] = (double)(vec_f.m128_f32[1]);
-               sp[i] = (double)(vec_f.m128_f32[2]);
-               sp[i] = (double)(vec_f.m128_f32[3]);            
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+               sp[i] = (double)(MM_EXTRACT_F32(vec_f,0));
+               sp[i] = (double)(MM_EXTRACT_F32(vec_f,1));
+               sp[i] = (double)(MM_EXTRACT_F32(vec_f,2));
+               sp[i] = (double)(MM_EXTRACT_F32(vec_f,3));              
        }
 }
 #elif defined(DATA_T_DOUBLE)
index ced166c..ad71194 100644 (file)
@@ -4486,7 +4486,7 @@ static void start_note(MidiEvent *e, int i, int vid, int cnt, int add_delay_cnt)
                if(!special_patch[j]){
                        vp->reserve_offset = 0;
                }else{
-                       vp->reserve_offset = special_patch[j]->sample_offset << FRACTION_BITS;
+                       vp->reserve_offset = (splen_t)special_patch[j]->sample_offset << FRACTION_BITS;
                        if(vp->sample->modes & MODES_LOOPING)  {
                                if(vp->reserve_offset > vp->sample->loop_end)
                                        vp->reserve_offset = vp->sample->loop_start;
@@ -11377,16 +11377,8 @@ static inline void mix_ch_signal_source(DATA_T *src, int ch, int count)
                                        vevol = _mm_shuffle_ps(vevol, vevol, 0x44);
                                }
                                vsp = _mm_mul_ps(_mm_loadu_ps(src), vevol);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-                               {
-                               float *out = (float *)vsp;
-                               *(src++) = out[0];
-                               *(src++) = out[1];
-                               }
-#else
-                               *(src++) = vsp.m128_f32[0];
-                               *(src++) = vsp.m128_f32[1];     
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+                               *(src++) = MM_EXTRACT_F32(vsp,0);
+                               *(src++) = MM_EXTRACT_F32(vsp,1);       
                        }
 
 #else // ! USE_X86_EXT_INTRIN
index e5e93b0..209f41f 100644 (file)
@@ -945,4 +945,8 @@ extern void free_playmidi(void);
 extern int32 get_current_play_tempo(void);
 extern void init_voice(int i);
 extern void update_voice(int i);
+
+extern int calc_bend_val(int val);
+extern void kill_all_voices(void);
+
 #endif /* ___PLAYMIDI_H_ */
index 249e58c..4fc6f19 100644 (file)
@@ -52,6 +52,7 @@
 
 #ifdef __W32__
 #include <windows.h>
+#include <mmsystem.h>
 #endif /* __W32__ */
 #include <portaudio.h>
 #ifdef PORTAUDIO_V19
index f904db3..6cdd785 100644 (file)
@@ -201,6 +201,11 @@ extern void control_effect_xg(int ch);
 extern void recompute_multi_eq_sd(void);
 extern void recompute_mfx_effect_sd(struct mfx_effect_sd_t *st, int marge);
 extern void realloc_mfx_effect_sd(struct mfx_effect_sd_t *st, int patch);
+extern void recompute_chorus_status_sd(struct mfx_effect_sd_t *st, int marge);
+extern void realloc_chorus_status_sd(struct mfx_effect_sd_t *st, int patch);
+extern void recompute_chorus_status_sd(struct mfx_effect_sd_t *st, int marge);
+extern void realloc_reverb_status_sd(struct mfx_effect_sd_t *st, int patch);
+extern void recompute_reverb_status_sd(struct mfx_effect_sd_t *st, int marge);
 extern void control_effect_sd(MidiEvent *ev);
 
 extern Instrument *recompute_userdrum(int bank, int prog, int elm);
@@ -214,8 +219,11 @@ extern void init_channel_layer(int);
 extern void add_channel_layer(int, int);
 extern void remove_channel_layer(int);
 
+extern void readmidi_read_init(void);
 extern void free_readmidi(void);
 
+extern void free_time_segments(void);
+
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
 #endif /* ___READMIDI_H_ */
index a9ce992..89242aa 100644 (file)
@@ -157,7 +157,7 @@ static DATA_T resample_none_double(const sample_t *srci, splen_t ofs, resample_r
 
 
 /* Simple linear interpolation */
-static DATA_T resample_linear(const sample_t *src, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_linear(const sample_t *src, splen_t ofs, resample_rec_t *rec)
 {
        const spos_t ofsi = ofs >> FRACTION_BITS;
        fract_t ofsf = ofs & FRACTION_MASK;
@@ -178,19 +178,17 @@ static DATA_T resample_linear_int32(const sample_t *srci, splen_t ofs, resample_
 {
        const int32 *src = (const int32*)srci;
        const spos_t ofsi = ofs >> FRACTION_BITS;
-//     FLOAT_T v1 = src[ofsi], fp = (ofs & FRACTION_MASK);
-//     return (v1 + (FLOAT_T)((int64)(src[ofsi + 1]) - (int64)(src[ofsi])) * fp * div_fraction) * OUT_INT32; // FLOAT_T
 #if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
-       FLOAT_T v1 = src[ofsi], fp = (ofs & FRACTION_MASK);
-    return (v1 + (FLOAT_T)((int64)(src[ofsi + 1]) - (int64)(src[ofsi])) * fp * div_fraction) * OUT_INT32; // FLOAT_T
+    FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = (ofs & FRACTION_MASK);
+    return (v1 + (v2 - v1) * fp * div_fraction) * OUT_INT32; // FLOAT_T
 #else // DATA_T_IN32
        fract_t ofsf = ofs & FRACTION_MASK;
-    int32 v1 = src[ofsi], v2 = src[ofsi + 1];
-       return v1 + imuldiv_fraction_int32(v2 - v1, ofsf);
+    int32 v1 = src[ofsi] >> 16, v2 = src[ofsi + 1] >> 16;
+       return v1 + imuldiv_fraction(v2 - v1, ofsf);
 #endif
 }
 
-static DATA_T resample_linear_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_linear_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
 {
     const float *src = (const float*)srci;
        const spos_t ofsi = ofs >> FRACTION_BITS;
@@ -431,13 +429,10 @@ loop_ofs:
        temp1 = (v[1] + v[2]) *  DIV_6 * div_fraction;
        return temp1 * OUT_INT32; // FLOAT_T
 do_linear:
-       //v[1] = src[ofsi];
-       //v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
-       //return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
 #if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
     v[1] = src[ofsi];
-       v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
-    return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
+       v[2] = src[ofsi + 1];
+    return (v[1] + (v[2] - v[1]) * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
 #else // DATA_T_IN32
        v[1] = src[ofsi];
        v[2] = src[ofsi + 1];   
@@ -649,7 +644,7 @@ do_linear:
    just keep this labeled as resample_lagrange(), even if it really is the
    Newton form of the polynomial. */
 
-static DATA_T resample_lagrange(const sample_t *src, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_lagrange(const sample_t *src, splen_t ofs, resample_rec_t *rec)
 {
     const spos_t ofsi = ofs >> FRACTION_BITS;
     fract_t ofsf = ofs & FRACTION_MASK;
@@ -858,13 +853,10 @@ loop_ofs:
        v[3] += v[0];
        return v[3] * OUT_INT32;
 do_linear:
-       //v[1] = src[ofsi];
-       //v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
-       //return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T   
 #if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
     v[1] = src[ofsi];
-       v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
-    return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
+       v[2] = src[ofsi + 1];
+    return (v[1] + (v[2] - v[1]) * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
 #else // DATA_T_IN32
        v[1] = src[ofsi];
        v[2] = src[ofsi + 1];   
@@ -872,7 +864,7 @@ do_linear:
 #endif
 }
 
-static DATA_T resample_lagrange_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_lagrange_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
 {
     const float *src = (const float*)srci;
     const spos_t ofsi = ofs >> FRACTION_BITS;
@@ -1570,7 +1562,7 @@ static DATA_T resample_gauss(const sample_t *src, splen_t ofs, resample_rec_t *r
                double tmp;
                for (i = 0; i < gauss_n; i += 8){
 #if (USE_X86_EXT_INTRIN >= 9)
-                       __m256i vec32 = _mm256_cvtepi16_epi32(_mm256_loadu_si256((__m128i *)&sptr[i])); // low i16*8 > i32*8
+                       __m256i vec32 = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&sptr[i])); // low i16*8 > i32*8
                        __m128i vec1 = _mm256_extracti128_si256(vec32, 0x0);
                        __m128i vec2 = _mm256_extracti128_si256(vec32, 0x1);
 #else
@@ -2278,9 +2270,6 @@ static DATA_T resample_sharp_int32(const sample_t *srci, splen_t ofs, resample_r
        FLOAT_T c,s = 0.0, va = 0.0, vb = 0.0;
        
        if(rec->mode == RESAMPLE_MODE_BIDIR_LOOP){
-               //FLOAT_T v1 = src[ofsi];
-               //FLOAT_T v2 = src[ofsi + 1];   
-               //return (v1 + (v2 - v1) * fp) * OUT_INT32;     
 #if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
                FLOAT_T v1 = src[ofsi];
                FLOAT_T v2 = src[ofsi + 1];     
@@ -2897,11 +2886,11 @@ static DATA_T resample_lanczos(const sample_t *src, splen_t ofs, resample_rec_t
        width *= 2;
 #if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x7)){
-               __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+               __m256d sum = _mm256_setzero_pd();
                __m128d sum1, sum2;     
                for (i = 0; i < width; i += 8){
 #if (USE_X86_EXT_INTRIN >= 9)
-                       __m256i vec32 = _mm256_cvtepi16_epi32(_mm256_loadu_si256((__m128i *)&v1[i])); // low i16*8 > i32*8
+                       __m256i vec32 = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&v1[i])); // low i16*8 > i32*8
                        __m128i vec1 = _mm256_extracti128_si256(vec32, 0x0);
                        __m128i vec2 = _mm256_extracti128_si256(vec32, 0x1);
 #else
@@ -2918,26 +2907,19 @@ static DATA_T resample_lanczos(const sample_t *src, splen_t ofs, resample_rec_t
                sum1 = _mm_add_pd(sum1, _mm_shuffle_pd(sum1, sum1, 0x1)); // v0=v0+v1 v1=v1+v0  
                _mm_store_sd(&sample_sum, sum1);
        }else
-#elif (USE_X86_EXT_INTRIN >= 6) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
-       if(width >= 16 && !(width & 0x3)){
-               __m128d sum = _mm_set_pd(0, 0);
-               for (i = 0; i < width; i += 4){
-                       __m128i vec32l = _mm_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&v1[i])); // low i16*4 > i32*4
-                       __m128d vecd0 = _mm_cvtepi32_pd(vec32l); // low low i32*2 > d*2
-                       __m128d vecd2 = _mm_cvtepi32_pd(_mm_shuffle_epi32(vec32l, 0x4e)); // low hi i32*2 > d*2
-                       sum = MM_FMA_PD(vecd0, _mm_load_pd(&coef[i]), sum);
-                       sum = MM_FMA_PD(vecd2, _mm_load_pd(&coef[i + 2]), sum);
-               }
-               sum = _mm_add_pd(sum, _mm_shuffle_pd(sum, sum, 0x1)); // v0=v0+v1 v1=v1+v0
-               _mm_store_sd(&sample_sum, sum);
-       }else
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x3)){
-               __m128d sum1 = _mm_set_pd(0, 0);
-               __m128d sum2 = _mm_set_pd(0, 0);
+               __m128d sum1 = _mm_setzero_pd();
+               __m128d sum2 = _mm_setzero_pd();
                for (i = 0; i < width; i += 4){
-                       __m128d vecd0 = _mm_set_pd(v1[i + 1], v1[i]);
-                       __m128d vecd2 = _mm_set_pd(v1[i + 3], v1[i + 2]);
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 , _mm_ cvtepi16_epi32()
+                       __m128i vi16 = _mm_loadu_si128((__m128i *)&v1[i]);
+                       __m128i vi32 = _mm_cvtepi16_epi32(vi16);
+#else
+                       __m128i vi32 = _mm_set_epi32(v1[i + 3], v1[i + 2], v1[i + 1], v1[i]);
+#endif
+                       __m128d vecd0 = _mm_cvtepi32_pd(vi32);
+                       __m128d vecd2 = _mm_cvtepi32_pd(_mm_shuffle_epi32(vi32, 0x4E)); // swap lo64 hi64
                        sum1 = MM_FMA_PD(vecd0, _mm_load_pd(&coef[i]), sum1);
                        sum2 = MM_FMA_PD(vecd2, _mm_load_pd(&coef[i + 2]), sum2);
                }
@@ -3046,7 +3028,7 @@ static DATA_T resample_lanczos_int32(const sample_t *srci, splen_t ofs, resample
        width *= 2;
 #if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x7)){
-               __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+               __m256d sum = _mm256_setzero_pd();
                __m128d sum1, sum2;     
                for (i = 0; i < width; i += 8){
 #if (USE_X86_EXT_INTRIN >= 9)
@@ -3068,8 +3050,8 @@ static DATA_T resample_lanczos_int32(const sample_t *srci, splen_t ofs, resample
        }else
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x3)){
-               __m128d sum1 = _mm_set_pd(0, 0);
-               __m128d sum2 = _mm_set_pd(0, 0);
+               __m128d sum1 = _mm_setzero_pd();
+               __m128d sum2 = _mm_setzero_pd();
                for (i = 0; i < width; i += 4){
                        __m128i vec32i0 = _mm_loadu_si128((__m128i *)&v1[i]);
                        sum1 = MM_FMA_PD(_mm_cvtepi32_pd(vec32i0), _mm_load_pd(&coef[i]), sum1);
@@ -3180,7 +3162,7 @@ static DATA_T resample_lanczos_float(const sample_t *srci, splen_t ofs, resample
        width *= 2;
 #if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x7)){
-               __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+               __m256d sum = _mm256_setzero_pd();
                __m128d sum1, sum2;     
                for (i = 0; i < width; i += 8){
                        __m256 vecf = _mm256_loadu_ps(&v1[i]);
@@ -3197,8 +3179,8 @@ static DATA_T resample_lanczos_float(const sample_t *srci, splen_t ofs, resample
        }else
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x3)){
-               __m128d sum1 = _mm_set_pd(0, 0);
-               __m128d sum2 = _mm_set_pd(0, 0);
+               __m128d sum1 = _mm_setzero_pd();
+               __m128d sum2 = _mm_setzero_pd();
                for (i = 0; i < width; i += 4){
                        __m128 vecf0 = _mm_loadu_ps(&v1[i]);
                        sum1 = MM_FMA_PD(_mm_cvtps_pd(vecf0), _mm_load_pd(&coef[i]), sum1);
@@ -3309,7 +3291,7 @@ static DATA_T resample_lanczos_double(const sample_t *srci, splen_t ofs, resampl
        width *= 2;
 #if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x7)){
-               __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+               __m256d sum = _mm256_setzero_pd();
                __m128d sum1, sum2;     
                for (i = 0; i < width; i += 8){
                        sum = MM256_FMA_PD(_mm256_loadu_pd(&v1[i]), _mm256_load_pd(&coef[i]), sum);
@@ -3323,8 +3305,8 @@ static DATA_T resample_lanczos_double(const sample_t *srci, splen_t ofs, resampl
        }else
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        if(width >= 16 && !(width & 0x3)){
-               __m128d sum1 = _mm_set_pd(0, 0);
-               __m128d sum2 = _mm_set_pd(0, 0);
+               __m128d sum1 = _mm_setzero_pd();
+               __m128d sum2 = _mm_setzero_pd();
                for (i = 0; i < width; i += 4){         
                        sum1 = MM_FMA_PD(_mm_loadu_pd(&v1[i]), _mm_load_pd(&coef[i]), sum1);
                        sum2 = MM_FMA_PD(_mm_loadu_pd(&v1[i + 2]), _mm_load_pd(&coef[i + 2]), sum2);
@@ -4111,49 +4093,10 @@ void uninitialize_resampler_coeffs(void)
 
 /*************** optimize linear resample *****************/
 #if defined(PRECALC_LOOPS)
-//#define LO_LOOP_CALC // interpolation sample loop calc
 #define LO_OPTIMIZE_INCREMENT
 
 static inline DATA_T resample_linear_single(Voice *vp)
 {      
-#ifdef LO_LOOP_CALC // interpolation sample loop calc
-/*
-\95â\8a®\93_\83\8b\81[\83v\90Ü\82è\95Ô\82µ\91Î\89\9e
-\82¾\82ª\8dÅ\93K\89»\82È\82Ì\82É\95\89\89×\82Ì\96â\91è\82ª\81E\81E
-SF2\8ed\97l\8f\80\8b\92(\83\8b\81[\83v\91O\8cã4\83T\83\93\83v\83\8b\82Ü\82½\82Í PAT(\83\8b\81[\83v\91O\8cã1\83T\83\93\83v\83\8b\81H) \82Å\82 \82ê\82Î\82»\82à\82»\82à\95s\97v\82È\82à\82Ì
-*/
-       sample_t *src = vp->sample->data;
-       const resample_rec_t *resrc = &vp->resrc;
-    const fract_t ofsf = resrc->offset & FRACTION_MASK;
-    const spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
-    const spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
-       const spos_t ofsi = resrc->offset >> FRACTION_BITS;
-       spos_t ofsi2 = ofsi + 1;
-       int32 v1, v2;
-               
-       switch(resrc->mode){
-       case RESAMPLE_MODE_PLAIN:
-               // safe end+128 sample
-               break;
-       case RESAMPLE_MODE_LOOP:
-               if(ofsi2 >= ofsle)
-                       ofsi2 = ofsi2 - (ofsle - ofsls);
-               break;
-       case RESAMPLE_MODE_BIDIR_LOOP:          
-               if(resrc->increment >= 0){
-                       if(ofsi2 >= ofsle)
-                               ofsi2 = (ofsle << 1) - ofsi2;
-               }
-               break;
-       }
-       v1 = src[ofsi];
-       v2 = src[ofsi2];        
-#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
-    return ((FLOAT_T)v1 + (FLOAT_T)(v2 - v1) * (FLOAT_T)ofsf * div_fraction) * OUT_INT16;
-#else // DATA_T_IN32
-    return (v1 + imuldiv_fraction((v2 - v1), ofsf);
-#endif
-#else  
        sample_t *src = vp->sample->data;
     const fract_t ofsf = vp->resrc.offset & FRACTION_MASK;
        const spos_t ofsi = vp->resrc.offset >> FRACTION_BITS;
@@ -4164,10 +4107,9 @@ SF2
 #else // DATA_T_IN32
     return (v1 + imuldiv_fraction((v2 - v1), ofsf));
 #endif
-#endif // LO_LOOP_CALC
 }
 
-#if 0// (USE_X86_EXT_INTRIN >= 9)
+#if (USE_X86_EXT_INTRIN >= 9)
 // offset:int32*8, resamp:float*8
 // \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
 static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
@@ -4180,7 +4122,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
        int32 start_offset = (int32)(resrc->offset - prec_offset); // (offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
        int32 inc = resrc->increment;
-       __m256i vint = _mm256_set_epi32(inc * 7, inc * 6, inc * 5, inc * 4, inc * 3, inc * 2, inc, 0)
+       __m256i vinit = _mm256_set_epi32(inc * 7, inc * 6, inc * 5, inc * 4, inc * 3, inc * 2, inc, 0);
        __m256i vofs = _mm256_add_epi32(_mm256_set1_epi32(start_offset), vinit);
        __m256i vinc = _mm256_set1_epi32(inc * 8), vfmask = _mm256_set1_epi32((int32)FRACTION_MASK);
        __m256 vec_divo = _mm256_set1_ps(DIV_15BIT), vec_divf = _mm256_set1_ps(div_fraction);
@@ -4196,9 +4138,9 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        for(i = 0; i < count; i += 8) {
        __m256i vofsi1 = _mm256_srli_epi32(vofs, FRACTION_BITS);
        __m256i vofsi2 = _mm256_add_epi32(vofsi1, vvar1);
-       int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128si256(vofsi1, 0x0));
-       __m256i vin1 = _mm256_loadu_si256((__m256i *)&src[ofs0]); // int16*16
-       __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_epi32()); 
+       int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128_si256(vofsi1, 0x0));
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofs0]); // int16*16
+       __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_si256()); 
        __m256i vofsub1 = _mm256_sub_epi32(vofsi1, vofsib); 
        __m256i vofsub2 = _mm256_sub_epi32(vofsi2, vofsib); 
        __m256 vvf1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vin1)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
@@ -4265,7 +4207,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        dest += 8;
 #else // DATA_T_IN32
        __m256 vec_out = MM256_FMA_PS(_mm256_sub_ps(vv2, vv1), _mm256_mul_ps(vfp, vec_divf), vv1);
-       _mm256_storeu_si256(__m256i *)dest, _mm256_cvtps_epi32(vec_out));
+       _mm256_storeu_si256((__m256i *)dest, _mm256_cvtps_epi32(vec_out));
        dest += 8;
 #endif
        vofs = _mm256_add_epi32(vofs, vinc);
@@ -4276,37 +4218,33 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
 
        for(; i < count; i += 8) {
        __m256i vofsi = _mm256_srli_epi32(vofs, FRACTION_BITS);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-       int32 *ofsp = (int32 *)vofsi;
-       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofsp[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[ofsp[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[ofsp[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[ofsp[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin5 = _mm_loadu_si128((__m128i *)&src[ofsp[4]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin6 = _mm_loadu_si128((__m128i *)&src[ofsp[5]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin7 = _mm_loadu_si128((__m128i *)&src[ofsp[6]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin8 = _mm_loadu_si128((__m128i *)&src[ofsp[7]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+#if 1
+       __m256i vsrc01 = MM256_I32GATHER_I32((const int*)src, vofsi, 2);
+       __m256i vsrc0 = _mm256_srai_epi32(_mm256_slli_epi32(vsrc01, 16), 16);
+       __m256i vsrc1 = _mm256_srai_epi32(vsrc01, 16);
+       __m256 vv1 = _mm256_cvtepi32_ps(vsrc0);
+       __m256 vv2 = _mm256_cvtepi32_ps(vsrc1);
 #else
-       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin5 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[4]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin6 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[5]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin7 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[6]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin8 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[7]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-#endif
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,4)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,5)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,6)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,7)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
        __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21]e96,[v12v22]e96 to [v11v12v21v22]e64
        __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23]e96,[v14v24]e96 to [v13v14v23v24]e64
        __m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // \93¯\82
        __m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // \93¯\82
-       __m128i vi1234 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
-       __m128i vi5678 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26]e64,[v17v18,v27v28]e64 to [v15v16v17v18,v25v26v27v28]e0
+       __m128i vin1234 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
+       __m128i vin5678 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26]e64,[v17v18,v27v28]e64 to [v15v16v17v18,v25v26v27v28]e0
        __m256i viall = MM256_SET2X_SI256(vin1234, vin5678); // 256bit =128bit+128bit   
        __m256i vsi16_1 = _mm256_permute4x64_epi64(viall, 0xD8); // v1\82ðL128bit\82É\82Ü\82Æ\82ß
        __m256i vsi16_2 = _mm256_permute4x64_epi64(viall, 0x8D); // v2\82ðL128bit\82É\82Ü\82Æ\82ß
-       __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vsi16_1)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
-       __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vsi16_2)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+       __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256(vsi16_1, 0))); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+       __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256(vsi16_2, 0))); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+#endif
        __m256 vfp = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(vofs, vfmask)), vec_divf);
 #if defined(DATA_T_DOUBLE)
        __m256 vec_out = _mm256_mul_ps(MM256_FMA_PS(_mm256_sub_ps(vv2, vv1), _mm256_mul_ps(vfp, vec_divf), vv1), vec_divo);
@@ -4325,7 +4263,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
 #endif
        vofs = _mm256_add_epi32(vofs, vinc);
        }
-       resrc->offset = prec_offset + (splen_t)(vofs.m256i_i32[0]);
+       resrc->offset = prec_offset + (splen_t)(MM256_EXTRACT_I32(vofs, 0));
        *out_count = i;
     return dest;
 }
@@ -4377,7 +4315,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        dest += 4;
 #elif defined(DATA_T_FLOAT) // DATA_T_FLOAT 
        __m128 vec_out = _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1), vec_divo);
-       _mm256_storeu_ps(dest, vec_out);
+       _mm_storeu_ps(dest, vec_out);
        dest += 4;
 #else // DATA_T_IN32
        __m128 vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1);
@@ -4389,7 +4327,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        }else
 #if 0 // 2set
        if(inc < opt_inc2){ // 2\83Z\83b\83g
-       const __m128i vvar4 = _mm_set1_epi32(4);
+       const __m128i vvar3 = _mm_set1_epi32(3);
        for(i = 0; i < count; i += 4) {
        __m128i vofsi1 = _mm_srli_epi32(vofs, FRACTION_BITS);
        __m128i vofsi2 = _mm_add_epi32(vofsi1, vvar1);
@@ -4401,12 +4339,8 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        __m128i vofsib = _mm_shuffle_epi32(vofsi1, 0x0); 
        __m128i vofsub1 = _mm_sub_epi32(vofsi1, vofsib); 
        __m128i vofsub2 = _mm_sub_epi32(vofsi2, vofsib); 
-       __m128i vrmg1 = _mm_cmpgt_epi32(vofsub1, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\92´\89ß\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
-       __m128i vrmg2 = _mm_cmpgt_epi32(vofsub2, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\92´\89ß\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
-       __m128i vrme1 = _mm_cmpeq_epi32(vofsub1, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\93¯\93\99\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
-       __m128i vrme2 = _mm_cmpeq_epi32(vofsub2, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\93¯\93\99\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
-       __m128i vrm1 = _mm_or_si128(vrmg1, vrme1); // 4\88È\8fã\82É\82·\82é\82½\82ß\82É\83}\83X\83N\8d\87\90¬
-       __m128i vrm2 = _mm_or_si128(vrmg2, vrme2); // 4\88È\8fã\82É\82·\82é\82½\82ß\82É\83}\83X\83N\8d\87\90¬
+       __m128i vrm1 = _mm_cmpgt_epi32(vofsub1, vvar3); // \83I\83t\83Z\83b\83g\8d·\82ª4\88È\8fã\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
+       __m128i vrm2 = _mm_cmpgt_epi32(vofsub2, vvar3); // \83I\83t\83Z\83b\83g\8d·\82ª4\88È\8fã\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
        // src2 offset\82ª\89º\88Ê2bit\82Ì\82Ý\97L\8cø\82Å\82 \82ê\82Î4\82ð\92´\82¦\82é\95\94\95ª\82É\83}\83X\83N\95s\97v\82Ì\82Í\82¸
        __m128 vv11 = _mm_permutevar_ps(vvf1, vofsub1); // v1 ofsi
        __m128 vv12 = _mm_permutevar_ps(vvf2, vofsub1); // v1 ofsi
@@ -4434,7 +4368,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        }else
 #endif // 2set
                
-// x86\82¾\82Æ\82Ù\82Æ\82ñ\82Ç\95Ï\82í\82ç\82È\82¢ x64\82¾\82Æ\82â\82â\91¬\82¢
+// Almost no difference on x86; slightly faster on x64 (about 1.5%)
 #elif (USE_X86_EXT_INTRIN >= 5) && defined(IX64CPU)
        // \8dÅ\93K\89»\83\8c\81[\83g = (\83\8d\81[\83h\83f\81[\83^\90\94 - \8f\89\8aú\83I\83t\83Z\83b\83g\8f¬\90\94\95\94\82Ì\8dÅ\91å\92l(1\96¢\96\9e) - \95â\8aÔ\83|\83C\83\93\83g\90\94(linear\82Í1) ) / \83I\83t\83Z\83b\83g\83f\81[\83^\90\94
        // \83\8d\81[\83h\83f\81[\83^\90\94\82Í_mm_shuffle_epi8\88µ\82¦\82é\82Ìint16\82Ì8\83Z\83b\83g\82É\82È\82é (=int8*16)
@@ -4489,7 +4423,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
 #endif
        vofs = _mm_add_epi32(vofs, vinc);
        }
-       }       
+       }else
 #endif 
 #endif // LO_OPTIMIZE_INCREMENT
                
@@ -4497,18 +4431,10 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        const __m128 vec_divo = _mm_set1_ps(DIV_15BIT);
        for(; i < count; i += 4) {
        __m128i vofsi = _mm_srli_epi32(vofs, FRACTION_BITS);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-       int32 *ofsp = (int32 *)vofsi;
-       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofsp[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[ofsp[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[ofsp[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[ofsp[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-#else
-       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
-       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
-#endif         
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ 
        __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21]e96,[v12v22]e96 to [v11v12v21v22]e64
        __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23]e96,[v14v24]e96 to [v13v14v23v24]e64
        __m128i vi16 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
@@ -4545,7 +4471,7 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
        vofs = _mm_add_epi32(vofs, vinc);
        }
        }
-       resrc->offset = prec_offset + (splen_t)(vofs.m128i_i32[0]);
+       resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs,0));
        *out_count = i;
     return dest;
 }
@@ -4585,20 +4511,10 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
                vv2 = _mm_cvt_si2ss(vv2, src[++ofsi]), vv2 = _mm_shuffle_ps(vv2, vv2, 0x1b);                    
 #if defined(DATA_T_DOUBLE)
                vec_out = _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), _mm_mul_ps(vfp, vec_divf), vv1), vec_divo);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-               {
-               float *out = (float *)vec_out;
-               *dest++ = (DATA_T)out[0];
-               *dest++ = (DATA_T)out[1];
-               *dest++ = (DATA_T)out[2];
-               *dest++ = (DATA_T)out[3];
-               }
-#else
-               *dest++ = (DATA_T)vec_out.m128_f32[0];
-               *dest++ = (DATA_T)vec_out.m128_f32[1];
-               *dest++ = (DATA_T)vec_out.m128_f32[2];
-               *dest++ = (DATA_T)vec_out.m128_f32[3];
-#endif
+               *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,0);
+               *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,1);
+               *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,2);
+               *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,3);
 #elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
                _mm_storeu_ps(dest, _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), _mm_mul_ps(vfp, vec_divf), vv1), vec_divo));
                dest += 4;
@@ -4668,16 +4584,20 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
 }
 
 #else // normal
-
+// Keeps the in-loop offset arithmetic in the int32 range; requires (sample_increment * (req_count+1)) < int32 max
 static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
 {
        int32 i;
-       resample_rec_t *resrc = &vp->resrc;
-       sample_t *src = vp->sample->data;
+       resample_rec_t *resrc = &vp->resrc;     
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
+       int32 ofs = (int32)(resrc->offset & FRACTION_MASK);
+       const int32 inc = resrc->increment;
 
        for(i = 0; i < req_count; i++) {        
-               spos_t ofsi = resrc->offset >> FRACTION_BITS;
-               fract_t ofsf = resrc->offset & FRACTION_MASK;           
+               int32 ofsi = ofs >> FRACTION_BITS;
+               int32 ofsf = ofs & FRACTION_MASK;       
                int32 v1 = src[ofsi];
                int32 v2 = src[ofsi + 1];       
        //      *dest++ = ((FLOAT_T)v1 + (FLOAT_T)(v2 - v1) * (FLOAT_T)ofsf * div_fraction) * OUT_INT16;
@@ -4686,24 +4606,20 @@ static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_c
 #else
                *dest++ = (v1 + imuldiv_fraction((v2 - v1), ofsf);
 #endif
-               resrc->offset += resrc->increment;      
+               ofs += inc;
        }
+       resrc->offset = prec_offset + (splen_t)ofs;
        *out_count = i;
     return dest;
 }
 #endif
 
-
-
 static void lo_rs_plain(Voice *vp, DATA_T *dest, int32 count)
 {
        /* Play sample until end, then free the voice. */
        resample_rec_t *resrc = &vp->resrc;
        int32 i = 0, j;
 
-#ifdef LO_LOOP_CALC
-       resrc->mode = RESAMPLE_MODE_PLAIN;
-#endif
        if (resrc->increment < 0) resrc->increment = -resrc->increment; /* In case we're coming out of a bidir loop */
        j = PRECALC_LOOP_COUNT(resrc->offset, resrc->data_length, resrc->increment) + 2; // safe end+128 sample
        if (j > count) {j = count;}
@@ -4722,9 +4638,6 @@ static void lo_rs_loop(Voice *vp, DATA_T *dest, int32 count)
        resample_rec_t *resrc = &vp->resrc;
        int32 i = 0, j;
        
-#ifdef LO_LOOP_CALC
-       resrc->mode = RESAMPLE_MODE_LOOP;
-#endif
        j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // 2point interpolation
        if (j > count) {j = count;}
        else if(j < 0) {j = 0;}
@@ -4742,9 +4655,6 @@ static void lo_rs_bidir(Voice *vp, DATA_T *dest, int32 count)
        resample_rec_t *resrc = &vp->resrc;
        int32 i = 0, j = 0;     
 
-#ifdef LO_LOOP_CALC    
-       resrc->mode = RESAMPLE_MODE_BIDIR_LOOP;
-#endif
        if (resrc->increment > 0){
                j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // 2point interpolation
                if (j > count) {j = count;}
@@ -4786,113 +4696,1564 @@ static inline void resample_voice_linear_optimize(Voice *vp, DATA_T *ptr, int32
                lo_rs_loop(vp, ptr, count);     /* loop */
        }               
 }
-
 #endif /* optimize linear resample */
 
+/*************** optimize linear float resample *****************/
+#if defined(PRECALC_LOOPS)
+#define LO_OPTIMIZE_INCREMENT
 
+static inline DATA_T resample_linear_float_single(Voice *vp) // returns one linearly interpolated sample at vp->resrc.offset; the offset is not advanced here
+{      
+    const float *src = (const float*)vp->sample->data; // sample data is read as float
+    const fract_t ofsf = vp->resrc.offset & FRACTION_MASK; // fractional part of the fixed-point offset
+       const spos_t ofsi = vp->resrc.offset >> FRACTION_BITS; // integer sample index
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+    FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = ofsf;
+    return (v1 + (v2 - v1) * fp * div_fraction); // FLOAT_T: v1 + (v2 - v1) * fraction; div_fraction presumably is 1 / (1 << FRACTION_BITS) - TODO confirm
+#else // DATA_T_IN32
+    int32 v1 = (int32)(src[ofsi] * M_16BIT), v2 = (int32)(src[ofsi + 1] * M_16BIT); // NOTE(review): the SSE resample_linear_float_multi scales its int32 output by M_15BIT, not M_16BIT - confirm which is intended
+       return v1 + imuldiv_fraction(v2 - v1, ofsf);
+#endif
+}
 
-
-/*************** resampling with fixed increment *****************/
-///r
-static void rs_plain_c(int v, DATA_T *ptr, int32 count)
+#if (USE_X86_EXT_INTRIN >= 3)
+// SSE path (offset: int32*4, resample: float*4): writes req_count rounded down to a multiple of 4 samples to dest, reports that number via *out_count, returns the advanced dest
+// Keeps the in-loop offset arithmetic in the int32 range; requires (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_linear_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
 {
-    Voice *vp = &voice[v];
-    DATA_T *dest = ptr + vp->resrc.buffer_offset;
-       cache_t *src = (cache_t *)vp->sample->data;
-       int32 count2 = count;
-    splen_t ofs, i, le;
-       
-    le = vp->sample->loop_end >> FRACTION_BITS;
-    ofs = vp->resrc.offset >> FRACTION_BITS;
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       const uint32 req_count_mask = ~(0x3);
+       const int32 count = req_count & req_count_mask; // only whole groups of 4 are produced here; the caller handles the remainder
+       splen_t prec_offset = resrc->offset & INTEGER_MASK; // presumably the whole-sample part of the offset (INTEGER_MASK clearing the fraction bits) - TODO confirm
+       float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS); // rebase src so the in-loop indices stay small
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // keep the offset arithmetic in the int32 range (for SIMD)
+       const int32 inc = resrc->increment;
+       __m128i vofs = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0)); // per-lane offsets: start + {0, 1, 2, 3} * inc
+       const __m128i vinc = _mm_set1_epi32(inc * 4), vfmask = _mm_set1_epi32((int32)FRACTION_MASK);
+       const __m128 vec_divf = _mm_set1_ps(div_fraction);
+       const __m128 vec_divo = _mm_set1_ps(M_15BIT); // output scale, used only on the DATA_T_IN32 path below
+       for(; i < count; i += 4) {
+       __m128i vofsi = _mm_srli_epi32(vofs, FRACTION_BITS);
+       __m128 vin1 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,0)]); // load src[ofsi] and src[ofsi+1] for lane 0 (4 floats are read, only the first two are used) [v11v12v13v14]
+       __m128 vin2 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,1)]); // same for the next sample [v21v22v23v24]
+       __m128 vin3 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,2)]); // same for the next sample [v31v32v33v34]
+       __m128 vin4 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,3)]); // same for the next sample [v41v42v43v44]
+    __m128 vin12 = _mm_shuffle_ps(vin1, vin2, 0x44); // [v11,v12,v21,v22]
+    __m128 vin34 = _mm_shuffle_ps(vin3, vin4, 0x44); // [v31,v32,v41,v42]
+    __m128 vv1 = _mm_shuffle_ps(vin12, vin34, 0x88); // [v11,v21,v31,v41]
+    __m128 vv2 = _mm_shuffle_ps(vin12, vin34, 0xDD); // [v12,v22,v32,v42]
+       __m128 vfp = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(vofs, vfmask)), vec_divf); // per-lane fractional position as float
+       __m128 vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1); // v1 + (v2 - v1) * frac, assuming MM_FMA_PS(a, b, c) computes a*b+c - TODO confirm
+#if defined(DATA_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 8)
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(vec_out));
+       dest += 4;
+#else
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vec_out)); // lower two floats widened to double
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vec_out, vec_out))); // upper two floats widened to double
+       dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+       _mm_storeu_ps(dest, vec_out);
+       dest += 4;
+#else // DATA_T_IN32
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm_mul_ps(vec_out, vec_divo))); // NOTE(review): scaled by M_15BIT here, while resample_linear_float_single uses M_16BIT - confirm which is intended
+       dest += 4;
+#endif
+       vofs = _mm_add_epi32(vofs, vinc);
+       }
+       resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs,0)); // lane 0 holds the offset of the next sample to produce
+       *out_count = i;
+    return dest;
+}
 
-    i = ofs + count2;
-    if(i > le)
-               i = le;
-       count2 = i - ofs;
+#else // normal
+// Scalar fallback: keeps the in-loop offset arithmetic in the int32 range; requires (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_linear_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       int32 i;
+       resample_rec_t *resrc = &vp->resrc;
+       splen_t prec_offset = resrc->offset & INTEGER_MASK; // presumably the whole-sample part of the offset - TODO confirm INTEGER_MASK
+       float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS); // rebase src so ofs below starts from the fraction only
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // keep the offset arithmetic in the int32 range (for SIMD); not referenced again in this scalar version
+       int32 ofs = (int32)(resrc->offset & FRACTION_MASK);
+       const int32 inc = resrc->increment;
 
-       for (i = 0; i < count2; i++) {
-               dest[i] = src[i + ofs];
+       for(i = 0; i < req_count; i++) {        
+               int32 ofsi = ofs >> FRACTION_BITS;
+               int32 ofsf = ofs & FRACTION_MASK;               
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+               FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = (ofsf & FRACTION_MASK); // ofsf is already masked above, so this second mask is redundant
+               *dest++ = (v1 + (v2 - v1) * fp * div_fraction); // FLOAT_T
+#else
+               int32 v1 = (int32)(src[ofsi] * M_16BIT), v2 = (int32)(src[ofsi + 1] * M_16BIT); // floats are scaled by M_16BIT before integer interpolation
+               *dest++ = v1 + imuldiv_fraction(v2 - v1, ofsf);
+#endif
+               ofs += inc;
        }
-       for (; i < count; i++) {
-               vp->finish_voice = 1;
-               dest[i] = 0;
-       }       
-       ofs += count2;
-       vp->resrc.offset = ofs << FRACTION_BITS;
+       resrc->offset = prec_offset + (splen_t)ofs; // write the advanced offset back in full precision
+       *out_count = i; // all req_count samples are produced by this version
+    return dest;
 }
-///r
-static void rs_plain(int v, DATA_T *ptr, int32 count)
-{
-  /* Play sample until end, then free the voice. */
-  Voice *vp = &voice[v];
-  DATA_T *dest = ptr;
-       sample_t *src = vp->sample->data;
-       int data_type = vp->sample->data_type;
-  splen_t
-    ofs = vp->resrc.offset,
-    ls = 0,
-    le = vp->sample->data_length;
-  int32 incr = vp->resrc.increment;
-#ifdef PRECALC_LOOPS
-  int32 i = 0, j;
 #endif
 
-       if(vp->cache && incr == (1 << FRACTION_BITS)){
-               rs_plain_c(v, ptr, count);
-               return;
-       }       
+static void lo_rs_plain_float(Voice *vp, DATA_T *dest, int32 count) // non-looping float resample: fills count samples of dest
+{
+       /* Play sample until end, then zero-fill the rest and set vp->finish_voice. */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0, j;
 
-#ifdef PRECALC_LOOPS
-       if (incr < 0) incr = -incr; /* In case we're coming out of a bidir loop */
-  /* Precalc how many times we should go through the loop.
-     NOTE: Assumes that incr > 0 and that ofs <= le */
-       j = PRECALC_LOOP_COUNT(ofs, le, incr);
-       if (j > count) {j = count;}
+       if (resrc->increment < 0) resrc->increment = -resrc->increment; /* In case we're coming out of a bidir loop */
+       j = PRECALC_LOOP_COUNT(resrc->offset, resrc->data_length, resrc->increment) + 2; // +2 runs slightly past data_length; presumably safe because sample data carries trailing padding ("end+128 sample") - TODO confirm
+       if (j > count) {j = count;}
        else if(j < 0) {j = 0;} 
-       for(i = 0; i < j; i++) {
-      RESAMPLATION;
-      ofs += incr;
-    }
-       for (; i < count; i++) {
-               *dest++ = 0;
-               vp->finish_voice = 1;
-       }       
-#else /* PRECALC_LOOPS */
-       while (count--)
-       {
-               if (ofs >= le){
-                       *dest++ = 0;
-                       vp->finish_voice = 1;
-               }else {
-                       RESAMPLATION;
-                       ofs += incr;
-               }
+       dest = resample_linear_float_multi(vp, dest, j, &i); // bulk path; i receives the number of samples it actually produced
+       for(; i < j; i++) { // finish the samples the bulk path left over, one at a time
+               *dest++ = resample_linear_float_single(vp);
+               resrc->offset += resrc->increment;
        }
-#endif /* PRECALC_LOOPS */
-
-  vp->resrc.offset = ofs; /* Update offset */
+       for(; i < count; i++) { *dest++ = 0; vp->finish_voice = 1;} // past the end of the sample: output silence and flag the voice as finished
 }
-static void rs_loop_c(Voice *vp, DATA_T *ptr, int32 count)
-{
-  splen_t
-               ofs = vp->resrc.offset >> FRACTION_BITS,
-               le = vp->sample->loop_end >> FRACTION_BITS,
-               ll = le - (vp->sample->loop_start >> FRACTION_BITS);
 
-       DATA_T *dest = ptr;
-       cache_t *src = (cache_t *)vp->sample->data;
-       int32 i, j;
+static void lo_rs_loop_float(Voice *vp, DATA_T *dest, int32 count)
+{
+       /* Forward-loop path: play up to loop_end, then step back by the loop length (loop_end - loop_start) and continue; always writes `count` samples to dest. */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0, j;
+       
+       j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // bulk pass stops 2 steps before loop_end (2-point interpolation); the per-sample loop below handles the wrap
+       if (j > count) {j = count;}
+       else if(j < 0) {j = 0;}
+       dest = resample_linear_float_multi(vp, dest, j, &i); // bulk helper: returns the advanced dest; i (passed by address) receives its progress
+       for(; i < count; i++) { // remaining samples, checking for the loop wrap after every step
+               *dest++ = resample_linear_float_single(vp);
+               if((resrc->offset += resrc->increment) >= resrc->loop_end)
+                       resrc->offset -= resrc->loop_end - resrc->loop_start; // wrap back by one loop length (applied once per step)
+               /* Hopefully the loop is longer than an increment. */
+       }
+}
 
-// ERROR loop_start = 4215529472 
-       if(ll < 0)
-       {       
-               vp->sample->loop_start = 0;
-               ll = le - (vp->sample->loop_start >> FRACTION_BITS);
-       }       
+static void lo_rs_bidir_float(Voice *vp, DATA_T *dest, int32 count)
+{ /* Ping-pong loop path: writes `count` linearly interpolated samples to dest, reversing resrc->increment whenever the offset crosses loop_end or loop_start. */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0, j = 0;     
 
-       while(count){
-               while(ofs >= le)
-                       ofs -= ll;
+       if (resrc->increment > 0){ // the bulk pass is used only while moving forward
+               j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // stop 2 steps before loop_end (2-point interpolation)
+               if (j > count) {j = count;}
+               else if(j < 0) {j = 0;}
+               dest = resample_linear_float_multi(vp, dest, j, &i); // returns the advanced dest; i (passed by address) receives its progress
+       }
+       for(; i < count; i++) { // per-sample path, handles both directions
+               *dest++ = resample_linear_float_single(vp);
+               resrc->offset += resrc->increment;
+               if(resrc->increment > 0){
+                       if(resrc->offset >= resrc->loop_end){
+                               resrc->offset = (resrc->loop_end << 1) - resrc->offset; // mirror the overshoot about loop_end
+                               resrc->increment = -resrc->increment; // now play backwards
+                       }
+               }else{
+                       if(resrc->offset <= resrc->loop_start){
+                               resrc->offset = (resrc->loop_start << 1) - resrc->offset; // mirror the undershoot about loop_start
+                               resrc->increment = -resrc->increment; // now play forwards
+                       }
+               }
+       }
+}
+
+static inline void resample_voice_linear_float_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{ /* Picks the linear resampling routine (plain, ping-pong loop or forward loop) from the sample mode bits, the voice status and resrc.plain_flag, then resamples `count` values into ptr. */
+    int mode = vp->sample->modes;
+       
+       if(vp->resrc.plain_flag){ /* already locked to the non-looping path by an earlier call */ 
+               lo_rs_plain_float(vp, ptr, count);      /* no loop */
+       }else if(!(mode & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE))){ /* no envelope mode bit and the voice is off or dying */
+               vp->resrc.plain_flag = 1; /* lock: later calls take the first branch */
+               lo_rs_plain_float(vp, ptr, count);      /* no loop */
+       }else if(mode & MODES_RELEASE && (vp->status & VOICE_OFF)){ /* release sample and the voice is off */
+               vp->resrc.plain_flag = 1; /* lock: later calls take the first branch */
+               lo_rs_plain_float(vp, ptr, count);      /* no loop */
+       }else if(mode & MODES_PINGPONG){ /* Bidirectional */
+               lo_rs_bidir_float(vp, ptr, count);      /* Bidirectional loop */
+       }else { /* everything else is treated as a forward loop */
+               lo_rs_loop_float(vp, ptr, count);       /* loop */
+       }               
+}
+#endif /* optimize linear float resample */
+
+/*************** optimize lagrange resample ***********************/
+#if defined(PRECALC_LOOPS)
+#define LAO_OPTIMIZE_INCREMENT
+
+#if 0 // timidity41-eddb86e
+#if USE_X86_EXT_INTRIN >= 8
+
+// Computes 8 consecutive output samples with 4-point Lagrange interpolation using 256-bit SIMD; the caller must check offsets to ensure lagrange interpolation is applicable to all 8 positions.
+// Returns dest advanced by 8, adds 8 to *i and advances resrc->offset by 8 increments. TODO: use newton interpolation
+static DATA_T *resample_multi_lagrange_m256(Voice *vp, DATA_T *dest, int32 *i, int32 count) // count is not used in this function
+{
+       resample_rec_t *resrc = &vp->resrc;
+       spos_t ofsls = resrc->loop_start >> FRACTION_BITS; // not used below
+       spos_t ofsle = resrc->loop_end >> FRACTION_BITS; // not used below
+       spos_t ofsend = resrc->data_length >> FRACTION_BITS; // not used below
+
+       splen_t prec_offset = (resrc->offset & INTEGER_MASK) - (1 << FRACTION_BITS); // rebased origin: one sample before the current position (assuming INTEGER_MASK clears the fraction bits -- TODO confirm)
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS); // src[0] is the sample at prec_offset
+       int32 start_offset = (int32)(resrc->offset - prec_offset); // keep the offset arithmetic within int32 range (for SIMD)
+
+       __m256i vindices = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+       __m256i vofs = _mm256_add_epi32(_mm256_set1_epi32(start_offset), _mm256_mullo_epi32(vindices, _mm256_set1_epi32(resrc->increment))); // per-lane offset: start_offset + lane * increment
+       __m256i vofsi = _mm256_srai_epi32(vofs, FRACTION_BITS); // integer part of each lane's offset (index relative to src)
+
+       // src[ofsi-1], src[ofsi] (one 32-bit load per lane holding two adjacent samples; the 16-bit shifts below assume sample_t is 16 bits wide)
+       __m256i vinm10 = MM256_I32GATHER_I32((const int *)src, _mm256_sub_epi32(vofsi, _mm256_set1_epi32(1)), 2);
+       // src[ofsi+1], src[ofsi+2]
+       __m256i vin12 = MM256_I32GATHER_I32((const int *)src, _mm256_add_epi32(vofsi, _mm256_set1_epi32(1)), 2);
+
+       // (int32)src[ofsi-1] (low half, sign-extended by shifting left then arithmetic right)
+       __m256i vinm1 = _mm256_srai_epi32(_mm256_slli_epi32(vinm10, 16), 16);
+       // (int32)src[ofsi] (high half)
+       __m256i vin0 = _mm256_srai_epi32(vinm10, 16);
+       // (int32)src[ofsi+1]
+       __m256i vin1 = _mm256_srai_epi32(_mm256_slli_epi32(vin12, 16), 16);
+       // (int32)src[ofsi+2]
+       __m256i vin2 = _mm256_srai_epi32(vin12, 16);
+
+       __m256 vec_divf = _mm256_set1_ps(div_fraction);
+
+       // (float)(ofs - ofsi): fractional part of each offset scaled by div_fraction
+       __m256 vfofsf = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(vofs, _mm256_set1_epi32(FRACTION_MASK))), vec_divf);
+
+       // (float)(int32)src[ofsi-1]
+       __m256 vfinm1 = _mm256_cvtepi32_ps(vinm1);
+       // (float)(int32)src[ofsi]
+       __m256 vfin0 = _mm256_cvtepi32_ps(vin0);
+       // (float)(int32)src[ofsi+1]
+       __m256 vfin1 = _mm256_cvtepi32_ps(vin1);
+       // (float)(int32)src[ofsi+2]
+       __m256 vfin2 = _mm256_cvtepi32_ps(vin2);
+
+       __m256 v1 = _mm256_set1_ps(1.0f);
+
+       // x - x1 (the four nodes x1..x4 sit at -1, 0, 1, 2 relative to ofsi)
+       __m256 vfofsfm1 = _mm256_add_ps(vfofsf, v1);
+       // x - x2
+       // __m256 vfofsf0 = vfofsf;
+
+       // x - x3
+       __m256 vfofsf1 = _mm256_sub_ps(vfofsf, v1);
+       // x - x4
+       __m256 vfofsf2 = _mm256_sub_ps(vfofsf1, v1);
+
+       //   (x - x2)(x - x3)(x - x4) / (x1 - x2)(x1 - x3)(x1 - x4)
+       // = (x - x2)(x - x3)(x - x4) * (-1/6)
+       __m256 vfcoefm1 = _mm256_mul_ps(_mm256_mul_ps(vfofsf, vfofsf1), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(-1.0f / 6.0f)));
+
+       //   (x - x1)(x - x3)(x - x4) / (x2 - x1)(x2 - x3)(x2 - x4)
+       // = (x - x1)(x - x3)(x - x4) * (1/2)
+       __m256 vfcoef0 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf1), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(1.0f / 2.0f)));
+
+       //   (x - x1)(x - x2)(x - x4) / (x3 - x1)(x3 - x2)(x3 - x4)
+       // = (x - x1)(x - x2)(x - x4) * (-1/2)
+       __m256 vfcoef1 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(-1.0f / 2.0f)));
+
+       //   (x - x1)(x - x2)(x - x3) / (x4 - x1)(x4 - x2)(x4 - x3)
+       // = (x - x1)(x - x2)(x - x3) * (1/6)
+       __m256 vfcoef2 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf), _mm256_mul_ps(vfofsf1, _mm256_set1_ps(1.0f / 6.0f)));
+
+#if USE_X86_EXT_INTRIN >= 9
+       __m256 vresult = _mm256_add_ps(
+               _mm256_fmadd_ps(vfinm1, vfcoefm1, _mm256_mul_ps(vfin0, vfcoef0)),
+               _mm256_fmadd_ps(vfin1, vfcoef1, _mm256_mul_ps(vfin2, vfcoef2))
+       );
+#else
+       __m256 vresult = _mm256_add_ps(
+               _mm256_add_ps(_mm256_mul_ps(vfinm1, vfcoefm1), _mm256_mul_ps(vfin0, vfcoef0)),
+               _mm256_add_ps(_mm256_mul_ps(vfin1, vfcoef1), _mm256_mul_ps(vfin2, vfcoef2))
+       );
+#endif
+
+#if defined(DATA_T_DOUBLE)
+       vresult = _mm256_mul_ps(vresult, _mm256_set1_ps(OUT_INT16)); // scale by OUT_INT16 before widening to double
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vresult, 0)));
+       _mm256_storeu_pd(dest + 4, _mm256_cvtps_pd(_mm256_extractf128_ps(vresult, 1)));
+#elif defined(DATA_T_FLOAT)
+       vresult = _mm256_mul_ps(vresult, _mm256_set1_ps(OUT_INT16)); // scale by OUT_INT16
+       _mm256_storeu_ps(dest, vresult);
+#else
+       _mm256_storeu_si256(dest, _mm256_cvtps_epi32(vresult)); // integer DATA_T: convert to int32, no scaling; NOTE(review): dest is passed without a (__m256i *) cast
+#endif
+
+       dest += 8;
+       resrc->offset += resrc->increment * 8; // account for the 8 samples just produced
+       *i += 8;
+       return dest;
+}
+
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+
+// Computes 4 consecutive output samples with 4-point Lagrange interpolation using 128-bit SIMD; the caller must check offsets to ensure lagrange interpolation is applicable to all 4 positions.
+// Returns dest advanced by 4, adds 4 to *i and advances resrc->offset by 4 increments. TODO: use newton interpolation
+static DATA_T *resample_multi_lagrange_m128(Voice *vp, DATA_T *dest, int32 *i, int32 count) // count is not used in this function
+{
+       resample_rec_t *resrc = &vp->resrc;
+       spos_t ofsls = resrc->loop_start >> FRACTION_BITS; // not used below
+       spos_t ofsle = resrc->loop_end >> FRACTION_BITS; // not used below
+       spos_t ofsend = resrc->data_length >> FRACTION_BITS; // not used below
+
+       splen_t prec_offset = (resrc->offset & INTEGER_MASK) - (1 << FRACTION_BITS); // rebased origin: one sample before the current position (assuming INTEGER_MASK clears the fraction bits -- TODO confirm)
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS); // src[0] is the sample at prec_offset
+       int32 start_offset = (int32)(resrc->offset - prec_offset); // keep the offset arithmetic within int32 range (for SIMD)
+
+       __m128i vindices = _mm_set_epi32(3, 2, 1, 0);
+       __m128i vofs = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_mullo_epi32(vindices, _mm_set1_epi32(resrc->increment))); // per-lane offset: start_offset + lane * increment
+       __m128i vofsi = _mm_srai_epi32(vofs, FRACTION_BITS); // integer part of each lane's offset (index relative to src)
+
+       // src[ofsi-1], src[ofsi] (one 32-bit load per lane holding two adjacent samples; the 16-bit shifts below assume sample_t is 16 bits wide)
+       __m128i vinm10 = MM_I32GATHER_I32((const int *)src, _mm_sub_epi32(vofsi, _mm_set1_epi32(1)), 2);
+       // src[ofsi+1], src[ofsi+2]
+       __m128i vin12 = MM_I32GATHER_I32((const int *)src, _mm_add_epi32(vofsi, _mm_set1_epi32(1)), 2);
+
+       // (int32)src[ofsi-1] (low half, sign-extended by shifting left then arithmetic right)
+       __m128i vinm1 = _mm_srai_epi32(_mm_slli_epi32(vinm10, 16), 16);
+       // (int32)src[ofsi] (high half)
+       __m128i vin0 = _mm_srai_epi32(vinm10, 16);
+       // (int32)src[ofsi+1]
+       __m128i vin1 = _mm_srai_epi32(_mm_slli_epi32(vin12, 16), 16);
+       // (int32)src[ofsi+2]
+       __m128i vin2 = _mm_srai_epi32(vin12, 16);
+
+       __m128 vec_divf = _mm_set1_ps(div_fraction);
+
+       // (float)(ofs - ofsi): fractional part of each offset scaled by div_fraction
+       __m128 vfofsf = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(vofs, _mm_set1_epi32(FRACTION_MASK))), vec_divf);
+
+       // (float)(int32)src[ofsi-1]
+       __m128 vfinm1 = _mm_cvtepi32_ps(vinm1);
+       // (float)(int32)src[ofsi]
+       __m128 vfin0 = _mm_cvtepi32_ps(vin0);
+       // (float)(int32)src[ofsi+1]
+       __m128 vfin1 = _mm_cvtepi32_ps(vin1);
+       // (float)(int32)src[ofsi+2]
+       __m128 vfin2 = _mm_cvtepi32_ps(vin2);
+
+       __m128 v1 = _mm_set1_ps(1.0f);
+
+       // x - x1 (the four nodes x1..x4 sit at -1, 0, 1, 2 relative to ofsi)
+       __m128 vfofsfm1 = _mm_add_ps(vfofsf, v1);
+       // x - x2
+       // __m128 vfofsf0 = vfofsf;
+
+       // x - x3
+       __m128 vfofsf1 = _mm_sub_ps(vfofsf, v1);
+       // x - x4
+       __m128 vfofsf2 = _mm_sub_ps(vfofsf1, v1);
+
+       //   (x - x2)(x - x3)(x - x4) / (x1 - x2)(x1 - x3)(x1 - x4)
+       // = (x - x2)(x - x3)(x - x4) * (-1/6)
+       __m128 vfcoefm1 = _mm_mul_ps(_mm_mul_ps(vfofsf, vfofsf1), _mm_mul_ps(vfofsf2, _mm_set1_ps(-1.0f / 6.0f)));
+
+       //   (x - x1)(x - x3)(x - x4) / (x2 - x1)(x2 - x3)(x2 - x4)
+       // = (x - x1)(x - x3)(x - x4) * (1/2)
+       __m128 vfcoef0 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf1), _mm_mul_ps(vfofsf2, _mm_set1_ps(1.0f / 2.0f)));
+
+       //   (x - x1)(x - x2)(x - x4) / (x3 - x1)(x3 - x2)(x3 - x4)
+       // = (x - x1)(x - x2)(x - x4) * (-1/2)
+       __m128 vfcoef1 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf), _mm_mul_ps(vfofsf2, _mm_set1_ps(-1.0f / 2.0f)));
+
+       //   (x - x1)(x - x2)(x - x3) / (x4 - x1)(x4 - x2)(x4 - x3)
+       // = (x - x1)(x - x2)(x - x3) * (1/6)
+       __m128 vfcoef2 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf), _mm_mul_ps(vfofsf1, _mm_set1_ps(1.0f / 6.0f)));
+
+#if USE_X86_EXT_INTRIN >= 9
+       __m128 vresult = _mm_add_ps(
+               _mm_fmadd_ps(vfinm1, vfcoefm1, _mm_mul_ps(vfin0, vfcoef0)),
+               _mm_fmadd_ps(vfin1, vfcoef1, _mm_mul_ps(vfin2, vfcoef2))
+       );
+#else
+       __m128 vresult = _mm_add_ps(
+               _mm_add_ps(_mm_mul_ps(vfinm1, vfcoefm1), _mm_mul_ps(vfin0, vfcoef0)),
+               _mm_add_ps(_mm_mul_ps(vfin1, vfcoef1), _mm_mul_ps(vfin2, vfcoef2))
+       );
+#endif
+
+#if defined(DATA_T_DOUBLE)
+       vresult = _mm_mul_ps(vresult, _mm_set1_ps(OUT_INT16)); // scale by OUT_INT16 before widening to double
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vresult)); // lower two lanes
+       _mm_storeu_pd(dest + 2, _mm_cvtps_pd(_mm_movehl_ps(vresult, vresult))); // upper two lanes
+#elif defined(DATA_T_FLOAT)
+       vresult = _mm_mul_ps(vresult, _mm_set1_ps(OUT_INT16)); // scale by OUT_INT16
+       _mm_storeu_ps(dest, vresult);
+#else
+       _mm_storeu_si128(dest, _mm_cvtps_epi32(vresult)); // integer DATA_T: convert to int32, no scaling; NOTE(review): dest is passed without a (__m128i *) cast
+#endif
+
+       dest += 4;
+       resrc->offset += resrc->increment * 4; // account for the 4 samples just produced
+       *i += 4;
+       return dest;
+}
+
+#endif
+
+static void resample_lagrange_multi2(Voice *vp, DATA_T *dest, int32 count)
+{
+       const sample_t *src = vp->sample->data;
+       resample_rec_t *resrc = &vp->resrc;
+       spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
+       spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
+       spos_t ofsend = resrc->data_length >> FRACTION_BITS;
+       int32 i = 0;
+
+       if (resrc->mode == RESAMPLE_MODE_PLAIN) {
+               if (resrc->increment < 0) {
+                       resrc->increment = -resrc->increment;
+               }
+
+               // interpolate [0, 1] linearly
+               while (i < count && (resrc->offset >> FRACTION_BITS) < 1) {
+                       *dest++ = resample_linear(src, resrc->offset, resrc);
+                       resrc->offset += resrc->increment;
+                       i++;
+               }
+
+               // lagrange interpolation
+#if USE_X86_EXT_INTRIN >= 8
+               while (count - i >= 8) {
+                       // !(ofsi + 2 < ofsend)
+                       if (((resrc->offset + resrc->increment * 7) >> FRACTION_BITS) + 2 >= ofsend) {
+                               break;
+                       }
+
+                       dest = resample_multi_lagrange_m256(vp, dest, &i, count);
+               }
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+               while (count - i >= 4) {
+                       // !(ofsi + 2 < ofsend)
+                       if (((resrc->offset + resrc->increment * 3) >> FRACTION_BITS) + 2 >= ofsend) {
+                               break;
+                       }
+
+                       dest = resample_multi_lagrange_m128(vp, dest, &i, count);
+               }
+#endif
+
+               while (i < count && (resrc->offset >> FRACTION_BITS) + 2 < ofsend) {
+                       *dest++ = resample_lagrange(src, resrc->offset, resrc);
+                       resrc->offset += resrc->increment;
+                       i++;
+               }
+
+               // interpolate [ofsend - 2, ofsend - 1] linearly
+               while (i < count && (resrc->offset >> FRACTION_BITS) < 1) {
+                       *dest++ = resample_linear(src, resrc->offset, resrc);
+                       resrc->offset += resrc->increment;
+                       i++;
+               }
+
+               if (i < count) {
+                       memset(dest, 0, (count - i) * sizeof(DATA_T));
+                       resrc->offset += resrc->increment * (count - i);
+                       vp->finish_voice = 1;
+               }
+       } else {
+               while (i < count) {
+                       // interpolate [0, 1] linearly
+                       while (i < count && (resrc->offset >> FRACTION_BITS) < 1) {
+                               *dest++ = resample_linear(src, resrc->offset, resrc);
+                               resrc->offset += resrc->increment;
+                               i++;
+                       }
+
+#if USE_X86_EXT_INTRIN >= 8
+                       while (count - i >= 8) {
+                               spos_t ofs0i = resrc->offset >> FRACTION_BITS;
+                               spos_t ofs7i = (resrc->offset + resrc->increment * 7) >> FRACTION_BITS;
+
+                               if (resrc->increment > 0 ? ofsle <= ofs7i + 2 : ofs7i - 1 < ofsls || ofsle <= ofs0i + 2) {
+                                       break;
+                               }
+
+                               dest = resample_multi_lagrange_m256(vp, dest, &i, count);
+                       }
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+                       while (count - i >= 4) {
+                               spos_t ofs0i = resrc->offset >> FRACTION_BITS;
+                               spos_t ofs3i = (resrc->offset + resrc->increment * 3) >> FRACTION_BITS;
+
+                               if (resrc->increment > 0 ? ofsle <= ofs3i + 2 : ofs3i - 1 < ofsls || ofsle <= ofs0i + 2) {
+                                       break;
+                               }
+
+                               dest = resample_multi_lagrange_m128(vp, dest, &i, count);
+                       }
+#endif
+
+                       while (i < count) {
+                               spos_t ofsi = resrc->offset >> FRACTION_BITS;
+
+                               if (resrc->increment > 0 ? ofsle <= ofsi + 2 : ofsi - 1 < ofsls || ofsle <= ofsi + 2) {
+                                       break;
+                               }
+
+                               *dest++ = resample_lagrange(src, resrc->offset, resrc);
+                               resrc->offset += resrc->increment;
+                               i++;
+                       }
+
+                       while (i < count) {
+                               spos_t ofsi = resrc->offset >> FRACTION_BITS;
+
+                               if (resrc->increment > 0 ? ofsi + 2 < ofsle : ofsls <= ofsi - 1 && ofsi + 2 < ofsle) {
+                                       break;
+                               }
+
+                               *dest++ = resample_lagrange(src, resrc->offset, resrc);
+                               resrc->offset += resrc->increment;
+                               i++;
+
+                               if (resrc->loop_end < resrc->offset) {
+                                       if (resrc->mode == RESAMPLE_MODE_LOOP) {
+                                               resrc->offset -= resrc->loop_end - resrc->loop_start;
+                                       } else if (resrc->mode == RESAMPLE_MODE_BIDIR_LOOP && resrc->increment > 0) {
+                                               resrc->increment = -resrc->increment;
+                                       }
+                               } else if (resrc->mode == RESAMPLE_MODE_BIDIR_LOOP && resrc->increment < 0 && resrc->offset < resrc->loop_start) {
+                                       resrc->increment = -resrc->increment;
+                               }
+                       }
+               }
+       }
+}
+#endif // timidity41-eddb86e
+
+static inline DATA_T resample_lagrange_single(Voice *vp)
+{              /* Returns one interpolated sample at vp->resrc.offset: a cubic through four neighbouring samples, with the neighbours wrapped or reflected at loop boundaries, or linear interpolation when ofsi < 1. Does not modify resrc. */
+       sample_t *src = vp->sample->data;
+       const resample_rec_t *resrc = &vp->resrc;
+    fract_t ofsf = resrc->offset & FRACTION_MASK; // fractional part of the position
+    const spos_t ofsls = resrc->loop_start >> FRACTION_BITS; // loop start, in whole samples
+    const spos_t ofsle = resrc->loop_end >> FRACTION_BITS; // loop end, in whole samples
+       const spos_t ofsi = resrc->offset >> FRACTION_BITS; // integer part of the position
+    spos_t ofstmp, len;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+    FLOAT_T v[4], tmp;
+#else // DATA_T_IN32
+       int32 v[4], tmp;
+#endif
+       int32 i, dir;
+
+       switch(resrc->mode){ // decide how the four taps v[0..3] are fetched
+       case RESAMPLE_MODE_PLAIN:
+               if(ofsi < 1)
+                       goto do_linear; // src[ofsi - 1] would be before the data
+               break; // normal
+       case RESAMPLE_MODE_LOOP:
+               if(ofsi < ofsls){ // still before the loop region
+                       if(ofsi < 1)
+                               goto do_linear;
+                       if((ofsi + 2) < ofsle)
+                               break; // normal
+               }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+                       break; // normal                
+               len = ofsle - ofsls; // loop_length
+               ofstmp = ofsi - 1;
+               if(ofstmp < ofsls) {ofstmp += len;} // if loop_length == data_length need                       
+               for(i = 0; i < 4; i++){ // gather the four taps, wrapping at the loop end
+                       v[i] = src[ofstmp];                     
+                       if((++ofstmp) > ofsle) {ofstmp -= len;} // -= loop_length , jump loop_start
+               }
+               goto loop_ofs;
+               break;
+       case RESAMPLE_MODE_BIDIR_LOOP:                  
+               if(resrc->increment >= 0){ // normal dir
+                       if(ofsi < ofsls){
+                               if(ofsi < 1)
+                                       goto do_linear;
+                               if((ofsi + 2) < ofsle)
+                                       break; // normal
+                       }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+                               break; // normal
+                       dir = 1;
+                       ofstmp = ofsi - 1;
+                       if(ofstmp < ofsls){ // if loop_length == data_length need                               
+                               ofstmp = (ofsls << 1) - ofstmp; // reflect about loop_start
+                               dir = -1;
+                       }                       
+               }else{ // reverse dir
+                       dir = -1;
+                       ofstmp = ofsi + 1;
+                       if(ofstmp > ofsle){ // if loop_length == data_length need                               
+                               ofstmp = (ofsle << 1) - ofstmp; // reflect about loop_end
+                               dir = 1;
+                       }
+                       ofsf = mlt_fraction - ofsf; // taps are fetched in reverse order, so the fraction is mirrored
+               }
+               for(i = 0; i < 4; i++){ // gather the four taps, turning around at the loop boundaries
+                       v[i] = src[ofstmp];                     
+                       ofstmp += dir;
+                       if(dir < 0){ // -
+                               if(ofstmp <= ofsls) {dir = 1;}
+                       }else{ // +
+                               if(ofstmp >= ofsle) {dir = -1;}
+                       }
+               }
+               goto loop_ofs;
+               break;
+       }
+normal_ofs: // reached by falling out of the switch; no goto in this function targets this label
+       v[0] = src[ofsi - 1];
+    v[1] = src[ofsi];
+    v[2] = src[ofsi + 1];      
+       v[3] = src[ofsi + 2];
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+loop_ofs: // nested evaluation of the cubic through v[0..3]; the result accumulates in v[3]
+       ofsf += mlt_fraction; // shift x so that v[0] sits at x = 0 (assuming mlt_fraction is 1.0 in fixed point -- TODO confirm)
+       tmp = v[1] - v[0]; // first difference
+       v[3] += -3 * v[2] + 3 * v[1] - v[0]; // third difference
+       v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       v[3] += v[2] - v[1] - tmp; // plus second difference
+       v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       v[3] += tmp;
+       v[3] *= (FLOAT_T)ofsf * div_fraction;
+       v[3] += v[0];
+       return v[3] * OUT_INT16;
+do_linear: // two-point fallback using src[ofsi] and src[ofsi + 1]
+    v[1] = src[ofsi];
+       v[2] = (int32)(src[ofsi + 1]) - (int32)(src[ofsi]);
+    return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT16; // FLOAT_T
+#else // DATA_T_IN32
+loop_ofs: // same nested cubic evaluation in integer arithmetic via imuldiv_fraction
+       ofsf += mlt_fraction;
+       tmp = v[1] - v[0];
+       v[3] += -3*v[2] + 3*v[1] - v[0];
+       v[3] = imuldiv_fraction(v[3], (ofsf - ml2_fraction) / 6);
+       v[3] += v[2] - v[1] - tmp;
+       v[3] = imuldiv_fraction(v[3], (ofsf - mlt_fraction) >> 1);
+       v[3] += tmp;
+       v[3] = imuldiv_fraction(v[3], ofsf);
+       v[3] += v[0];
+       return v[3];
+do_linear: // two-point fallback using src[ofsi] and src[ofsi + 1]
+    v[1] = src[ofsi];
+       v[2] = src[ofsi + 1];
+       return v[1] + imuldiv_fraction(v[2] - v[1], ofsf);
+#endif
+}
+
+#if 0 //(USE_X86_EXT_INTRIN >= 9) // untested, unclear whether this works; "broadcast" may actually need to be set1...
+// offset:int32*8, resamp:float*8
+// keep the offset arithmetic inside the loop within int32 range , (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       const int32 req_count_mask = ~(0x7);
+       const int32 count = req_count & req_count_mask;
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
+       const int32 inc = resrc->increment;
+       const __m256i vinc = _mm256_broadcastd_epi32(inc * 8), vfmask = _mm256_broadcastd_epi32((int32)FRACTION_MASK);
+       __m256i vofs = _mm256_add_epi32(_mm256_broadcastd_epi32(start_offset), _mm256_set_epi32(inc*7,inc*6,inc*5,inc*4,inc*3,inc*2,inc,0));
+       const __m256 vdivf = _mm256_broadcastd_ps(div_fraction);        
+       const __m256 vfrac_6 = _mm256_broadcastd_ps(div_fraction * DIV_6);
+       const __m256 vfrac_2 = _mm256_broadcastd_ps(div_fraction * DIV_2);
+       const __m256 v3n = _mm256_broadcastd_ps(-3);
+       const __m256 v3p = _mm256_broadcastd_ps(3);
+       const __m256i vfrac = _mm256_broadcastd_epi32(mlt_fraction);
+       const __m256i vfrac2 = _mm256_broadcastd_epi32(ml2_fraction);
+       const __m256 vec_divo = _mm256_broadcastd_ps(DIV_15BIT);
+#ifdef LAO_OPTIMIZE_INCREMENT
+       // \8dÅ\93K\89»\83\8c\81[\83g = (\83\8d\81[\83h\83f\81[\83^\90\94 - \8f\89\8aú\83I\83t\83Z\83b\83g\8f¬\90\94\95\94\82Ì\8dÅ\91å\92l(1\96¢\96\9e) - \95â\8aÔ\83|\83C\83\93\83g\90\94(lagrange\82Í3) ) / \83I\83t\83Z\83b\83g\83f\81[\83^\90\94
+       // \83\8d\81[\83h\83f\81[\83^\90\94\82Íint16\97ppermutevar\82ª\82È\82¢\82Ì\82Å\95Ï\8a·\8cã\82Ì32bit(int32/float)\82Ì8\83Z\83b\83g\82É\82È\82é
+       const int32 opt_inc1 = (1 << FRACTION_BITS) * (8 - 1 - 3) / 8; // (float*8) * 1\83Z\83b\83g
+       if(inc < opt_inc1){     // 1\83Z\83b\83g
+       const __m256i vvar1n = _mm256_broadcastd_epi32(-1);
+       const __m256i vvar1 = _mm256_broadcastd_epi32(1);
+       const __m256i vvar2 = _mm256_broadcastd_epi32(2);
+       for(i = 0; i < count; i += 8) {
+       __m256i vofsi2 = _mm256_srli_epi32(vofs, FRACTION_BITS); // ofsi
+       __m256i vofsi1 = _mm256_add_epi32(vofsi2, vvar1n); // ofsi-1
+       __m256i vofsi3 = _mm256_add_epi32(vofsi2, vvar1); // ofsi+1
+       __m256i vofsi4 = _mm256_add_epi32(vofsi2, vvar2); // ofsi+2
+       int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128_si256(vofsi1, 0x0));
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofs0]); // int16*8
+       __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_si256()); 
+       __m256i vofsub1 = _mm256_sub_epi32(vofsi1, vofsib); 
+       __m256i vofsub2 = _mm256_sub_epi32(vofsi2, vofsib);  
+       __m256i vofsub3 = _mm256_sub_epi32(vofsi3, vofsib); 
+       __m256i vofsub4 = _mm256_sub_epi32(vofsi4, vofsib);
+       __m256 vvf1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vin1)); // int16 to float (i16*8->i32*8->f32*8
+       __m256 vv1 = _mm256_permutevar8x32_ps(vvf1, vofsub1); // v1 ofsi-1
+       __m256 vv2 = _mm256_permutevar8x32_ps(vvf1, vofsub2); // v2 ofsi
+       __m256 vv3 = _mm256_permutevar8x32_ps(vvf1, vofsub3); // v2 ofsi+1
+       __m256 vv4 = _mm256_permutevar8x32_ps(vvf1, vofsub4); // v2 ofsi+2
+       // \82 \82Æ\82Í\92Ê\8fí\82Æ\93¯\82
+       __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0];
+       __m256 vtmp1, vtmp2, vtmp3, vtmp4;
+       vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vtmp1 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac2)), vfrac_6); // tmp1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmp2 = _mm256_sub_ps(_mm256_sub_ps(vv2, vv1), vtmp); // tmp2 = v[2] - v[1] - tmp);
+       vtmp3 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac)), vfrac_2); // tmp3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmp4 = _mm256_mul_ps(_mm256_cvtepi32_ps(vofsf), vdivf); // tmp4 = (FLOAT_T)ofsf * div_fraction;
+       vv3 = MM256_FMA_PS(vv3, vtmp1, vtmp2); // v[3] = v[3] * tmp1 + tmp2
+       vv3 = MM256_FMA_PS(vv3, vtmp3, vtmp); // v[3] = v[3] * tmp3 + tmp;
+       vv3 = MM256_FMA_PS(vv3, vtmp4, vv0); // v[3] = v[3] * tmp4 + vv0;
+#if defined(DATA_T_DOUBLE)
+       vv3 = _mm256_mul_ps(vv3, vec_divo);
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x0)));
+       dest += 4;
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x1)));
+       dest += 4;
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+       _mm256_storeu_ps(dest, _mm256_mul_ps(vv3, vec_divo));
+       dest += 8;
+#else // DATA_T_IN32
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm256_extractf128_ps(vv3, 0x0)));
+       dest += 4;
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm256_extractf128_ps(vv3, 0x1)));
+       dest += 4;
+#endif
+       vofs = _mm256_add_epi32(vofs, vinc); // ofs += inc;
+       }
+       }else
+#endif // LAO_OPTIMIZE_INCREMENT
+       for(; i < count; i += 8) {
+       __m256i vofsi = _mm256_srli_epi32(vofs, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,0) - 1]); // ofsi-1~ofsi+2\82ð\83\8d\81[\83h
+       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,1) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,2) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,3) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,4) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,5) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,6) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,7) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82
+       __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21v31v41],[v12v22v32v42] to [v11v12v21v22v31v32v41v42]
+       __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23v33v43],[v14v24v34v44] to [v13v14v23v24v33v34v43v44]
+       __m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // [v15v25v35v45],[v16v26v36v46] to [v15v16v25v26v35v36v45v46]
+       __m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // [v17v27v37v47],[v18v28v38v48] to [v17v18v27v28v37v38v47v48]
+       __m128i vin1121 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22],[v13v14,v23v24] to [v11v12v13v14,v21v22v23v24]
+       __m128i vin3141 = _mm_unpackhi_epi32(vin12, vin34); // [v31v32,v41v42],[v33v34v,43v44] to [v31v32v33v34,v41v42v43v44]
+       __m128i vin1525 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26],[v17v18,v27v28] to [v15v16v17v18,v25v26v27v28]
+       __m128i vin3545 = _mm_unpackhi_epi32(vin56, vin78); // [v35v36,v45v46],[v37v38v,47v48] to [v35v36v37v38,v45v46v47v48]
+       __m128i vi16_1 = _mm_unpacklo_epi64(vin1121, vin1525); // [v11v12v13v14,v21v22v23v24],[v15v16v17v18,v25v26v27v28] to [v11v12v13v14v15v16v17v18]
+       __m128i vi16_2 = _mm_unpackhi_epi64(vin1121, vin1525); // [v11v12v13v14,v21v22v23v24],[v15v16v17v18,v25v26v27v28] to [v21v22v23v24v25v26v27v28]
+       __m128i vi16_3 = _mm_unpacklo_epi64(vin3141, vin3545); // [v31v32v33v34,v41v42v43v44],[v35v36v37v38,v45v46v47v48] to [v31v32v33v34v35v36v37v38]
+       __m128i vi16_4 = _mm_unpackhi_epi64(vin3141, vin3545); // [v31v32v33v34,v41v42v43v44],[v35v36v37v38,v45v46v47v48] to [v41v42v43v44v45v46v47v48]
+       __m256 vv0 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_1)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+       __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_2)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+       __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_3)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+       __m256 vv3 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_4)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+       __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0];
+       __m256 vtmp1, vtmp2, vtmp3, vtmp4;
+       vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vtmp1 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac2)), vfrac_6); // tmp1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmp2 = _mm256_sub_ps(_mm256_sub_ps(vv2, vv1), vtmp); // tmp2 = v[2] - v[1] - tmp);
+       vtmp3 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac)), vfrac_2); // tmp3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmp4 = _mm256_mul_ps(_mm256_cvtepi32_ps(vofsf), vdivf); // tmp4 = (FLOAT_T)ofsf * div_fraction;
+       vv3 = MM256_FMA_PS(vv3, vtmp1, vtmp2); // v[3] = v[3] * tmp1 + tmp2
+       vv3 = MM256_FMA_PS(vv3, vtmp3, vtmp); // v[3] = v[3] * tmp3 + tmp;
+       vv3 = MM256_FMA_PS(vv3, vtmp4, vv0); // v[3] = v[3] * tmp4 + vv0;
+#if defined(DATA_T_DOUBLE)
+       vv3 = _mm256_mul_ps(vv3, vec_divo);
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x0)));
+       dest += 4;
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x1)));
+       dest += 4;
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+       _mm256_storeu_ps(dest, _mm256_mul_ps(vv3, vec_divo));
+       dest += 8;
+#else // DATA_T_IN32
+       _mm256_storeu_si256((__m256i *)dest, _mm256_cvtps_epi32(vv3));
+       dest += 8;
+#endif
+       vofs = _mm256_add_epi32(vofs, vinc); // ofs += inc;
+       }
+       resrc->offset = prec_offset + (splen_t)(MM256_EXTRACT_I32(vofs,0));
+       *out_count = i;
+    return dest;
+}
+
+#elif (USE_X86_EXT_INTRIN >= 3)
+// SSE2 path. offset: int32*4*2, resamp: float*4*2, two 4-lane sets per iteration; author's timing note: 15.51s (one set: 16.08s).
+/* 4-point Lagrange resampling, 8 output samples per loop iteration.
+ * Only (req_count & ~7) samples are produced; the number actually written is
+ * stored in *out_count, the return value is dest advanced past them, and
+ * resrc->offset is written back from lane 0 of the running offset vector.
+ * Offsets inside the loop are kept in the int32 range relative to
+ * prec_offset (resrc->offset & INTEGER_MASK), which requires
+ * (sample_increment * (req_count+1)) < int32 max.
+ * Each _mm_loadu_si128 reads 16 bytes starting at src[ofsi - 1]; only the low
+ * 64 bits of each load are consumed by the unpacks that follow (presumably
+ * four int16 samples, ofsi-1..ofsi+2 -- TODO confirm sample_t is int16).
+ * In the unpack comments vKJ means tap K (1..4 = ofsi-1..ofsi+2) of output J.
+ */
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       const int32 req_count_mask = ~(0x7);
+       const int32 count = req_count & req_count_mask; // round down to a multiple of 8
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // keep the offset math in the int32 range (for SIMD)
+       const int32 inc = resrc->increment;
+       const __m128i vinc = _mm_set1_epi32(inc * 8), vfmask = _mm_set1_epi32((int32)FRACTION_MASK); // step per iteration (8 outputs), fraction mask
+       __m128i vofs1 = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0)); // offsets of outputs 0..3
+       __m128i vofs2 = _mm_add_epi32(vofs1, _mm_set1_epi32(inc * 4)); // offsets of outputs 4..7
+       const __m128 vdivf = _mm_set1_ps(div_fraction); 
+       const __m128 vfrac_6 = _mm_set1_ps(div_fraction * DIV_6);
+       const __m128 vfrac_2 = _mm_set1_ps(div_fraction * DIV_2);
+       const __m128 v3n = _mm_set1_ps(-3);
+       const __m128 v3p = _mm_set1_ps(3);
+       const __m128i vfrac = _mm_set1_epi32(mlt_fraction);
+       const __m128i vfrac2 = _mm_set1_epi32(ml2_fraction);
+       const __m128 vec_divo = _mm_set1_ps(DIV_15BIT); // output scale used by the float/double stores below
+       for(; i < count; i += 8) {
+       __m128i vofsi1 = _mm_srli_epi32(vofs1, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+       __m128i vofsi2 = _mm_srli_epi32(vofs2, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+       __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,0) - 1]); // load ofsi-1..ofsi+2
+       __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,1) - 1]); // same for the next output sample
+       __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,2) - 1]); // same for the next output sample
+       __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,3) - 1]); // same for the next output sample
+       __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,0) - 1]); // same for the next output sample
+       __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,1) - 1]); // same for the next output sample
+       __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,2) - 1]); // same for the next output sample
+       __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,3) - 1]); // same for the next output sample
+       __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21v31v41],[v12v22v32v42] to [v11v12v21v22v31v32v41v42]
+       __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23v33v43],[v14v24v34v44] to [v13v14v23v24v33v34v43v44]
+       __m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // [v15v25v35v45],[v16v26v36v46] to [v15v16v25v26v35v36v45v46]
+       __m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // [v17v27v37v47],[v18v28v38v48] to [v17v18v27v28v37v38v47v48]
+       __m128i vi16_1 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22],[v13v14,v23v24] to [v11v12v13v14,v21v22v23v24]
+       __m128i vi16_2 = _mm_unpackhi_epi32(vin12, vin34); // [v31v32,v41v42],[v33v34,v43v44] to [v31v32v33v34,v41v42v43v44]
+       __m128i vi16_3 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26],[v17v18,v27v28] to [v15v16v17v18,v25v26v27v28]
+       __m128i vi16_4 = _mm_unpackhi_epi32(vin56, vin78); // [v35v36,v45v46],[v37v38,v47v48] to [v35v36v37v38,v45v46v47v48]
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 , _mm_ cvtepi16_epi32()
+       __m128i vi16_1_2 = _mm_shuffle_epi32(vi16_1, 0x4e); // swap 64-bit halves: ofsi+0 taps go to the low 64 bits
+       __m128i vi16_2_2 = _mm_shuffle_epi32(vi16_2, 0x4e); // swap 64-bit halves: ofsi+2 taps go to the low 64 bits
+       __m128i vi16_3_2 = _mm_shuffle_epi32(vi16_3, 0x4e); // swap 64-bit halves: ofsi+0 taps go to the low 64 bits
+       __m128i vi16_4_2 = _mm_shuffle_epi32(vi16_4, 0x4e); // swap 64-bit halves: ofsi+2 taps go to the low 64 bits
+       __m128 vv01 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_1)); // int16 to float
+       __m128 vv11 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_1_2)); // int16 to float
+       __m128 vv21 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_2)); // int16 to float
+       __m128 vv31 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_2_2)); // int16 to float
+       __m128 vv02 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_3)); // int16 to float
+       __m128 vv12 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_3_2)); // int16 to float
+       __m128 vv22 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_4)); // int16 to float
+       __m128 vv32 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_4_2)); // int16 to float
+#else
+       __m128i sign1 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_1); // all-ones where the int16 is negative (manual sign extension)
+       __m128i sign2 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_2);
+       __m128i sign3 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_3);
+       __m128i sign4 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_4);
+       __m128 vv01 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_1, sign1)); // int16 to float
+       __m128 vv11 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_1, sign1)); // int16 to float
+       __m128 vv21 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_2, sign2)); // int16 to float
+       __m128 vv31 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_2, sign2)); // int16 to float
+       __m128 vv02 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_3, sign3)); // int16 to float
+       __m128 vv12 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_3, sign3)); // int16 to float
+       __m128 vv22 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_4, sign4)); // int16 to float
+       __m128 vv32 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_4, sign4)); // int16 to float
+#endif
+       __m128i vofsf1 = _mm_add_epi32(_mm_and_si128(vofs1, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m128i vofsf2 = _mm_add_epi32(_mm_and_si128(vofs2, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m128 vtmp1 = _mm_sub_ps(vv11, vv01); // tmp = v[1] - v[0];
+       __m128 vtmp2 = _mm_sub_ps(vv12, vv02); // tmp = v[1] - v[0];
+       __m128 vtmpx11, vtmpx12, vtmpx21, vtmpx22, vtmpx31, vtmpx32, vtmpx41, vtmpx42;
+       __m128 vtmpi1, vtmpi2;
+       vv31 = _mm_add_ps(vv31, _mm_sub_ps(MM_FMA2_PS(vv21, v3n, vv11, v3p), vv01)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vv32 = _mm_add_ps(vv32, _mm_sub_ps(MM_FMA2_PS(vv22, v3n, vv12, v3p), vv02)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac2));
+       vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac2));
+       vtmpx11 = _mm_mul_ps(vtmpi1, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmpx12 = _mm_mul_ps(vtmpi2, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmpx21 = _mm_sub_ps(_mm_sub_ps(vv21, vv11), vtmp1); // tmpx2 = v[2] - v[1] - tmp;
+       vtmpx22 = _mm_sub_ps(_mm_sub_ps(vv22, vv12), vtmp2); // tmpx2 = v[2] - v[1] - tmp;
+       vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac));
+       vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac));
+       vtmpx31 = _mm_mul_ps(vtmpi1, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmpx32 = _mm_mul_ps(vtmpi2, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmpi1 = _mm_cvtepi32_ps(vofsf1);
+       vtmpi2 = _mm_cvtepi32_ps(vofsf2);
+       vtmpx41 = _mm_mul_ps(vtmpi1, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+       vtmpx42 = _mm_mul_ps(vtmpi2, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+       vv31 = MM_FMA_PS(vv31, vtmpx11, vtmpx21); // v[3] = v[3] * tmpx1 + tmpx2
+       vv32 = MM_FMA_PS(vv32, vtmpx12, vtmpx22); // v[3] = v[3] * tmpx1 + tmpx2
+       vv31 = MM_FMA_PS(vv31, vtmpx31, vtmp1); // v[3] = v[3] * tmpx3 + tmp;
+       vv32 = MM_FMA_PS(vv32, vtmpx32, vtmp2); // v[3] = v[3] * tmpx3 + tmp;
+       vv31 = MM_FMA_PS(vv31, vtmpx41, vv01); // v[3] = v[3] * tmpx4 + vv0;
+       vv32 = MM_FMA_PS(vv32, vtmpx42, vv02); // v[3] = v[3] * tmpx4 + vv0;
+#if defined(DATA_T_DOUBLE)
+       vv31 = _mm_mul_ps(vv31, vec_divo);
+       vv32 = _mm_mul_ps(vv32, vec_divo);
+#if (USE_X86_EXT_INTRIN >= 8)  
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv31)); // widen 4 floats to 4 doubles and store
+       dest += 4;
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv32));
+       dest += 4;
+#else
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vv31)); // low two floats -> two doubles
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv31, vv31))); // high two floats -> two doubles
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vv32));
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv32, vv32)));
+       dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+       _mm_storeu_ps(dest, _mm_mul_ps(vv31, vec_divo));
+       dest += 4;
+       _mm_storeu_ps(dest, _mm_mul_ps(vv32, vec_divo));
+       dest += 4;
+#else // DATA_T_IN32
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv31)); // float -> int32, no output scaling in this path
+       dest += 4;
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv32));
+       dest += 4;
+#endif
+       vofs1 = _mm_add_epi32(vofs1, vinc); // ofs += inc * 8;
+       vofs2 = _mm_add_epi32(vofs2, vinc); // ofs += inc * 8;
+       }
+       resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs1,0)); // presumably lane 0 = offset of the next unprocessed output
+       *out_count = i;
+    return dest;
+}
+
+#else // not use MMX/SSE/AVX 
+// \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+       int32 ofs = (int32)(resrc->offset & FRACTION_MASK);
+       int32 inc = resrc->increment;
+
+       for(i = 0; i < req_count; i++) {
+               int32 ofsi, ofsf;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+               FLOAT_T v[4], tmp;
+               ofsi = ofs >> FRACTION_BITS, ofsf = ofs & FRACTION_MASK; ofs += inc;            
+               v[0] = src[ofsi - 1]; 
+               v[1] = src[ofsi];
+               v[2] = src[ofsi + 1];   
+               v[3] = src[ofsi + 2];           
+               ofsf += mlt_fraction;
+               tmp = v[1] - v[0];
+               v[3] += -3 * v[2] + 3 * v[1] - v[0];
+               v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+               v[3] += v[2] - v[1] - tmp;
+               v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+               v[3] += tmp;
+               v[3] *= (FLOAT_T)ofsf * div_fraction;
+               v[3] += v[0];
+               *dest++ = v[3] * OUT_INT16;
+#else // DATA_T_IN32
+               int32 v[4], tmp;
+               ofsi = ofs >> FRACTION_BITS, ofsf = ofs & FRACTION_MASK; ofs += inc;
+               v[0] = src[ofsi - 1];
+               v[1] = src[ofsi];
+               v[2] = src[ofsi + 1];   
+               v[3] = src[ofsi + 2];                   
+               ofsf += mlt_fraction;
+               tmp = v[1] - v[0];
+               v[3] += -3*v[2] + 3*v[1] - v[0];
+               v[3] = imuldiv_fraction(v[3], (ofsf - ml2_fraction) / 6);
+               v[3] += v[2] - v[1] - tmp;
+               v[3] = imuldiv_fraction(v[3], (ofsf - mlt_fraction) >> 1);
+               v[3] += tmp;
+               v[3] = imuldiv_fraction(v[3], ofsf);
+               v[3] += v[0];
+               *dest++ = v[3];         
+#endif
+       }
+       resrc->offset = prec_offset + (splen_t)ofs;
+       *out_count = i;
+    return dest;
+}
+#endif
+
+/*
+ * Non-looping playback: render the sample up to its end, pad the rest of the
+ * buffer with zeros and flag the voice as finished once padding starts.
+ */
+static void lao_rs_plain(Voice *vp, DATA_T *dest, int32 count)
+{
+       resample_rec_t *rec = &vp->resrc;
+       int32 produced = 0;
+       int32 playable;
+
+       /* A voice leaving a bidirectional loop may still be running backwards. */
+       if (rec->increment < 0)
+               rec->increment = -rec->increment;
+
+       /* Samples left before data_length, plus a margin of 4, clamped to [0, count]. */
+       playable = PRECALC_LOOP_COUNT(rec->offset, rec->data_length, rec->increment) + 4;
+       if (playable > count)
+               playable = count;
+       else if (playable < 0)
+               playable = 0;
+
+       /* The bulk routine reads src[ofsi - 1], so it is used only from index 1 onward. */
+       if ((rec->offset >> FRACTION_BITS) >= 1)
+               dest = resample_lagrange_multi(vp, dest, playable, &produced);
+
+       /* Whatever the bulk routine left over is rendered one sample at a time. */
+       while (produced < playable) {
+               *dest++ = resample_lagrange_single(vp);
+               rec->offset += rec->increment;
+               produced++;
+       }
+
+       /* Past the end of the sample: output silence and mark the voice done. */
+       if (produced < count) {
+               vp->finish_voice = 1;
+               do {
+                       *dest++ = 0;
+               } while (++produced < count);
+       }
+}
+
+/*
+ * Forward-looping playback: render up to loop_end, then jump back by the
+ * loop length and keep going until count samples have been written.
+ */
+static void lao_rs_loop(Voice *vp, DATA_T *dest, int32 count)
+{
+       resample_rec_t *rec = &vp->resrc;
+       int32 produced = 0;
+
+       /* Bulk-render the stretch that stays 4 samples clear of loop_end
+        * (4-point interpolation); skipped while the position is below index 1. */
+       if ((rec->offset >> FRACTION_BITS) >= 1) {
+               int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+               if (bulk > count)
+                       bulk = count;
+               else if (bulk < 0)
+                       bulk = 0;
+               dest = resample_lagrange_multi(vp, dest, bulk, &produced);
+       }
+
+       /* Remaining samples go through the per-sample path, wrapping at loop_end. */
+       while (produced < count) {
+               *dest++ = resample_lagrange_single(vp);
+               rec->offset += rec->increment;
+               if (rec->offset >= rec->loop_end) {
+                       /* Hopefully the loop is longer than an increment. */
+                       rec->offset -= rec->loop_end - rec->loop_start;
+               }
+               produced++;
+       }
+}
+
+/*
+ * Bidirectional (ping-pong) loop playback: the position is mirrored at
+ * loop_end and loop_start and the increment changes sign at each bounce.
+ */
+static void lao_rs_bidir(Voice *vp, DATA_T *dest, int32 count)
+{
+       resample_rec_t *rec = &vp->resrc;
+       int32 produced = 0;
+
+       /* The bulk routine is used only while moving forward from index 1 onward,
+        * for the stretch that stays 4 samples clear of loop_end (4-point interpolation). */
+       if (rec->increment > 0 && (rec->offset >> FRACTION_BITS) >= 1) {
+               int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+               if (bulk > count)
+                       bulk = count;
+               else if (bulk < 0)
+                       bulk = 0;
+               dest = resample_lagrange_multi(vp, dest, bulk, &produced);
+       }
+
+       for (; produced < count; produced++) {
+               *dest++ = resample_lagrange_single(vp);
+               rec->offset += rec->increment;
+
+               if (rec->increment > 0) {
+                       if (rec->offset < rec->loop_end)
+                               continue;
+                       /* overshot the loop end: mirror the position around it */
+                       rec->offset = (rec->loop_end << 1) - rec->offset;
+               } else {
+                       if (rec->offset > rec->loop_start)
+                               continue;
+                       /* reached the loop start: mirror the position around it */
+                       rec->offset = (rec->loop_start << 1) - rec->offset;
+               }
+               rec->increment = -rec->increment;
+       }
+}
+
+/*
+ * Picks the playback routine for a voice (plain, bidirectional loop or forward
+ * loop), records the choice in vp->resrc.mode and renders count samples to ptr.
+ * Once a voice has switched to plain playback, plain_flag keeps it there.
+ */
+static inline void resample_voice_lagrange_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{
+       const int modes = vp->sample->modes;
+
+       if (!vp->resrc.plain_flag) {
+               /* no envelope and the voice is off or dying */
+               const int ending_without_env =
+                       !(modes & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE));
+               /* release sample and the voice is off */
+               const int releasing =
+                       (modes & MODES_RELEASE) && (vp->status & VOICE_OFF);
+
+               if (!ending_without_env && !releasing) {
+                       if (modes & MODES_PINGPONG) {
+                               vp->resrc.mode = RESAMPLE_MODE_BIDIR_LOOP;
+                               lao_rs_bidir(vp, ptr, count);
+                       } else {
+                               vp->resrc.mode = RESAMPLE_MODE_LOOP;
+                               lao_rs_loop(vp, ptr, count);
+                       }
+                       return;
+               }
+               vp->resrc.plain_flag = 1; /* lock the voice into no-loop playback */
+       }
+
+       vp->resrc.mode = RESAMPLE_MODE_PLAIN;
+       lao_rs_plain(vp, ptr, count);
+}
+#endif /* optimize lagrange resample */
+
+
+/*************** optimize lagrange float resample ***********************/
+#if defined(PRECALC_LOOPS)
+
+static inline DATA_T resample_lagrange_float_single(Voice *vp)
+{              
+       float *src = (float *)vp->sample->data;
+       const resample_rec_t *resrc = &vp->resrc;
+    fract_t ofsf = resrc->offset & FRACTION_MASK;
+    const spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
+    const spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
+       const spos_t ofsi = resrc->offset >> FRACTION_BITS;
+    spos_t ofstmp, len;
+    FLOAT_T v[4], tmp;
+       int32 vi[4], tmpi;
+       int32 i, dir;
+
+       switch(resrc->mode){
+       case RESAMPLE_MODE_PLAIN:
+               if(ofsi < 1)
+                       goto do_linear;
+               break; // normal
+       case RESAMPLE_MODE_LOOP:
+               if(ofsi < ofsls){
+                       if(ofsi < 1)
+                               goto do_linear;
+                       if((ofsi + 2) < ofsle)
+                               break; // normal
+               }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+                       break; // normal                
+               len = ofsle - ofsls; // loop_length
+               ofstmp = ofsi - 1;
+               if(ofstmp < ofsls) {ofstmp += len;} // if loop_length == data_length need                       
+               for(i = 0; i < 4; i++){
+                       v[i] = src[ofstmp];                     
+                       if((++ofstmp) > ofsle) {ofstmp -= len;} // -= loop_length , jump loop_start
+               }
+               goto loop_ofs;
+               break;
+       case RESAMPLE_MODE_BIDIR_LOOP:                  
+               if(resrc->increment >= 0){ // normal dir
+                       if(ofsi < ofsls){
+                               if(ofsi < 1)
+                                       goto do_linear;
+                               if((ofsi + 2) < ofsle)
+                                       break; // normal
+                       }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+                               break; // normal
+                       dir = 1;
+                       ofstmp = ofsi - 1;
+                       if(ofstmp < ofsls){ // if loop_length == data_length need                               
+                               ofstmp = (ofsls << 1) - ofstmp;
+                               dir = -1;
+                       }                       
+               }else{ // reverse dir
+                       dir = -1;
+                       ofstmp = ofsi + 1;
+                       if(ofstmp > ofsle){ // if loop_length == data_length need                               
+                               ofstmp = (ofsle << 1) - ofstmp;
+                               dir = 1;
+                       }
+                       ofsf = mlt_fraction - ofsf;
+               }
+               for(i = 0; i < 4; i++){
+                       v[i] = src[ofstmp];                     
+                       ofstmp += dir;
+                       if(dir < 0){ // -
+                               if(ofstmp <= ofsls) {dir = 1;}
+                       }else{ // +
+                               if(ofstmp >= ofsle) {dir = -1;}
+                       }
+               }
+               goto loop_ofs;
+               break;
+       }
+normal_ofs:
+       v[0] = src[ofsi - 1];
+    v[1] = src[ofsi];
+    v[2] = src[ofsi + 1];      
+       v[3] = src[ofsi + 2];
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+loop_ofs:
+       ofsf += mlt_fraction;
+       tmp = v[1] - v[0];
+       v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       v[3] += v[2] - v[1] - tmp;
+       v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       v[3] += tmp;
+       v[3] *= (FLOAT_T)ofsf * div_fraction;
+       v[3] += v[0];
+       return v[3] * OUT_INT16;
+do_linear:
+    v[1] = src[ofsi];
+       v[2] = (int32)(src[ofsi + 1]) - (int32)(src[ofsi]);
+    return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT16; // FLOAT_T
+#else // DATA_T_INT32
+loop_ofs:
+       vi[0] = v[0] * M_15BIT;
+    vi[1] = v[1] * M_15BIT;
+    vi[2] = v[2] * M_15BIT;
+       vi[3] = v[3] * M_15BIT;
+       ofsf += mlt_fraction;
+       tmpi = vi[1] - vi[0];
+       vi[3] += -3*vi[2] + 3*vi[1] - vi[0];
+       vi[3] = imuldiv_fraction(vi[3], (ofsf - ml2_fraction) / 6);
+       vi[3] += vi[2] - vi[1] - tmpi;
+       vi[3] = imuldiv_fraction(vi[3], (ofsf - mlt_fraction) >> 1);
+       vi[3] += tmpi;
+       vi[3] = imuldiv_fraction(vi[3], ofsf);
+       vi[3] += vi[0];
+       return vi[3];
+do_linear:
+    v[1] = src[ofsi];
+       v[2] = src[ofsi + 1];
+       vi[0] = v[0] * M_15BIT;
+    vi[1] = v[1] * M_15BIT;
+       return v[1] + imuldiv_fraction(vi[2] - vi[1], ofsf);
+#endif
+}
+
+#if (USE_X86_EXT_INTRIN >= 3)
+// offset:int32*4*2, resamp:float*4*2 2set
+// \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_lagrange_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       const int32 req_count_mask = ~(0x7);
+       const int32 count = req_count & req_count_mask;
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS);
+       const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
+       const int32 inc = resrc->increment;
+       const __m128i vinc = _mm_set1_epi32(inc * 8), vfmask = _mm_set1_epi32((int32)FRACTION_MASK);
+       __m128i vofs1 = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0));
+       __m128i vofs2 = _mm_add_epi32(vofs1, _mm_set1_epi32(inc * 4));
+       const __m128 vdivf = _mm_set1_ps(div_fraction); 
+       const __m128 vfrac_6 = _mm_set1_ps(div_fraction * DIV_6);
+       const __m128 vfrac_2 = _mm_set1_ps(div_fraction * DIV_2);
+       const __m128 v3n = _mm_set1_ps(-3);
+       const __m128 v3p = _mm_set1_ps(3);
+       const __m128i vfrac = _mm_set1_epi32(mlt_fraction);
+       const __m128i vfrac2 = _mm_set1_epi32(ml2_fraction);
+       const __m128 vec_divo = _mm_set1_ps(M_15BIT);
+       for(; i < count; i += 8) {
+       __m128i vofsi1 = _mm_srli_epi32(vofs1, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+       __m128i vofsi2 = _mm_srli_epi32(vofs2, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+       __m128 vin1 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,0) - 1]); // ofsi-1~ofsi+2\82ð\83\8d\81[\83h [v11v12v13v14]
+       __m128 vin2 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,1) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v21v22v23v24]
+       __m128 vin3 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,2) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v31v32v33v34]
+       __m128 vin4 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,3) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v41v42v43v44]    
+       __m128 vin5 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,0) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v51v52v53v54]
+       __m128 vin6 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,1) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v61v62v63v64]
+       __m128 vin7 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,2) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v71v72v73v74]
+       __m128 vin8 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,3) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶ [v81v82v83v84]    
+    __m128 vin12a = _mm_shuffle_ps(vin1, vin2, 0x44); // [v11,v12,v21,v22]
+    __m128 vin12b = _mm_shuffle_ps(vin1, vin2, 0xEE); // [v13,v14,v23,v24]
+    __m128 vin34a = _mm_shuffle_ps(vin3, vin4, 0x44); // [v31,v32,v41,v42]
+    __m128 vin34b = _mm_shuffle_ps(vin3, vin4, 0xEE); // [v33,v34,v43,v44]
+    __m128 vin56a = _mm_shuffle_ps(vin5, vin6, 0x44); // [v51,v52,v61,v62]
+    __m128 vin56b = _mm_shuffle_ps(vin5, vin6, 0xEE); // [v53,v54,v63,v64]
+    __m128 vin78a = _mm_shuffle_ps(vin7, vin8, 0x44); // [v71,v72,v81,v82]
+    __m128 vin78b = _mm_shuffle_ps(vin7, vin8, 0xEE); // [v73,v74,v83,v84]
+    __m128 vv01 = _mm_shuffle_ps(vin12a, vin34a, 0x88); // [v11,v21,v31,v41]
+    __m128 vv11 = _mm_shuffle_ps(vin12a, vin34a, 0xDD); // [v12,v22,v32,v42]
+    __m128 vv21 = _mm_shuffle_ps(vin12b, vin34b, 0x88); // [v13,v23,v33,v43]
+    __m128 vv31 = _mm_shuffle_ps(vin12b, vin34b, 0xDD); // [v14,v24,v34,v44]
+    __m128 vv02 = _mm_shuffle_ps(vin56a, vin78a, 0x88); // [v51,v61,v71,v81]
+    __m128 vv12 = _mm_shuffle_ps(vin56a, vin78a, 0xDD); // [v52,v62,v72,v82]
+    __m128 vv22 = _mm_shuffle_ps(vin56b, vin78b, 0x88); // [v53,v63,v73,v83]
+    __m128 vv32 = _mm_shuffle_ps(vin56b, vin78b, 0xDD); // [v54,v64,v74,v84]
+       __m128i vofsf1 = _mm_add_epi32(_mm_and_si128(vofs1, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m128i vofsf2 = _mm_add_epi32(_mm_and_si128(vofs2, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m128 vtmp1 = _mm_sub_ps(vv11, vv01); // tmp = v[1] - v[0];
+       __m128 vtmp2 = _mm_sub_ps(vv12, vv02); // tmp = v[1] - v[0];
+       __m128 vtmpx11, vtmpx12, vtmpx21, vtmpx22, vtmpx31, vtmpx32, vtmpx41, vtmpx42;
+       __m128 vtmpi1, vtmpi2;
+       vv31 = _mm_add_ps(vv31, _mm_sub_ps(MM_FMA2_PS(vv21, v3n, vv11, v3p), vv01)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vv32 = _mm_add_ps(vv32, _mm_sub_ps(MM_FMA2_PS(vv22, v3n, vv12, v3p), vv02)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+       vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac2));
+       vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac2));
+       vtmpx11 = _mm_mul_ps(vtmpi1, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmpx12 = _mm_mul_ps(vtmpi2, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+       vtmpx21 = _mm_sub_ps(_mm_sub_ps(vv21, vv11), vtmp1); // tmpx2 = v[2] - v[1] - tmp);
+       vtmpx22 = _mm_sub_ps(_mm_sub_ps(vv22, vv12), vtmp2); // tmpx2 = v[2] - v[1] - tmp);
+       vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac));
+       vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac));
+       vtmpx31 = _mm_mul_ps(vtmpi1, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmpx32 = _mm_mul_ps(vtmpi2, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+       vtmpi1 = _mm_cvtepi32_ps(vofsf1);
+       vtmpi2 = _mm_cvtepi32_ps(vofsf2);
+       vtmpx41 = _mm_mul_ps(vtmpi1, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+       vtmpx42 = _mm_mul_ps(vtmpi2, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+       vv31 = MM_FMA_PS(vv31, vtmpx11, vtmpx21); // v[3] = v[3] * tmpx1 + tmpx2
+       vv32 = MM_FMA_PS(vv32, vtmpx12, vtmpx22); // v[3] = v[3] * tmp1 + tmp2
+       vv31 = MM_FMA_PS(vv31, vtmpx31, vtmp1); // v[3] = v[3] * tmpx3 + tmp;
+       vv32 = MM_FMA_PS(vv32, vtmpx32, vtmp2); // v[3] = v[3] * tmpx3 + tmp;
+       vv31 = MM_FMA_PS(vv31, vtmpx41, vv01); // v[3] = v[3] * tmpx4 + vv0;
+       vv32 = MM_FMA_PS(vv32, vtmpx42, vv02); // v[3] = v[3] * tmpx4 + vv0;
+#if defined(DATA_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 8)  
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv31));
+       dest += 4;
+       _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv32));
+       dest += 4;
+#else
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vv31));
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv31, vv31)));
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(vv32));
+       dest += 2;
+       _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv32, vv32)));
+       dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+       _mm_storeu_ps(dest, vv31);
+       dest += 4;
+       _mm_storeu_ps(dest, vv32);
+       dest += 4;
+#else // DATA_T_IN32
+       vv31 = _mm_mul_ps(vv31, vdivo);
+       vv32 = _mm_mul_ps(vv32, vdivo);
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv31));
+       dest += 4;
+       _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv32));
+       dest += 4;
+#endif
+       vofs1 = _mm_add_epi32(vofs1, vinc); // ofs += inc;
+       vofs2 = _mm_add_epi32(vofs2, vinc); // ofs += inc;
+       }
+       resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs1,0));
+       *out_count = i;
+    return dest;
+}
+
+#else // not use MMX/SSE/AVX 
+// \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
+/*
+ * Scalar (non-SIMD) bulk resampler for float sample data using four
+ * neighbouring source samples per output sample.
+ *
+ * vp        : voice whose resrc.offset / resrc.increment drive the read position
+ * dest      : output buffer; req_count values are written
+ * req_count : number of output samples to produce
+ * out_count : receives the number of samples actually produced (the loop counter)
+ * returns   : dest advanced past the samples written
+ *
+ * The integer part of resrc->offset is folded into the src pointer once, and
+ * only the fractional part plus the accumulated increments is tracked in the
+ * int32 variable ofs inside the loop.
+ * src[ofsi - 1] is read, so the caller must ensure at least one sample exists
+ * before the current position.
+ */
+static inline DATA_T *resample_lagrange_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+       resample_rec_t *resrc = &vp->resrc;
+       int32 i = 0;
+       /* integer (whole-sample) part of the current offset, still in fixed point */
+       splen_t prec_offset = resrc->offset & INTEGER_MASK;
+       float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS);
+       /* fractional part of the offset; grows by inc on every output sample */
+       int32 ofs = (int32)(resrc->offset & FRACTION_MASK);
+       int32 inc = resrc->increment;
+
+       for(i = 0; i < req_count; i++) {
+               int32 ofsi, ofsf;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+               FLOAT_T v[4], tmp;
+               /* split into sample index and fraction, then step to the next position */
+               ofsi = ofs >> FRACTION_BITS, ofsf = ofs & FRACTION_MASK; ofs += inc;            
+               v[0] = src[ofsi - 1]; 
+               v[1] = src[ofsi];
+               v[2] = src[ofsi + 1];   
+               v[3] = src[ofsi + 2];           
+               /* NOTE(review): mlt_fraction / ml2_fraction / div_fraction are defined elsewhere;
+                  presumably 1.0 and 2.0 in fixed point and 1/2^FRACTION_BITS -- confirm */
+               ofsf += mlt_fraction;
+               tmp = v[1] - v[0];
+               /* nested evaluation: each step scales the running value by a fraction term and adds the next difference */
+               v[3] += -3 * v[2] + 3 * v[1] - v[0];
+               v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+               v[3] += v[2] - v[1] - tmp;
+               v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+               v[3] += tmp;
+               v[3] *= (FLOAT_T)ofsf * div_fraction;
+               v[3] += v[0];
+               *dest++ = v[3];
+#else // DATA_T_IN32
+               int32 v[4], tmp;
+               ofsi = ofs >> FRACTION_BITS, ofsf = ofs & FRACTION_MASK; ofs += inc;
+               /* scale the float samples by M_15BIT before truncating them to int32 */
+               v[0] = src[ofsi - 1] * M_15BIT;
+               v[1] = src[ofsi] * M_15BIT;
+               v[2] = src[ofsi + 1] * M_15BIT; 
+               v[3] = src[ofsi + 2] * M_15BIT;                 
+               ofsf += mlt_fraction;
+               tmp = v[1] - v[0];
+               /* same nested evaluation as the floating-point branch, using imuldiv_fraction for the scaling steps */
+               v[3] += -3*v[2] + 3*v[1] - v[0];
+               v[3] = imuldiv_fraction(v[3], (ofsf - ml2_fraction) / 6);
+               v[3] += v[2] - v[1] - tmp;
+               v[3] = imuldiv_fraction(v[3], (ofsf - mlt_fraction) >> 1);
+               v[3] += tmp;
+               v[3] = imuldiv_fraction(v[3], ofsf);
+               v[3] += v[0];
+               *dest++ = v[3];         
+#endif
+       }
+       /* recombine the whole-sample base with the accumulated fractional offset */
+       resrc->offset = prec_offset + (splen_t)ofs;
+       *out_count = i;
+    return dest;
+}
+#endif
+
+static void lao_rs_plain_float(Voice *vp, DATA_T *dest, int32 count)
+{
+       /*
+        * One-shot playback: render up to the end of the sample data, then pad
+        * the rest of the buffer with silence and mark the voice as finished.
+        */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 done = 0, limit = 0;
+
+       /* A voice leaving a bidirectional loop may still be running backwards. */
+       if (resrc->increment < 0)
+               resrc->increment = -resrc->increment;
+
+       /* Number of samples that can still be rendered, with a 4-sample margin. */
+       limit = PRECALC_LOOP_COUNT(resrc->offset, resrc->data_length, resrc->increment) + 4;
+       if (limit > count)
+               limit = count;
+       else if (limit < 0)
+               limit = 0;
+
+       /* The bulk routine reads one sample before the current position. */
+       if ((resrc->offset >> FRACTION_BITS) >= 1)
+               dest = resample_lagrange_float_multi(vp, dest, limit, &done);
+
+       /* Whatever the bulk routine did not produce is rendered one sample at a time. */
+       while (done < limit) {
+               *dest++ = resample_lagrange_float_single(vp);
+               resrc->offset += resrc->increment;
+               done++;
+       }
+
+       /* Past the end of the data: silence, and ask for the voice to be released. */
+       while (done < count) {
+               *dest++ = 0;
+               vp->finish_voice = 1;
+               done++;
+       }
+}
+
+static void lao_rs_loop_float(Voice *vp, DATA_T *dest, int32 count)
+{
+       /*
+        * Forward-looping playback: render in bulk while safely inside the
+        * loop, then sample by sample, wrapping back when loop_end is reached.
+        */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 done = 0;
+
+       /* The bulk routine reads one sample before the current position. */
+       if ((resrc->offset >> FRACTION_BITS) >= 1) {
+               /* Stay 4 samples short of loop_end (4-point interpolation). */
+               int32 bulk = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 4;
+
+               if (bulk > count)
+                       bulk = count;
+               else if (bulk < 0)
+                       bulk = 0;
+               dest = resample_lagrange_float_multi(vp, dest, bulk, &done);
+       }
+
+       while (done < count) {
+               *dest++ = resample_lagrange_float_single(vp);
+               resrc->offset += resrc->increment;
+               /* Wrap by one loop length; this relies on the loop being longer than an increment. */
+               if (resrc->offset >= resrc->loop_end)
+                       resrc->offset -= resrc->loop_end - resrc->loop_start;
+               done++;
+       }
+}
+
+static void lao_rs_bidir_float(Voice *vp, DATA_T *dest, int32 count)
+{
+       /*
+        * Bidirectional (ping-pong) loop playback: the read position is
+        * reflected at loop_end and loop_start and the increment changes sign.
+        */
+       resample_rec_t *resrc = &vp->resrc;
+       int32 done = 0;
+
+       /* Bulk rendering is only used while moving forward, and needs one sample of history. */
+       if ((resrc->offset >> FRACTION_BITS) >= 1 && resrc->increment > 0) {
+               /* Stay 4 samples short of loop_end (4-point interpolation). */
+               int32 bulk = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 4;
+
+               if (bulk > count)
+                       bulk = count;
+               else if (bulk < 0)
+                       bulk = 0;
+               dest = resample_lagrange_float_multi(vp, dest, bulk, &done);
+       }
+
+       while (done < count) {
+               int forward;
+
+               *dest++ = resample_lagrange_float_single(vp);
+               resrc->offset += resrc->increment;
+
+               /* Direction is sampled before any reflection so only one edge is tested per step. */
+               forward = (resrc->increment > 0);
+               if (forward && resrc->offset >= resrc->loop_end) {
+                       resrc->offset = (resrc->loop_end << 1) - resrc->offset;
+                       resrc->increment = -resrc->increment;
+               } else if (!forward && resrc->offset <= resrc->loop_start) {
+                       resrc->offset = (resrc->loop_start << 1) - resrc->offset;
+                       resrc->increment = -resrc->increment;
+               }
+               done++;
+       }
+}
+
+/*
+ * Choose the playback mode (plain / bidirectional loop / forward loop) for a
+ * float-sample voice and run the matching Lagrange resampler.
+ * Once a voice has been switched to plain playback, plain_flag keeps it there.
+ */
+static inline void resample_voice_lagrange_float_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{
+       const int mode = vp->sample->modes;
+       int plain = (vp->resrc.plain_flag != 0);
+
+       if (!plain) {
+               /* no envelope and the voice is off or dying */
+               const int ending_without_env =
+                       !(mode & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE));
+               /* release sample and the voice is off */
+               const int ending_with_release =
+                       (mode & MODES_RELEASE) && (vp->status & VOICE_OFF);
+
+               if (ending_without_env || ending_with_release) {
+                       vp->resrc.plain_flag = 1; /* lock no loop */
+                       plain = 1;
+               }
+       }
+
+       if (plain) {
+               vp->resrc.mode = RESAMPLE_MODE_PLAIN;
+               lao_rs_plain_float(vp, ptr, count);
+               return;
+       }
+
+       if (mode & MODES_PINGPONG) {
+               vp->resrc.mode = RESAMPLE_MODE_BIDIR_LOOP;
+               lao_rs_bidir_float(vp, ptr, count);
+               return;
+       }
+
+       vp->resrc.mode = RESAMPLE_MODE_LOOP;
+       lao_rs_loop_float(vp, ptr, count);
+}
+#endif /* optimize lagrange float resample */
+
+
+
+/*************** resampling with fixed increment *****************/
+///r
+/*
+ * Plain (non-looping) playback straight from the cached sample data: samples
+ * are copied one-to-one up to loop_end, the remainder of the buffer is zeroed
+ * and the voice is flagged as finished.
+ */
+static void rs_plain_c(int v, DATA_T *ptr, int32 count)
+{
+    Voice *vp = &voice[v];
+    cache_t *src = (cache_t *)vp->sample->data;
+    DATA_T *dest = ptr + vp->resrc.buffer_offset;
+    splen_t ofs = vp->resrc.offset >> FRACTION_BITS;
+    splen_t le = vp->sample->loop_end >> FRACTION_BITS;
+    splen_t i = ofs + count;
+    int32 avail;
+
+    /* Clamp the end position to loop_end and derive how many samples remain. */
+    if (i > le)
+        i = le;
+    avail = i - ofs;
+
+    i = 0;
+    while (i < avail) {
+        dest[i] = src[i + ofs];
+        i++;
+    }
+    while (i < count) {
+        vp->finish_voice = 1;
+        dest[i] = 0;
+        i++;
+    }
+
+    vp->resrc.offset = (ofs + avail) << FRACTION_BITS;
+}
+///r
+/*
+ * Fixed-increment resampling of a non-looping sample.
+ *
+ * v     : index into the global voice[] array
+ * ptr   : output buffer
+ * count : number of output samples to write
+ *
+ * Samples are produced until the read offset reaches data_length; the rest of
+ * the buffer is filled with zeros and vp->finish_voice is set.
+ * When the voice has cached data and the increment is exactly one sample per
+ * output sample, the work is delegated to rs_plain_c().
+ */
+static void rs_plain(int v, DATA_T *ptr, int32 count)
+{
+  /* Play sample until end, then free the voice. */
+  Voice *vp = &voice[v];
+  DATA_T *dest = ptr;
+       sample_t *src = vp->sample->data;
+       int data_type = vp->sample->data_type;
+  /* NOTE(review): src, data_type, ls and le are presumably consumed by the RESAMPLATION macro, which is defined elsewhere -- confirm before removing any of them */
+  splen_t
+    ofs = vp->resrc.offset,
+    ls = 0,
+    le = vp->sample->data_length;
+  int32 incr = vp->resrc.increment;
+#ifdef PRECALC_LOOPS
+  int32 i = 0, j;
+#endif
+
+       /* fast path: cached data played at a 1:1 rate */
+       if(vp->cache && incr == (1 << FRACTION_BITS)){
+               rs_plain_c(v, ptr, count);
+               return;
+       }       
+
+#ifdef PRECALC_LOOPS
+       if (incr < 0) incr = -incr; /* In case we're coming out of a bidir loop */
+  /* Precalc how many times we should go through the loop.
+     NOTE: Assumes that incr > 0 and that ofs <= le */
+       j = PRECALC_LOOP_COUNT(ofs, le, incr);
+       /* clamp the precalculated count to [0, count] */
+       if (j > count) {j = count;}
+       else if(j < 0) {j = 0;} 
+       for(i = 0; i < j; i++) {
+      RESAMPLATION;
+      ofs += incr;
+    }
+       /* end of data reached: pad with silence and request voice release */
+       for (; i < count; i++) {
+               *dest++ = 0;
+               vp->finish_voice = 1;
+       }       
+#else /* PRECALC_LOOPS */
+       /* without precalculation the end-of-data test is made for every output sample */
+       while (count--)
+       {
+               if (ofs >= le){
+                       *dest++ = 0;
+                       vp->finish_voice = 1;
+               }else {
+                       RESAMPLATION;
+                       ofs += incr;
+               }
+       }
+#endif /* PRECALC_LOOPS */
+
+  vp->resrc.offset = ofs; /* Update offset */
+}
+static void rs_loop_c(Voice *vp, DATA_T *ptr, int32 count)
+{
+  splen_t
+               ofs = vp->resrc.offset >> FRACTION_BITS,
+               le = vp->sample->loop_end >> FRACTION_BITS,
+               ll = le - (vp->sample->loop_start >> FRACTION_BITS);
+
+       DATA_T *dest = ptr;
+       cache_t *src = (cache_t *)vp->sample->data;
+       int32 i, j;
+
+// ERROR loop_start = 4215529472 
+       if(ll < 0)
+       {       
+               vp->sample->loop_start = 0;
+               ll = le - (vp->sample->loop_start >> FRACTION_BITS);
+       }       
+
+       while(count){
+               while(ofs >= le)
+                       ofs -= ll;
                /* Precalc how many times we should go through the loop */
                i = le - ofs;
                if(i > count)
@@ -5072,7 +6433,7 @@ void resample_voice(int v, DATA_T *ptr, int32 count)
 {
     Voice *vp = &voice[v];
     int mode;
-       int32 i;
+       int32 i = 0;
        int32 a;        
 
        if(!opt_resample_over_sampling && vp->sample->sample_rate == play_mode->rate &&
@@ -5090,7 +6451,7 @@ void resample_voice(int v, DATA_T *ptr, int32 count)
                        /* Let the caller know how much data we had left */
                        count2 = (int32)((vp->sample->data_length >> FRACTION_BITS) - ofs);
                }else
-                       vp->resrc.offset += (count2 << FRACTION_BITS);
+                       vp->resrc.offset += ((splen_t)count2 << FRACTION_BITS);
 
                switch(vp->sample->data_type){
                case SAMPLE_TYPE_INT16:
@@ -5125,9 +6486,22 @@ void resample_voice(int v, DATA_T *ptr, int32 count)
        vp->resrc.increment = (vp->resrc.increment >= 0) ? a : -a;
        
 #if defined(PRECALC_LOOPS)
-       if(opt_resample_type == RESAMPLE_LINEAR && vp->sample->data_type == SAMPLE_TYPE_INT16){
-               resample_voice_linear_optimize(vp, ptr, count);
-               return;
+       if(opt_resample_type == RESAMPLE_LINEAR){
+               if(vp->sample->data_type == SAMPLE_TYPE_INT16){
+                       resample_voice_linear_optimize(vp, ptr, count);
+                       return;
+               }else if(vp->sample->data_type == SAMPLE_TYPE_FLOAT && !opt_pre_resamplation){
+                       resample_voice_linear_float_optimize(vp, ptr, count);
+                       return;
+               }
+       } else if (opt_resample_type == RESAMPLE_LAGRANGE){
+               if(vp->sample->data_type == SAMPLE_TYPE_INT16){
+                       resample_voice_lagrange_optimize(vp, ptr, count);
+                       return;
+               }else if(vp->sample->data_type == SAMPLE_TYPE_FLOAT && !opt_pre_resamplation){
+                       resample_voice_lagrange_float_optimize(vp, ptr, count);
+                       return;
+               }
        }
 #endif
        
diff --git a/timidity/sfz.cpp b/timidity/sfz.cpp
new file mode 100644 (file)
index 0000000..3ed8089
--- /dev/null
@@ -0,0 +1,1518 @@
+// SFZ Support Routines for TiMidity++
+// Copyright (c) 2018 Starg <https://osdn.net/projects/timidity41>
+
+extern "C"
+{
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "timidity.h"
+#include "common.h"
+#include "controls.h"
+#include "tables.h"
+
+#include "sfz.h"
+
+// smplfile.c
+Instrument *extract_sample_file(char *sample_file);
+}
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+
+#include <algorithm>
+#include <exception>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <sstream>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace TimSFZ
+{
+
+using namespace std::string_literals;
+using namespace std::string_view_literals;
+
+// Deleter for std::unique_ptr<timidity_file, ...>: closes the file with close_file().
+struct TFFileCloser
+{
+    void operator()(timidity_file* pFile) const
+    {
+        // Null handles are ignored.
+        if (pFile == nullptr)
+        {
+            return;
+        }
+
+        ::close_file(pFile);
+    }
+};
+
+// Deleter for std::unique_ptr<Instrument, ...>: releases the instrument with free_instrument().
+struct InstrumentDeleter
+{
+    void operator()(Instrument* pInstrument) const
+    {
+        // Null pointers are ignored.
+        if (pInstrument == nullptr)
+        {
+            return;
+        }
+
+        ::free_instrument(pInstrument);
+    }
+};
+
+std::string ReadEntireFile(std::string url)
+{
+    std::unique_ptr<timidity_file, TFFileCloser> pFile(::open_file(url.data(), 1, OF_NORMAL));
+
+    if (!pFile)
+    {
+        throw std::runtime_error("unable to open '"s + url + "'");
+    }
+
+    std::string buf;
+
+    while (true)
+    {
+        int c = tf_getc(pFile.get());
+
+        if (c == EOF)
+        {
+            break;
+        }
+
+        buf.push_back(static_cast<char>(c));
+    }
+
+    return buf;
+}
+
+// Resolves `relPath` against the directory that contains the file `base`.
+//
+// base    : path of the referring file; both '/' and '\' count as separators
+// relPath : path written relative to that file's directory
+// returns : "<directory of base>/<relPath>", or `relPath` unchanged when
+//           `base` has no directory component
+//
+// Returning "/" + relPath for a directory-less `base` would turn a path that
+// is relative to the current directory into one at the filesystem root.
+std::string ConstructPath(std::string_view base, std::string_view relPath)
+{
+    const std::size_t lastPathDelimiterOffset = base.find_last_of("/\\");
+
+    if (lastPathDelimiterOffset == std::string_view::npos)
+    {
+        return std::string(relPath);
+    }
+
+    // Keep everything before the last separator; for "/file" this is the empty
+    // string, so the result correctly stays rooted at "/".
+    std::string path(base.substr(0, lastPathDelimiterOffset));
+    path.append("/").append(relPath);
+    return path;
+}
+
+// Holds the path of a single file.
+// NOTE(review): no use of this struct is visible near its definition -- confirm it is still needed.
+struct FileInfo
+{
+    std::string FilePath;
+};
+
+// A position in the input, used for diagnostics: which file and which line.
+struct FileLocationInfo
+{
+    std::size_t FileID;     // index into the preprocessor's list of file names
+    std::uint32_t Line; // 1-based
+};
+
+class TextBuffer
+{
+public:
+    TextBuffer() = default;
+
+    TextBuffer(std::string str, FileLocationInfo loc)
+        : m_Text(std::move(str)), m_Locations{PartLocationInfo{0, loc}}
+    {
+    }
+
+    class View
+    {
+        friend class TextBuffer;
+
+        View(const TextBuffer* pBuffer, std::size_t offset, std::size_t length)
+            : m_pBuffer(pBuffer), m_Offset(offset), m_Length(length)
+        {
+        }
+
+    public:
+        View() : m_pBuffer(nullptr), m_Offset(0), m_Length(0)
+        {
+        }
+
+        View(const View&) = default;
+        View& operator=(const View&) = default;
+
+        bool IsEmpty() const
+        {
+            return m_Length == 0;
+        }
+
+        std::size_t GetLength() const
+        {
+            return m_Length;
+        }
+
+        void SetLength(std::size_t len)
+        {
+            assert(len <= m_Length);
+            m_Length = len;
+        }
+
+        char operator[](std::size_t i) const
+        {
+            return (*m_pBuffer)[m_Offset + i];
+        }
+
+        char Peek(std::size_t i = 0) const
+        {
+            return (*m_pBuffer)[m_Offset + i];
+        }
+
+        char PeekOr(std::size_t i = 0, char defaultValue = '\0') const
+        {
+            return i < m_Length ? (*m_pBuffer)[m_Offset + i] : defaultValue;
+        }
+
+        std::string ToString() const
+        {
+            return std::string(ToStringView());
+        }
+
+        std::string_view ToStringView() const
+        {
+            return std::string_view(m_pBuffer->m_Text.data() + m_Offset, m_Length);
+        }
+
+        void Advance(std::size_t count = 1)
+        {
+            assert(count <= m_Length);
+            m_Offset += count;
+            m_Length -= count;
+        }
+
+        FileLocationInfo GetLocationInfo(std::size_t i = 0) const
+        {
+            return m_pBuffer->GetLocationInfo(m_Offset + i);
+        }
+
+    private:
+        const TextBuffer* m_pBuffer;
+        std::size_t m_Offset;
+        std::size_t m_Length;
+    };
+
+    View GetView() const
+    {
+        return View(this, 0, m_Text.size());
+    }
+
+    View GetView(std::size_t offset, std::size_t length) const
+    {
+        return View(this, offset, length);
+    }
+
+    char operator[](std::size_t offset) const
+    {
+        return m_Text[offset];
+    }
+
+private:
+    struct PartLocationInfo
+    {
+        std::size_t Offset; // offset in m_Text
+        FileLocationInfo FirstLocation;
+    };
+
+    auto FindMatchingLocationInfo(std::size_t offset) const
+    {
+        auto it = std::upper_bound(
+            m_Locations.begin(),
+            m_Locations.end(),
+            offset,
+            [] (auto&& a, auto&& b)
+            {
+                return a < b.Offset;
+            }
+        );
+
+        assert(m_Locations.begin() < it);
+        return std::prev(it);
+    }
+
+public:
+    FileLocationInfo GetLocationInfo(std::size_t offset) const
+    {
+        auto it = FindMatchingLocationInfo(offset);
+        auto loc = it->FirstLocation;
+        loc.Line += std::count(m_Text.begin() + it->Offset, m_Text.begin() + offset, '\n');
+        return loc;
+    }
+
+    void Append(char c)
+    {
+        m_Text.append(1, c);
+    }
+
+    void Append(std::string_view s)
+    {
+        m_Text.append(s);
+    }
+
+    void Append(std::string_view str, FileLocationInfo loc)
+    {
+        PartLocationInfo partLoc{m_Text.size(), loc};
+        m_Text.append(str);
+        m_Locations.push_back(partLoc);
+    }
+
+    void Append(const View& view)
+    {
+        assert(this != view.m_pBuffer);
+        auto it = view.m_pBuffer->FindMatchingLocationInfo(view.m_Offset);
+        auto partLoc = *it;
+        std::ptrdiff_t offsetDiff = m_Text.size() - partLoc.Offset;
+        partLoc.Offset = m_Text.size();
+        partLoc.FirstLocation.Line += std::count(
+            view.m_pBuffer->m_Text.begin() + it->Offset,
+            view.m_pBuffer->m_Text.begin() + view.m_Offset,
+            '\n'
+        );
+
+        m_Locations.push_back(std::move(partLoc));
+
+        std::for_each(
+            std::next(it),
+            view.m_pBuffer->m_Locations.end(),
+            [this, offsetDiff] (auto&& x)
+            {
+                PartLocationInfo partLoc = std::forward<decltype(x)>(x);
+                partLoc.Offset += offsetDiff;
+                this->m_Locations.push_back(std::move(partLoc));
+            }
+        );
+
+        m_Text.append(view.m_pBuffer->m_Text, view.m_Offset, view.m_Length);
+    }
+
+private:
+    std::string m_Text;
+    std::vector<PartLocationInfo> m_Locations;  // must be sorted according to Offset
+};
+
+class ParserException : public std::runtime_error
+{
+public:
+    ParserException(std::string_view fileName, std::uint32_t line, std::string_view msg)
+        : runtime_error(FormatErrorMessage(fileName, line, msg))
+    {
+    }
+
+private:
+    std::string FormatErrorMessage(std::string_view fileName, std::uint32_t line, std::string_view msg)
+    {
+        std::ostringstream oss;
+        oss << fileName << "(" << line << "): " << msg << "\n";
+        return oss.str();
+    }
+};
+
+class BasicParser
+{
+public:
+    bool EndOfInput(TextBuffer::View& view)
+    {
+        return view.IsEmpty();
+    }
+
+    bool EndOfLine(TextBuffer::View& view)
+    {
+        return Char(view, '\n') || String(view, "\r\n");
+    }
+
+    template<typename T>
+    bool CharIf(TextBuffer::View& view, T&& pred)
+    {
+        if (EndOfInput(view))
+        {
+            return false;
+        }
+
+        if (std::forward<T>(pred)(view.Peek()))
+        {
+            view.Advance();
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    bool AnyChar(TextBuffer::View& view, char& c)
+    {
+        return CharIf(view, [&c] (char x) { c = x; return true; });
+    }
+
+    bool Char(TextBuffer::View& view, char c)
+    {
+        return CharIf(view, [c] (char x) { return c == x; });
+    }
+
+    bool CharSet(TextBuffer::View& view, std::string_view cs)
+    {
+        return CharIf(view, [cs] (char x) { return cs.find(x) != cs.npos; });
+    }
+
+    bool CharRange(TextBuffer::View& view, std::pair<char, char> cr)
+    {
+        return CharIf(view, [cr] (char x) { return cr.first <= x && x <= cr.second; });
+    }
+
+    bool String(TextBuffer::View& view, std::string_view str)
+    {
+        auto curView = view;
+
+        for (auto&& i : str)
+        {
+            if (!Char(curView, i))
+            {
+                return false;
+            }
+        }
+
+        view = curView;
+        return true;
+    }
+
+    bool WordStartChar(TextBuffer::View& view)
+    {
+        return CharIf(view, [] (char x) { return 'A' <= x && x <= 'Z' || 'a' <= x && x <= 'z' || x == '_'; });
+    }
+
+    bool WordContinueChar(TextBuffer::View& view)
+    {
+        return CharIf(
+            view,
+            [] (char x) { return 'A' <= x && x <= 'Z' || 'a' <= x && x <= 'z' || '0' <= x && x <= '9' || x == '_'; }
+        );
+    }
+
+    bool SpaceChar(TextBuffer::View& view)
+    {
+        return CharSet(view, " \t");
+    }
+
+    bool NonSpaceChar(TextBuffer::View& view)
+    {
+        return CharIf(view, [] (char x) { return x != ' ' && x != '\t' && x != '\r' && x != '\n'; });
+    }
+
+    bool AnyWord(TextBuffer::View& view, TextBuffer::View& word)
+    {
+        auto initView = view;
+
+        if (!WordStartChar(view))
+        {
+            return false;
+        }
+
+        while (WordContinueChar(view))
+        {
+        }
+
+        word = initView;
+        word.SetLength(initView.GetLength() - view.GetLength());
+        return true;
+    }
+
+    bool Word(TextBuffer::View& view, std::string_view word)
+    {
+        auto curView = view;
+
+        if (String(curView, word) && !WordContinueChar(curView))
+        {
+            view = curView;
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    bool AnyCharSequence(TextBuffer::View& view, TextBuffer::View& seq)
+    {
+        auto initView = view;
+
+        if (!NonSpaceChar(view))
+        {
+            return false;
+        }
+
+        while (NonSpaceChar(view))
+        {
+        }
+
+        seq = initView;
+        seq.SetLength(initView.GetLength() - view.GetLength());
+        return true;
+    }
+
+    bool LineComment(TextBuffer::View& view)
+    {
+        if (String(view, "//"))
+        {
+            auto curView = view;
+
+            while (!EndOfInput(curView) && !EndOfLine(curView))
+            {
+                char c;
+                AnyChar(curView, c);
+                view = curView;
+            }
+
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    bool BlockComment(TextBuffer::View& view)
+    {
+        if (String(view, "/*"))
+        {
+            while (true)
+            {
+                if (String(view, "*/"))
+                {
+                    break;
+                }
+                else if (EndOfInput(view))
+                {
+                    // TODO: warn unterminated block comment
+                    break;
+                }
+                else
+                {
+                    char c;
+                    AnyChar(view, c);
+                }
+            }
+
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    bool DoSkips(TextBuffer::View& view)
+    {
+        if (!LineComment(view) && !BlockComment(view) && !SpaceChar(view))
+        {
+            return false;
+        }
+
+        while (LineComment(view) || BlockComment(view) || SpaceChar(view))
+        {
+        }
+
+        return true;
+    }
+
+    bool DoSkipsNL(TextBuffer::View& view)
+    {
+        if (!LineComment(view) && !BlockComment(view) && !SpaceChar(view) && !EndOfLine(view))
+        {
+            return false;
+        }
+
+        while (LineComment(view) || BlockComment(view) || SpaceChar(view) || EndOfLine(view))
+        {
+        }
+
+        return true;
+    }
+
+    bool Integer(TextBuffer::View& view, std::int32_t& n)
+    {
+        auto curView = view;
+        Char(curView, '-');
+
+        if (CharRange(curView, {'0', '9'}))
+        {
+            while (CharRange(curView, {'0', '9'}))
+            {
+            }
+
+            auto intView = view;
+            intView.SetLength(view.GetLength() - curView.GetLength());
+            n = std::stoi(intView.ToString());
+            view = curView;
+            return true;
+        }
+
+        return false;
+    }
+
+    bool DoubleQuoteStringNoEscape(TextBuffer::View& view, TextBuffer::View& str)
+    {
+        auto curView = view;
+
+        if (Char(curView, '"'))
+        {
+            auto startView = curView;
+            auto endView = startView;
+
+            while (true)
+            {
+                if (EndOfInput(curView) || EndOfLine(curView))
+                {
+                    // TODO: warn unterminated string literal
+                    break;
+                }
+                else if (Char(curView, '"'))
+                {
+                    view = curView;
+                    break;
+                }
+                else
+                {
+                    char c;
+                    AnyChar(curView, c);
+                    view = curView;
+                    endView = curView;
+                }
+            }
+
+            str = startView;
+            str.SetLength(str.GetLength() - endView.GetLength());
+            return true;
+        }
+
+        return false;
+    }
+};
+
+class Preprocessor : private BasicParser
+{
+public:
+    explicit Preprocessor(std::string url)
+        : m_FileNames{url}, m_InBuffers{TextBuffer(ReadEntireFile(url), FileLocationInfo{0, 1})}
+    {
+        m_InputStack.push({m_InBuffers[0].GetView(), false});
+    }
+
+    void Preprocess()
+    {
+        while (true)
+        {
+            while (!m_InputStack.empty() && m_InputStack.top().View.IsEmpty())
+            {
+                m_InputStack.pop();
+            }
+
+            if (m_InputStack.empty())
+            {
+                break;
+            }
+
+            auto& curView = m_InputStack.top().View;
+            auto initView = curView;
+            DoSkips(curView);
+
+            if (!m_InputStack.top().StartsAtMiddle)
+            {
+                if (Word(curView, "#define"))
+                {
+                    DoSkips(curView);
+
+                    if (!Char(curView, '$'))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#define': expected '$'"
+                        );
+                    }
+
+                    TextBuffer::View nameView;
+                    if (!AnyWord(curView, nameView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#define': expected macro name"
+                        );
+                    }
+
+                    DoSkips(curView);
+                    auto macroDefView = curView;
+                    auto macroDefEndView = macroDefView;
+
+                    while (true)
+                    {
+                        DoSkips(curView);
+
+                        TextBuffer::View seq;
+                        if (AnyCharSequence(curView, seq))
+                        {
+                            macroDefEndView = curView;
+                        }
+                        else if (EndOfInput(curView) || EndOfLine(curView))
+                        {
+                            break;
+                        }
+                        else
+                        {
+                            assert(false);
+                            break;
+                        }
+                    }
+
+                    macroDefView.SetLength(macroDefView.GetLength() - macroDefEndView.GetLength());
+                    if (m_DefinedMacros.insert_or_assign(nameView.ToString(), macroDefView).second)
+                    {
+                        // TODO: warn macro redefinition
+                    }
+
+                    continue;
+                }
+                else if (Word(curView, "#include"))
+                {
+                    DoSkips(curView);
+
+                    TextBuffer::View pathView;
+                    if (!DoubleQuoteStringNoEscape(curView, pathView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#include': expected file name"
+                        );
+                    }
+
+                    DoSkips(curView);
+
+                    if (!EndOfInput(curView) && !EndOfLine(curView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#include': unexpected characters after file name"
+                        );
+                    }
+
+                    std::string path = ConstructPath(
+                        m_FileNames[pathView.GetLocationInfo().FileID],
+                        pathView.ToStringView()
+                    );
+                    m_FileNames.push_back(path);
+                    auto& newBuf = m_InBuffers.emplace_back(
+                        ReadEntireFile(path.data()),
+                        FileLocationInfo{m_FileNames.size() - 1, 1}
+                    );
+                    m_InputStack.push({newBuf.GetView(), false});
+                    continue;
+                }
+            }
+
+            auto skipView = initView;
+            skipView.SetLength(initView.GetLength() - curView.GetLength());
+            m_OutBuffer.Append(skipView);
+
+            if (Char(curView, '$'))
+            {
+                TextBuffer::View nameView;
+                if (!AnyWord(curView, nameView))
+                {
+                    throw ParserException(
+                        m_FileNames[curView.GetLocationInfo().FileID],
+                        curView.GetLocationInfo().Line,
+                        "expected macro name after '$'"
+                    );
+                }
+
+                auto it = m_DefinedMacros.find(nameView.ToString());
+                if (it == m_DefinedMacros.end())
+                {
+                    throw ParserException(
+                        m_FileNames[curView.GetLocationInfo().FileID],
+                        curView.GetLocationInfo().Line,
+                        "macro '$"s.append(nameView.ToStringView()).append("' is not defined")
+                    );
+                }
+
+                m_InputStack.push({it->second, true});
+            }
+            else if (TextBuffer::View word; AnyWord(curView, word))
+            {
+                m_OutBuffer.Append(word);
+            }
+            else if (char c; AnyChar(curView, c))
+            {
+                m_OutBuffer.Append(c);
+            }
+        }
+    }
+
+    std::string_view GetFileNameFromID(std::size_t id) const
+    {
+        return m_FileNames[id];
+    }
+
+    TextBuffer& GetOutBuffer()
+    {
+        return m_OutBuffer;
+    }
+
+    const TextBuffer& GetOutBuffer() const
+    {
+        return m_OutBuffer;
+    }
+
+private:
+    struct InputStackItem    // One entry of the input stack: a piece of text that is still to be preprocessed.
+    {
+        TextBuffer::View View;    // the text to read for this entry
+        bool StartsAtMiddle;    // true for macro expansion results, false for main and #include'd files
+    };
+
+    std::vector<std::string> m_FileNames;    // file names, indexed by the FileID found in a view's location info
+    std::vector<TextBuffer> m_InBuffers;    // NOTE(review): presumably owns the text of the loaded input files -- confirm against the loading code
+    std::stack<InputStackItem, std::vector<InputStackItem>> m_InputStack;    // inputs waiting to be processed; macro bodies and newly loaded buffers are pushed on top
+    TextBuffer m_OutBuffer;    // preprocessed output; words and characters are appended here
+    std::unordered_map<std::string, TextBuffer::View> m_DefinedMacros;    // macro name (without the leading '$') -> view of its replacement text
+};
+
+enum class OpCodeKind    // Opcodes the parser understands; the quoted names are the spellings accepted by Parser::ParseOpCode.
+{
+    HiKey,    // "hikey" (parsed as a MIDI note number)
+    HiVelocity,    // "hivel" (parsed as a number)
+    LoKey,    // "lokey" (parsed as a MIDI note number)
+    LoopEnd,    // "loop_end" (parsed as a number)
+    LoopMode,    // "loop_mode" (parsed as a LoopModeKind)
+    LoopStart,    // "loop_start" (parsed as a number)
+    LoVelocity,    // "lovel" (parsed as a number)
+    PitchKeyCenter,    // "pitch_keycenter" (parsed as a MIDI note number)
+    Sample    // "sample" (kept as the raw value string)
+};
+
+enum class LoopModeKind    // Values of the loop_mode opcode; the quoted names are the spellings accepted by Parser::GetLoopModeKind.
+{
+    NoLoop,    // "no_loop"
+    OneShot,    // "one_shot"
+    LoopContinuous,    // "loop_continuous"
+    LoopSustain    // "loop_sustain"
+};
+
+struct OpCodeAndValue    // One parsed "opcode=value" pair.
+{
+    FileLocationInfo Location;    // location of the view just before the opcode was parsed
+    OpCodeKind OpCode;    // which opcode this is
+    std::variant<std::int32_t, double, LoopModeKind, std::string> Value;    // as filled in by Parser::Parse: int32 for key opcodes, double for velocities and loop points, LoopModeKind for loop_mode, string otherwise
+};
+
+enum class HeaderKind    // Section headers accepted by Parser::ParseHeader.
+{
+    Control,    // <control>
+    Global,    // <global>
+    Group,    // <group>
+    Region    // <region>
+};
+
+struct Section
+{
+    template<typename T>
+    std::optional<T> GetAs(OpCodeKind opCode) const
+    {
+        // search in reverse order
+        auto it = std::find_if(OpCodes.rbegin(), OpCodes.rend(), [opCode] (auto&& x) { return x.OpCode == opCode; });
+
+        if (it == OpCodes.rend())
+        {
+            return std::nullopt;
+        }
+
+        const T* pValue = std::get_if<T>(&it->Value);
+
+        if (!pValue)
+        {
+            return std::nullopt;
+        }
+
+        return std::make_optional(*pValue);
+    }
+
+    FileLocationInfo HeaderLocation;
+    HeaderKind Header;
+    std::vector<OpCodeAndValue> OpCodes;
+};
+
+class Parser : private BasicParser
+{
+public:
+    explicit Parser(Preprocessor& pp) : m_Preprocessor(pp)    // Stores a reference to `pp`, so the preprocessor must outlive this parser.
+    {
+    }
+
+    Preprocessor& GetPreprocessor()    // The preprocessor that was passed to the constructor.
+    {
+        return m_Preprocessor;
+    }
+
+    const std::vector<Section>& GetSections() const    // Sections collected so far by Parse(), in the order they were parsed.
+    {
+        return m_Sections;
+    }
+
+    void Parse()
+    {
+        auto view = m_Preprocessor.GetOutBuffer().GetView();
+
+        while (!view.IsEmpty())
+        {
+            DoSkipsNL(view);
+            Section sec;
+            sec.HeaderLocation = view.GetLocationInfo();
+
+            if (!ParseHeader(view, sec.Header))
+            {
+                throw ParserException(
+                    m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                    view.GetLocationInfo().Line,
+                    "expected section header"
+                );
+            }
+
+            while (true)
+            {
+                DoSkipsNL(view);
+                OpCodeAndValue opVal;
+                opVal.Location = view.GetLocationInfo();
+
+                if (ParseOpCode(view, opVal.OpCode))
+                {
+                    TextBuffer::View valView;
+                    if (ParseValueString(view, valView))
+                    {
+                        switch (opVal.OpCode)
+                        {
+                        case OpCodeKind::HiKey:
+                        case OpCodeKind::LoKey:
+                        case OpCodeKind::PitchKeyCenter:
+                            if (std::int32_t n; ParseMIDINoteNumber(valView, n))
+                            {
+                                opVal.Value = n;
+                            }
+                            else
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "expected MIDI note number"
+                                );
+                            }
+                            break;
+
+                        case OpCodeKind::HiVelocity:
+                        case OpCodeKind::LoopEnd:
+                        case OpCodeKind::LoopStart:
+                        case OpCodeKind::LoVelocity:
+                            try
+                            {
+                                opVal.Value = std::stod(valView.ToString());
+                            }
+                            catch (const std::invalid_argument&)
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "expected number"
+                                );
+                            }
+                            catch (const std::out_of_range&)
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "overflow error in float literal"
+                                );
+                            }
+                            break;
+
+                        case OpCodeKind::LoopMode:
+                            opVal.Value = GetLoopModeKind(valView);
+                            break;
+
+                        default:
+                            opVal.Value = valView.ToString();
+                            break;
+                        }
+
+                        sec.OpCodes.push_back(std::move(opVal));
+                    }
+                    else
+                    {
+                        assert(false);
+                    }
+                }
+                else
+                {
+                    m_Sections.push_back(std::move(sec));
+                    break;
+                }
+            }
+
+        }
+    }
+
+private:
+    bool ParseHeader(TextBuffer::View& view, HeaderKind& kind)
+    {
+        if (!Char(view, '<'))
+        {
+            return false;
+        }
+
+        TextBuffer::View word;
+        if (!AnyWord(view, word))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected header name"
+            );
+        }
+
+        if (!Char(view, '>'))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected '>'"
+            );
+        }
+
+        static const std::unordered_map<std::string_view, HeaderKind> HeaderMap{
+            {"control"sv, HeaderKind::Control},
+            {"global"sv, HeaderKind::Global},
+            {"group"sv, HeaderKind::Group},
+            {"region"sv, HeaderKind::Region}
+        };
+
+        auto it = HeaderMap.find(word.ToStringView());
+
+        if (it == HeaderMap.end())
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(word.GetLocationInfo().FileID),
+                word.GetLocationInfo().Line,
+                "unknown header <"s.append(word.ToStringView()).append(">")
+            );
+        }
+
+        kind = it->second;
+        return true;
+    }
+
+    bool ParseOpCode(TextBuffer::View& view, OpCodeKind& op)
+    {
+        auto curView = view;
+
+        TextBuffer::View word;
+        if (!AnyWord(curView, word))
+        {
+            return false;
+        }
+
+        DoSkips(curView);
+
+        if (!Char(curView, '='))
+        {
+            return false;
+        }
+
+        static const std::unordered_map<std::string_view, OpCodeKind> OpCodeMap{
+            {"hikey"sv, OpCodeKind::HiKey},
+            {"hivel"sv, OpCodeKind::HiVelocity},
+            {"lokey"sv, OpCodeKind::LoKey},
+            {"loop_end"sv, OpCodeKind::LoopEnd},
+            {"loop_mode"sv, OpCodeKind::LoopMode},
+            {"loop_start"sv, OpCodeKind::LoopStart},
+            {"lovel"sv, OpCodeKind::LoVelocity},
+            {"pitch_keycenter"sv, OpCodeKind::PitchKeyCenter},
+            {"sample"sv, OpCodeKind::Sample}
+        };
+
+        auto it = OpCodeMap.find(word.ToStringView());
+
+        if (it == OpCodeMap.end())
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(word.GetLocationInfo().FileID),
+                word.GetLocationInfo().Line,
+                "unknown opcode '"s.append(word.ToStringView()).append("'")
+            );
+        }
+
+        op = it->second;
+        view = curView;
+        return true;
+    }
+
+    bool ParseValueString(TextBuffer::View& view, TextBuffer::View& value)    // Extracts the value text following "opcode=" into `value` and advances `view` past it; always returns true.
+    {
+        auto curView = view;    // scan on a copy; `view` is only updated at the very end
+
+        while (SpaceChar(curView))    // skip whatever SpaceChar accepts before the value starts
+        {
+        }
+
+        auto startView = curView;    // start of the value
+        auto endView = startView;    // position just after the last accepted piece of the value
+
+        while (true)
+        {
+            while (SpaceChar(curView))    // skipped here, so inner spaces only join the value when more value text follows
+            {
+            }
+
+            if (EndOfInput(curView) || EndOfLine(curView) || LineComment(curView) || BlockComment(curView))    // the value stops at end of input, end of line or a comment
+            {
+                break;
+            }
+            else if (Char(curView, '<'))    // the value stops where a section header begins
+            {
+                break;
+            }
+            else if (OpCodeKind op; ParseOpCode(curView, op))    // the value stops where the next "opcode=" begins; ParseOpCode throws if a word followed by '=' is not a known opcode
+            {
+                break;
+            }
+            else if (TextBuffer::View seq; AnyCharSequence(curView, seq))
+            {
+                endView = curView;    // accept this piece as part of the value
+            }
+            else
+            {
+                assert(false);    // none of the matchers accepted the input
+            }
+        }
+
+        view = endView;    // whatever curView consumed beyond endView is handed back to the caller unread
+        value = startView;
+        value.SetLength(startView.GetLength() - endView.GetLength());    // NOTE(review): assumes GetLength() is the remaining length of a view, so the difference is the value's length -- confirm
+        return true;
+    }
+
+    bool ParseMIDINoteNumber(TextBuffer::View& view, std::int32_t& n)
+    {
+        if (Integer(view, n))
+        {
+            return true;
+        }
+
+        auto pred = [&n] (char x)
+        {
+            switch (x)
+            {
+            case 'C':
+            case 'c':
+                n = 0;
+                return true;
+
+            case 'D':
+            case 'd':
+                n = 2;
+                return true;
+
+            case 'E':
+            case 'e':
+                n = 4;
+                return true;
+
+            case 'F':
+            case 'f':
+                n = 5;
+                return true;
+
+            case 'G':
+            case 'g':
+                n = 7;
+                return true;
+
+            case 'A':
+            case 'a':
+                n = 9;
+                return true;
+
+            case 'B':
+            case 'b':
+                n = 11;
+                return true;
+
+            default:
+                return false;
+            }
+        };
+
+        if (!CharIf(view, pred))
+        {
+            return false;
+        }
+
+        if (Char(view, '#'))
+        {
+            n++;
+        }
+
+        std::int32_t oct;
+        if (!Integer(view, oct))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected octave number"
+            );
+        }
+
+        n += oct * 12;
+        return true;
+    }
+
+    LoopModeKind GetLoopModeKind(TextBuffer::View view)
+    {
+        auto curView = view;
+        if (TextBuffer::View word; AnyWord(curView, word))
+        {
+            static const std::unordered_map<std::string_view, LoopModeKind> LoopModeKindMap{
+                {"no_loop"sv, LoopModeKind::NoLoop},
+                {"one_shot"sv, LoopModeKind::OneShot},
+                {"loop_continuous"sv, LoopModeKind::LoopContinuous},
+                {"loop_sustain"sv, LoopModeKind::LoopSustain}
+            };
+
+            auto it = LoopModeKindMap.find(word.ToStringView());
+
+            if (it != LoopModeKindMap.end())
+            {
+                return it->second;
+            }
+        }
+
+        throw ParserException(
+            m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+            view.GetLocationInfo().Line,
+            "unknown loop_mode '"s.append(view.ToStringView()).append("'")
+        );
+    }
+
+    Preprocessor& m_Preprocessor;
+    std::vector<Section> m_Sections;
+};
+
+class InstrumentBuilder
+{
+public:
+    InstrumentBuilder(Parser& parser, std::string_view name) : m_Parser(parser), m_Name(name)    // Stores a reference to `parser` (it must outlive the builder) and a copy of `name`, which BuildInstrument() uses as the instrument name.
+    {
+    }
+
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildInstrument()
+    {
+        auto flatSections = FlattenSections(m_Parser.GetSections());
+        std::unique_ptr<Instrument, InstrumentDeleter> pInstrument(reinterpret_cast<Instrument*>(safe_calloc(sizeof(Instrument), 1)));
+        pInstrument->type = INST_SFZ;
+        pInstrument->instname = safe_strdup(m_Name.c_str());
+
+        std::vector<std::unique_ptr<Instrument, InstrumentDeleter>> sampleInstruments;
+        sampleInstruments.reserve(flatSections.size());
+
+        for (auto&& i : flatSections)
+        {
+            sampleInstruments.push_back(BuildSample(i));
+        }
+
+        pInstrument->samples = std::accumulate(
+            sampleInstruments.begin(),
+            sampleInstruments.end(),
+            0,
+            [] (auto&& a, auto&& b)
+            {
+                return a + b->samples;
+            }
+        );
+
+        pInstrument->sample = reinterpret_cast<Sample*>(safe_calloc(sizeof(Sample), pInstrument->samples));
+        Sample* pCurrentSample = pInstrument->sample;
+
+        for (auto&& i : sampleInstruments)
+        {
+            pCurrentSample = std::copy_n(i->sample, i->samples, pCurrentSample);
+            std::for_each(i->sample, i->sample + i->samples, [] (auto&& x) { x.data_alloced = false; });
+        }
+
+        return pInstrument;
+    }
+
+private:
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildSample(const Section& flatSection)
+    {
+        if (auto sampleName = flatSection.GetAs<std::string>(OpCodeKind::Sample))
+        {
+            auto pSampleInstrument = BuildSingleSampleInstrument(*sampleName);
+
+            for (auto&& i : flatSection.OpCodes)
+            {
+                for (std::size_t j = 0; j < pSampleInstrument->samples; j++)
+                {
+                    auto pSample = &pSampleInstrument->sample[j];
+
+                    switch (i.OpCode)
+                    {
+                    case OpCodeKind::HiKey:
+                        pSample->high_key = static_cast<int8>(std::get<std::int32_t>(i.Value));
+                        break;
+
+                    case OpCodeKind::HiVelocity:
+                        pSample->high_vel = static_cast<uint8>(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoKey:
+                        pSample->low_key = static_cast<int8>(std::get<std::int32_t>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoopEnd:
+                        pSample->loop_end = static_cast<splen_t>(std::get<double>(i.Value)) << FRACTION_BITS;
+                        break;
+
+                    case OpCodeKind::LoopMode:
+                        pSample->modes &= ~(MODES_LOOPING | MODES_PINGPONG | MODES_REVERSE | MODES_SUSTAIN);
+
+                        switch (std::get<LoopModeKind>(i.Value))
+                        {
+                            case LoopModeKind::NoLoop:
+                                break;
+
+                            case LoopModeKind::OneShot:
+                                // ???
+                                break;
+
+                            case LoopModeKind::LoopContinuous:
+                                pSample->modes |= MODES_LOOPING | MODES_SUSTAIN;
+                                break;
+
+                            case LoopModeKind::LoopSustain:
+                                pSample->modes |= MODES_LOOPING | MODES_SUSTAIN | MODES_RELEASE;
+                                break;
+                        }
+                        break;
+
+                    case OpCodeKind::LoopStart:
+                        pSample->loop_start = static_cast<splen_t>(std::get<double>(i.Value)) << FRACTION_BITS;
+                        break;
+
+                    case OpCodeKind::LoVelocity:
+                        pSample->low_vel = static_cast<uint8>(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::PitchKeyCenter:
+                        pSample->root_key = static_cast<int8>(std::get<std::int32_t>(i.Value));
+                        pSample->root_freq = ::freq_table[pSample->root_key];
+                        break;
+
+                    case OpCodeKind::Sample:
+                        break;
+                    }
+                }
+            }
+
+            return pSampleInstrument;
+        }
+        else
+        {
+            throw ParserException(
+                m_Parser.GetPreprocessor().GetFileNameFromID(flatSection.HeaderLocation.FileID),
+                flatSection.HeaderLocation.Line,
+                "no sample specified for region"
+            );
+        }
+    }
+
+    // Loads `sampleUrl` with ::extract_sample_file and wraps the result in a unique_ptr.
+    // Throws std::runtime_error when the loader returns a null pointer.
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildSingleSampleInstrument(std::string sampleUrl)
+    {
+        Instrument* const pLoaded = ::extract_sample_file(sampleUrl.data());
+
+        if (pLoaded == nullptr)
+        {
+            throw std::runtime_error("unable to load sample '"s + sampleUrl + "'");
+        }
+
+        return std::unique_ptr<Instrument, InstrumentDeleter>(pLoaded);
+    }
+
+    std::vector<Section> FlattenSections(const std::vector<Section>& sections)
+    {
+        std::vector<Section> flatSections;
+        std::vector<OpCodeAndValue> controlOpCodes;
+        std::vector<OpCodeAndValue> globalOpCodes;
+        std::vector<OpCodeAndValue> groupOpCodes;
+
+        for (auto&& i : sections)
+        {
+            switch (i.Header)
+            {
+            case HeaderKind::Control:
+                controlOpCodes.insert(controlOpCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+
+            case HeaderKind::Global:
+                globalOpCodes.insert(globalOpCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+
+            case HeaderKind::Group:
+                groupOpCodes = i.OpCodes;
+                break;
+
+            case HeaderKind::Region:
+                auto& newSection = flatSections.emplace_back();
+                newSection.Header = i.Header;
+                newSection.HeaderLocation = i.HeaderLocation;
+                auto& opCodes = newSection.OpCodes;
+                opCodes.clear();
+                opCodes.reserve(controlOpCodes.size() + globalOpCodes.size() + groupOpCodes.size() + i.OpCodes.size());
+                opCodes.insert(opCodes.end(), controlOpCodes.begin(), controlOpCodes.end());
+                opCodes.insert(opCodes.end(), globalOpCodes.begin(), globalOpCodes.end());
+                opCodes.insert(opCodes.end(), groupOpCodes.begin(), groupOpCodes.end());
+                opCodes.insert(opCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+            }
+        }
+
+        return flatSections;
+    }
+
+    Parser& m_Parser;
+    std::string m_Name;
+};
+
+struct InstrumentCacheEntry    // One cached SFZ instrument together with the copies that were handed out for it.
+{
+    InstrumentCacheEntry(std::string_view filePath, std::unique_ptr<Instrument, InstrumentDeleter> pInstrument)    // copies the path and takes ownership of the instrument
+        : FilePath(filePath), pInstrument(std::move(pInstrument))
+    {
+    }
+
+    std::string FilePath;    // cache key: the path string given to InstrumentCache::LoadSFZ
+    std::unique_ptr<Instrument, InstrumentDeleter> pInstrument;    // the instrument built from the file, owned by this entry
+    std::vector<Instrument*> RefInstruments;    // non-owning pointers to the copies returned by LoadSFZ and not yet passed to FreeInstrument
+};
+
+class InstrumentCache
+{
+public:
+    Instrument* LoadSFZ(std::string filePath)
+    {
+        auto it = std::find_if(
+            m_Instruments.begin(),
+            m_Instruments.end(),
+            [&filePath] (auto&& x)
+            {
+                return x.FilePath == filePath;
+            }
+        );
+
+        if (it == m_Instruments.end())
+        {
+            try
+            {
+                TimSFZ::Preprocessor pp(filePath);
+                pp.Preprocess();
+                TimSFZ::Parser parser(pp);
+                parser.Parse();
+                TimSFZ::InstrumentBuilder builder(parser, filePath);
+                m_Instruments.emplace_back(filePath, builder.BuildInstrument());
+            }
+            catch (const std::exception& e)
+            {
+                char str[] = "%s";
+                ctl->cmsg(CMSG_ERROR, VERB_NORMAL, str, e.what());
+                return nullptr;
+            }
+
+            it = std::prev(m_Instruments.end());
+        }
+
+        std::unique_ptr<Instrument, InstrumentDeleter> pInstRef(reinterpret_cast<Instrument*>(safe_calloc(sizeof(Instrument), 1)));
+        it->RefInstruments.push_back(pInstRef.get());
+        pInstRef->type = it->pInstrument->type;
+        pInstRef->instname = safe_strdup(it->pInstrument->instname);
+        pInstRef->samples = it->pInstrument->samples;
+        pInstRef->sample = reinterpret_cast<Sample*>(safe_calloc(sizeof(Sample), it->pInstrument->samples));
+        std::copy_n(it->pInstrument->sample, it->pInstrument->samples, pInstRef->sample);
+        std::for_each(pInstRef->sample, pInstRef->sample + pInstRef->samples, [] (auto&& x) { x.data_alloced = false; });
+
+        return pInstRef.release();
+    }
+
+    void FreeInstrument(Instrument* pInstrument)
+    {
+        safe_free(pInstrument->instname);
+        pInstrument->instname = nullptr;
+
+        auto it = std::find_if(
+            m_Instruments.begin(),
+            m_Instruments.end(),
+            [pInstrument] (auto&& x)
+            {
+                auto it = std::find(x.RefInstruments.begin(), x.RefInstruments.end(), pInstrument);
+                return it != x.RefInstruments.end();
+            }
+        );
+
+        if (it != m_Instruments.end())
+        {
+            it->RefInstruments.erase(std::find(it->RefInstruments.begin(), it->RefInstruments.end(), pInstrument));
+
+            if (it->RefInstruments.empty())
+            {
+                m_Instruments.erase(it);
+            }
+        }
+    }
+
+    void FreeAll()    // Drops every cache entry, destroying the cached instruments the entries own.
+    {
+        m_Instruments.clear();    // NOTE(review): Instrument copies returned earlier by LoadSFZ are not touched here -- confirm callers release them first
+    }
+
+private:
+    std::vector<InstrumentCacheEntry> m_Instruments;
+};
+
+InstrumentCache GlobalInstrumentCache;
+
+} // namespace TimSFZ
+
+extern "C"
+{
+
+// This is a no-op for now, but may be used in the future.
+void init_sfz(void)
+{
+}
+
+// Releases every instrument held by the global SFZ instrument cache.
+void free_sfz(void)
+{
+    TimSFZ::GlobalInstrumentCache.FreeAll();
+}
+
+// Loads the SFZ file at `sample_file` through the global cache.
+// Returns NULL when no path is given or when loading fails.
+Instrument *extract_sfz_file(char *sample_file)
+{
+    if (sample_file == nullptr)
+    {
+        // Building a std::string from a null pointer is undefined behaviour.
+        return nullptr;
+    }
+
+    return TimSFZ::GlobalInstrumentCache.LoadSFZ(sample_file);
+}
+
+// Unregisters an instrument previously returned by extract_sfz_file(); NULL is ignored.
+void free_sfz_file(Instrument *ip)
+{
+    if (ip == nullptr)
+    {
+        return;
+    }
+
+    TimSFZ::GlobalInstrumentCache.FreeInstrument(ip);
+}
+
+} // extern "C"
diff --git a/timidity/sfz.h b/timidity/sfz.h
new file mode 100644 (file)
index 0000000..f36ed4c
--- /dev/null
@@ -0,0 +1,15 @@
+// SFZ Support Routines for TiMidity++
+// Copyright (c) 2018 Starg <https://osdn.net/projects/timidity41>
+
+#pragma once
+
+#ifdef ENABLE_SFZ
+
+#include "instrum.h"
+
+void init_sfz(void);    /* currently does nothing */
+void free_sfz(void);    /* clears the global SFZ instrument cache */
+Instrument *extract_sfz_file(char *sample_file);    /* loads an SFZ file; returns NULL when loading fails */
+void free_sfz_file(Instrument *ip);    /* releases ip->instname and unregisters ip from the SFZ instrument cache */
+
+#endif /* ENABLE_SFZ */
index 48b4a55..aba6569 100755 (executable)
@@ -477,7 +477,7 @@ static int import_wave_load(char *sample_file, Instrument *inst)
        {
                uint8  modes;
                int32  sample_rate, root_freq;
-               uint32 loopStart = 0, loopEnd = 0;
+               splen_t loopStart = 0, loopEnd = 0;
 
                sample_rate = samplerc.dwSamplePeriod == 0 ? 0 : 1000000000L / samplerc.dwSamplePeriod;
                root_freq = freq_table[samplerc.dwMIDIUnityNote];
@@ -494,8 +494,8 @@ static int import_wave_load(char *sample_file, Instrument *inst)
                        const uint8 loopModes[] = { MODES_LOOPING, MODES_LOOPING | MODES_PINGPONG, MODES_LOOPING | MODES_REVERSE };
 
                        modes = loopModes[samplerc.loopType];
-                       loopStart = samplerc.loop_dwStart << FRACTION_BITS;
-                       loopEnd = samplerc.loop_dwEnd << FRACTION_BITS;
+                       loopStart = (splen_t)samplerc.loop_dwStart << FRACTION_BITS;
+                       loopEnd = (splen_t)samplerc.loop_dwEnd << FRACTION_BITS;
                }
                else
                        modes = 0;
@@ -1294,7 +1294,7 @@ static void initialize_sample(Instrument *inst, int frames, int sample_bits, int
                sample = &inst->sample[i];
                sample->data_alloced = 0;
                sample->loop_start = 0;
-               sample->loop_end = sample->data_length = frames << FRACTION_BITS;
+               sample->loop_end = sample->data_length = (splen_t)frames << FRACTION_BITS;
                sample->sample_rate = sample_rate;
                sample->low_key = 0;
                sample->high_key = 127;
index 7c55621..6d12488 100644 (file)
@@ -825,6 +825,43 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
                tf = sp->sfrom ? sfrom_sfrec->tf : rec->tf; ///r
 
 #if defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT))
+#if 1 /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
+               if(sp->lowbit > 0 ){
+                   /* 24 bit */
+                   splen_t cnt;
+                   uint8 *lowbit;
+                       uint16 *highbit;
+                       float *tmp_data;
+
+                       frames = divi_2(sp->len);
+                   sample->data = (sample_t*)safe_large_malloc(sizeof(float) * (frames + 128));
+                   sample->data_alloced = 1;
+                       sample->data_type = SAMPLE_TYPE_FLOAT;
+                   highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
+                   lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit                 
+                       tf_seek(tf, sp->start, SEEK_SET);
+                       tf_read(highbit, sp->len, 1, tf);
+                   tf_seek(tf, sp->lowbit, SEEK_SET);
+                   tf_read(lowbit, frames, 1, tf);
+                       tmp_data = (float *)sample->data;
+                   for(j = 0; j < frames; j++) {
+                               // 24bit to int32full
+                           int32 tmp_i = 0; // 1st byte: 00 is apparently fine?
+                               tmp_i |= (uint32)lowbit[j] << 8; // 2byte
+                           tmp_i |= (uint32)highbit[j] << 16; // 3-4byte
+#ifndef LITTLE_ENDIAN
+                               XCHG_LONG(tmp_i)
+#endif
+                               tmp_data[j] = (float)tmp_i * DIV_31BIT;
+                   }
+                   safe_free(highbit);
+                   safe_free(lowbit);
+                       /* set a small blank loop at the tail for avoiding abnormal loop. */    
+                       memset(&tmp_data[frames], 0, sizeof(float) * 128);
+                       if (antialiasing_allowed)
+                         antialiasing_float((float *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
+               }else
+#else /* SF2_24BIT_SAMPLE_TYPE_INT32 */
                if(sp->lowbit > 0 ){
                    /* 24 bit */
                    splen_t cnt;
@@ -836,15 +873,12 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
                    sample->data = (sample_t*)safe_large_malloc(sizeof(int32) * (frames + 128));
                    sample->data_alloced = 1;
                        sample->data_type = SAMPLE_TYPE_INT32;
-
                    highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
-                   lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit
-                       
+                   lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit                 
                        tf_seek(tf, sp->start, SEEK_SET);
                        tf_read(highbit, sp->len, 1, tf);
                    tf_seek(tf, sp->lowbit, SEEK_SET);
                    tf_read(lowbit, frames, 1, tf);
-
                        tmp_data = (uint32 *)sample->data;
                    for(j = 0; j < frames; j++) {
                                // 24bit to int32full
@@ -858,16 +892,14 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
                    }
                    safe_free(highbit);
                    safe_free(lowbit);
-
                        /* set a small blank loop at the tail for avoiding abnormal loop. */
                //      tmp_data[frames] = tmp_data[frames + 1] = tmp_data[frames + 2] = 0;                     
                        memset(&tmp_data[frames], 0, sizeof(int32) * 128);
-
                        if (antialiasing_allowed)
                          antialiasing_int32((int32 *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
-
                }else
-#endif
+#endif /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
+#endif /* defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) */
                {
                    /* 16 bit */
                        frames = divi_2(sp->len);
@@ -1542,6 +1574,8 @@ static void set_sample_info(SFInfo *sf, SampleList *vp, LayerTable *tbl)
                vp->v.loop_end = vp->len + 1;
        if (vp->v.loop_start > vp->len)
                vp->v.loop_start = vp->len;
+       if (vp->v.loop_start < 0)
+               vp->v.loop_start = 0;
        if (vp->v.loop_start >= vp->v.loop_end)
        {
                vp->v.loop_start = vp->len;
@@ -2310,6 +2344,8 @@ PlayMode dpm = {
                NULL
 };
 PlayMode *play_mode = &dpm;
+int free_instruments_afterwards = 1;
+int compute_buffer_size;
 #ifndef CFG_FOR_SF_SUPPORT_FFT
 int32 freq_table[1];
 FLOAT_T bend_fine[1];
index bc6779c..64f3b81 100644 (file)
@@ -14,6 +14,9 @@ Sample OverrideSample = {0};
 OVERRIDETIMIDITYDATA otd = {0};
 
 #if defined(__W32__)
+
+#include <windows.h>
+
 #if defined(WINDRV) || defined(WINDRV_SETUP)
 
 void timdrvOverrideSFSettingLoad(void)
index 6e27004..c25de62 100755 (executable)
 #include <sys/types.h>
 #endif
 
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
 #include <stdio.h>
 
 /* Architectures */
@@ -82,7 +86,7 @@
 #if defined(IX86CPU) && (defined(_MSC_VER) || defined(__POCC__) || \
        defined(__BORLANDC__) || defined(__WATCOMC__))
 #define CALLINGCONV __fastcall
-#elif defined(IX86CPU) && defined(__GNUC__)
+#elif defined(IX86CPU) && !defined(AMD64CPU) && defined(__GNUC__)
 #define CALLINGCONV __attribute__((fastcall))
 #else
 #define CALLINGCONV /**/
 #define restrict /* not C99 */
 #endif /* !restrict */
 
+#ifndef TIMIDITY_FORCEINLINE
+#ifdef __GNUC__
+#define TIMIDITY_FORCEINLINE __attribute__((__always_inline__))
+#elif defined(_MSC_VER)
+#define TIMIDITY_FORCEINLINE __forceinline
+#else
+#define TIMIDITY_FORCEINLINE inline
+#endif
+#endif /* TIMIDITY_FORCEINLINE */
 
 /* The size of the internal buffer is 2^AUDIO_BUFFER_BITS samples.
    This determines maximum number of samples ever computed in a row.
@@ -944,12 +957,13 @@ int usleep(unsigned int useconds); /* shut gcc warning up */
 #ifdef __MINGW32__
 #define aligned_malloc __mingw_aligned_malloc
 #define aligned_free   __mingw_aligned_free
-#elif __STDC_VERSION__ >= 201112L
-#define aligned_malloc(s,a) aligned_alloc(a,s)
-#define aligned_free   free
-//#elif _POSIX_VERSION >= 200112L
-//#define aligned_malloc(s,a) posix_memalign(,a,s)
+/* C11 aligned_alloc is unsafe here because s must be a multiple of a */
+//#elif __STDC_VERSION__ >= 201112L
+//#define aligned_malloc(s,a) aligned_alloc(a,s)
 //#define aligned_free   free
+#elif defined(__GNUC__) && _POSIX_VERSION >= 200112L
+#define aligned_malloc(s,a) ({void *ptr; if(!s || posix_memalign(&ptr,a,s)) ptr = NULL; ptr;})
+#define aligned_free   free
 #elif _MSC_VER
 #define aligned_malloc _aligned_malloc
 #define aligned_free   _aligned_free
index 65e89e6..38f31fa 100644 (file)
@@ -1574,9 +1574,9 @@ void mix_ch_chorus_sd_thread(DATA_T *buf, int32 count, int32 byte)
        switch(*chorus_status_sd.output_select){
        case 0: // main
                for(i = 0; i < count; i += 8){
-                       MM256_LS_ADD_PD(&buf[i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
+                       MM256_LS_FMA_PD(&buf[i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
                        MM256_LS_FMA_PD(&reverb_effect_buffer_thread[cdmt_ofs_1][i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), rev_level);
-                       MM256_LS_ADD_PD(&buf[i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), cho_level);
+                       MM256_LS_FMA_PD(&buf[i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), cho_level);
                        MM256_LS_FMA_PD(&reverb_effect_buffer_thread[cdmt_ofs_1][i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), rev_level);
                }
                break;
@@ -1608,7 +1608,7 @@ void mix_ch_chorus_sd_thread(DATA_T *buf, int32 count, int32 byte)
        switch(*chorus_status_sd.output_select){
        case 0: // main
                for(i = 0; i < count; i += 8){
-                       MM256_LS_ADD_PS(&buf[i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
+                       MM256_LS_FMA_PS(&buf[i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
                        MM256_LS_FMA_PS(&reverb_effect_buffer_thread[cdmt_ofs_1][i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), rev_level);
                }       
                break;
@@ -1844,7 +1844,7 @@ void do_master_effect_thread(void)
        if(noise_sharp_type)
                ns_shaping(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);        
        if (opt_limiter)
-               do_limiter(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
+               do_limiter(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].count);
 
 #ifdef VST_LOADER_ENABLE
 #ifndef MASTER_VST_EFFECT2
index 86c3813..0a45a40 100644 (file)
@@ -281,16 +281,8 @@ static inline void mix_mystery_signal_thread(DATA_T *sp, DATA_T *lp, int v, int
                                vsp = _mm_loadu_ps(sp++);
                                vsp = _mm_shuffle_ps(vsp, vsp, 0x50); // [0,1,2,3] to {0,0,1,1]
                                vsp = _mm_mul_ps(vsp, vevol);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
-                               {
-                               float *out = (float *)vsp;
-                               *(lp++) = out[0];
-                               *(lp++) = out[1];
-                               }
-#else
-                               *(lp++) = vsp.m128_f32[0];
-                               *(lp++) = vsp.m128_f32[1];
-#endif //  !(defined(_MSC_VER) || defined(MSC_VER))
+                               *(lp++) = MM_EXTRACT_F32(vsp,0);
+                               *(lp++) = MM_EXTRACT_F32(vsp,1);
                        }
 
 #else // ! USE_X86_EXT_INTRIN
@@ -410,6 +402,9 @@ void mix_voice_thread(DATA_T *buf, int v, int32 c, int thread)
        case INST_SF2:
        case INST_MOD:
        case INST_PCM:
+#ifdef ENABLE_SFZ
+       case INST_SFZ:
+#endif
                if(opt_resample_over_sampling){
                        int32 c2 = c * opt_resample_over_sampling;
                        resample_voice(v, sp, c2);
index 4c6e5ee..da829c9 100644 (file)
@@ -83,6 +83,7 @@
 #include "tables.h"
 #include "miditrace.h"
 #include "effect.h"
+#include "freq.h"
 #ifdef SUPPORT_SOUNDSPEC
 #include "soundspec.h"
 #endif /* SUPPORT_SOUNDSPEC */
 #include "sndfontini.h"
 #include "thread.h"
 #include "miditrace.h"
+#include "flac_a.h"
+#include "sfz.h"
 ///r
 #ifdef __BORLANDC__
 #define inline
 
 #ifdef IA_W32GUI
 #include "w32g.h"
+#include "w32g_subwin.h"
 #include "w32g_utl.h"
 #endif
 
 #include "portaudio_a.h"
 #endif
 
+#ifdef __W32G__
+#include "w32g_utl.h"
+#endif
+
 
 uint8 opt_normal_chorus_plus = 5; // chorusEX
 
@@ -1311,7 +1319,7 @@ static int config_parse_mfx_patch(char *w[], int words, int mapid, int bank, int
 }
 
 ///r
-static int set_gus_patchconf_opts(char *name,
+static int set_gus_patchconf_opts(const char *name,
                int line, char *opts, ToneBankElement *tone)
 {
        char *cp;
@@ -1708,7 +1716,7 @@ static int set_gus_patchconf_opts(char *name,
 
 ///r
 #define SET_GUS_PATCHCONF_COMMENT
-static int set_gus_patchconf(char *name, int line,
+static int set_gus_patchconf(const char *name, int line,
                             ToneBankElement *tone, char *pat, char **opts)
 {
     int j;
@@ -1803,6 +1811,21 @@ static int set_gus_patchconf(char *name, int line,
                opts += 2;
     }
 #endif
+#ifdef ENABLE_SFZ
+       else if(strcmp(pat, "%sfz") == 0) /* sfz extension */
+       {
+               /* %sfz filename */
+               if (opts[0] == NULL)
+               {
+                       ctl->cmsg(CMSG_ERROR, VERB_NORMAL,
+                               "%s: line %d: Syntax error", name, line);
+                       return 1;
+               }
+               tone->name = safe_strdup(opts[0]);
+               tone->instype = 5; // sfz
+               opts++;
+       }
+#endif
     else if(strcmp(pat, "%pat") == 0) /* pat extention */
        {
                tone->instype = 0; // pat
@@ -1850,7 +1873,7 @@ CFG
     return 0;
 }
 ///r
-static int set_patchconf(char *name, int line, ToneBank *bank, char *w[], int dr, int mapid, int bankmapfrom, int bankno, int add)
+static int set_patchconf(const char *name, int line, ToneBank *bank, char *w[], int dr, int mapid, int bankmapfrom, int bankno, int add)
 {
     int i;
        int elm;
@@ -6104,7 +6127,7 @@ static int parse_opt_h(const char *arg)
        };
        void show_ao_device_info(FILE *fp);
        FILE *fp;
-       char version[32], *help_args[7], per_mark[2];
+       char version[64], *help_args[7], per_mark[2];
        int i, j;
        char *h;
        ControlMode *cmp, **cmpp;
@@ -6115,6 +6138,8 @@ static int parse_opt_h(const char *arg)
        fp = open_pager();
        strcpy(version, (!strstr(timidity_version, "current")) ? "version " : "");
        strcat(version, timidity_version);
+       strcat(version, " ");
+       strcat(version, arch_string);
        per_mark[0] = '%';
        per_mark[1] = '\0';
        help_args[0] = version;
@@ -7115,9 +7140,6 @@ static inline int parse_opt_wave_update_step(const char *arg)
 
 
 #ifdef AU_FLAC
-extern void flac_set_option_verify(int);
-extern void flac_set_option_padding(int);
-extern void flac_set_compression_level(int);
 
 static inline int parse_opt_flac_verify(const char *arg)
 {
@@ -7648,7 +7670,7 @@ static inline int parse_opt_v(const char *arg)
 #else
                "TiMidity++ ",
                                (strcmp(timidity_version, "current")) ? "version " : "",
-                               timidity_version, NLS,
+                               timidity_version, " ", arch_string, NLS,
                NLS,
 #endif
                "Copyright (C) 1999-2004 Masanao Izumo <iz@onicos.co.jp>", NLS,
@@ -7951,7 +7973,7 @@ static inline void close_pager(FILE *fp)
 static void interesting_message(void)
 {
        printf(
-"TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+"TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
 "Copyright (C) 1999-2004 Masanao Izumo <iz@onicos.co.jp>" NLS
 "Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
                        NLS
@@ -7974,7 +7996,9 @@ static void interesting_message(void)
 "along with this program; if not, write to the Free Software" NLS
 "Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA" NLS
                        NLS, (strcmp(timidity_version, "current")) ? "version " : "",
-                       timidity_version);
+                       timidity_version,
+                       arch_string
+               );
 }
 
 /* -------- functions for getopt_long ends here --------- */
@@ -8385,6 +8409,9 @@ MAIN_INTERFACE void timidity_init_player(void)
 #ifdef INT_SYNTH
        init_int_synth();
 #endif // INT_SYNTH
+#ifdef ENABLE_SFZ
+       init_sfz();
+#endif
 
 #ifdef SUPPORT_SOUNDSPEC
     if(view_soundspec_flag)
@@ -8756,6 +8783,23 @@ int main(int argc, char **argv)
        _CrtSetDbgFlag(CRTDEBUGFLAGS);
 #endif
        atexit(w32_exit);
+
+#ifdef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+       {
+               HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
+
+               if (hStdOut != INVALID_HANDLE_VALUE)
+               {
+                       DWORD mode;
+
+                       if (GetConsoleMode(hStdOut, &mode))
+                       {
+                               SetConsoleMode(hStdOut, mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
+                       }
+               }
+       }
+#endif
+
 #endif /* __W32__ */
 #if !defined(KBTIM) && !defined(WINDRV)
        OverrideSFSettingLoad();
@@ -8925,7 +8969,7 @@ int main(int argc, char **argv)
                files = expand_file_archives(files, &nfiles);
        if (nfiles > 0)
                files_nbuf = files[0];
-#if !defined(IA_W32GUI) && !defined(IA_W32G_SYN)
+#if !defined(IA_W32GUI) && !defined(IA_W32G_SYN) && !defined(IA_WINSYN)
        if (dumb_error_count)
                sleep(1);
 #endif
@@ -9017,6 +9061,9 @@ int main(int argc, char **argv)
        //free_reverb_buffer();
        free_effect_buffers();
 ///r
+#ifdef ENABLE_SFZ
+       free_sfz();
+#endif
 #ifdef INT_SYNTH
        free_int_synth();
 #endif // INT_SYNTH
@@ -9096,6 +9143,9 @@ static void w32_exit(void)
        //free_reverb_buffer();
        free_effect_buffers();
 ///r
+#ifdef ENABLE_SFZ
+       free_sfz();
+#endif
 #ifdef INT_SYNTH
        free_int_synth();
 #endif // INT_SYNTH
index fd6d10a..54fd83b 100644 (file)
@@ -2346,6 +2346,7 @@ static inline void do_vfx_tremolo(int v, VoiceEffect *vfx, DATA_T *sp, int32 cou
        for(i = 0; i < count2; i += 8){
                MM256_LSU_MUL_PS(&sp[i], vamp);
        }
+       }
 #elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
        {
        const int32 req_count_mask = ~(0x7);
index 4d18bae..386e73b 100644 (file)
@@ -89,85 +89,6 @@ PlayMode dpm =
 
 /*****************************************************************************************************************************/
 
-#if defined(__CYGWIN32__) || defined(__MINGW32__)
-#ifdef HAVE_NEW_MMSYSTEM
-#include <mmsystem.h>
-#else
-/* On cygnus, there is not mmsystem.h for Multimedia API's.
- * mmsystem.h can not distribute becase of Microsoft Lisence
- * Then declare some of them here. **/
-#define WOM_OPEN                0x3BB
-#define WOM_CLOSE               0x3BC
-#define WOM_DONE                0x3BD
-#define WAVE_FORMAT_QUERY       0x0001
-#define WAVE_ALLOWSYNC          0x0002
-#define WAVE_FORMAT_PCM         1
-#define CALLBACK_FUNCTION       0x00030000l
-#define WAVERR_BASE             32
-#define WAVE_MAPPER             (UINT)-1
-
-DECLARE_HANDLE(HWAVEOUT);
-DECLARE_HANDLE(HWAVE);
-typedef HWAVEOUT *LPHWAVEOUT;
-
-/* Define WAVEHDR, WAVEFORMAT structure */
-
-typedef struct wavehdr_tag
-{
-    LPSTR       lpData;
-    DWORD       dwBufferLength;
-    DWORD       dwBytesRecorded;
-    DWORD       dwUser;
-    DWORD       dwFlags;
-    DWORD       dwLoops;
-    struct wavehdr_tag *lpNext;
-    DWORD       reserved;
-} WAVEHDR;
-
-typedef struct
-{
-    WORD    wFormatTag;
-    WORD    nChannels;
-    DWORD   nSamplesPerSec;
-    DWORD   nAvgBytesPerSec;
-    WORD    nBlockAlign;
-    WORD    wBitsPerSample;
-    WORD    cbSize;
-} WAVEFORMAT, WAVEFORMATEX, *LPWAVEFORMATEX;
-
-
-typedef struct waveoutcaps_tag
-{
-    WORD    wMid;
-    WORD    wPid;
-    UINT    vDriverVersion;
-#define MAXPNAMELEN      32
-    char    szPname[MAXPNAMELEN];
-    DWORD   dwFormats;
-    WORD    wChannels;
-    DWORD   dwSupport;
-} WAVEOUTCAPS;
-
-typedef WAVEHDR *       LPWAVEHDR;
-typedef WAVEFORMAT *    LPWAVEFORMAT;
-typedef WAVEOUTCAPS *   LPWAVEOUTCAPS;
-typedef UINT            MMRESULT;
-
-MMRESULT WINAPI waveOutOpen(LPHWAVEOUT, UINT, LPWAVEFORMAT, DWORD, DWORD, DWORD);
-MMRESULT WINAPI waveOutClose(HWAVEOUT);
-MMRESULT WINAPI waveOutPrepareHeader(HWAVEOUT, LPWAVEHDR, UINT);
-MMRESULT WINAPI waveOutUnprepareHeader(HWAVEOUT, LPWAVEHDR, UINT);
-MMRESULT WINAPI waveOutWrite(HWAVEOUT, LPWAVEHDR, UINT);
-UINT     WINAPI waveOutGetNumDevs(void);
-MMRESULT WINAPI waveOutReset(HWAVEOUT);
-MMRESULT WINAPI waveOutGetDevCaps(UINT, LPWAVEOUTCAPS, UINT);
-MMRESULT WINAPI waveOutGetDevCapsA(UINT, LPWAVEOUTCAPS, UINT);
-#define waveOutGetDevCaps waveOutGetDevCapsA
-MMRESULT WINAPI waveOutGetID(HWAVEOUT, UINT*);
-
-#endif
-#endif /* __CYGWIN32__ */ 
-
 ///r
 typedef struct {
     WAVEFORMATEX    Format;
index 5bc7817..419c12e 100644 (file)
@@ -365,7 +365,9 @@ extern void g_free_libFLAC_dll ( void );
 
        
 #else  /* defined(LEGACY_FLAC) */
-       
+
+extern int g_load_libFLAC_dll(void);
+extern void g_free_libFLAC_dll(void);  
        
         extern const char * const *  *g_FLAC__StreamEncoderInitStatusString;
         extern const char * const *  *g_FLAC__StreamEncoderStateString;
index dbbb521..ae58e35 100644 (file)
@@ -26,6 +26,7 @@
 #include "config.h"\r
 #endif /* HAVE_CONFIG_H */\r
 #include "interface.h"\r
+#include "common.h"\r
 \r
 #if defined(AU_VORBIS_DLL) || defined(AU_OPUS_DLL)\r
 \r
@@ -139,7 +140,7 @@ void free_ogg_dll(void)
 int load_ogg_dll(void)\r
 {\r
        if(!h_ogg_dll){\r
-               h_ogg_dll = LoadLibrary("ogg.dll");\r
+               h_ogg_dll = LoadLibrary("libogg.dll");\r
                if(!h_ogg_dll) return -1;\r
        }\r
 //     ogg_dll.oggpack_writeinit = (type_oggpack_writeinit)GetProcAddress(h_ogg_dll,"oggpack_writeinit");\r
@@ -527,7 +528,7 @@ int     ogg_page_bos(ogg_page *og)
 }\r
 #endif\r
 \r
-int     ogg_page_eos(ogg_page *og)\r
+int     ogg_page_eos(const ogg_page *og)\r
 {\r
        if(h_ogg_dll){\r
                return ogg_dll.ogg_page_eos(og);\r
index 78d622c..d5f7282 100644 (file)
@@ -128,7 +128,7 @@ PlayMode dpm = {
 #include <objbase.h>
 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
 //#include <Avrt.h>
-#include <Audioclient.h>
+#include <audioclient.h>
 #include <audiopolicy.h>
 //#define INITGUID
 #include <mmdeviceapi.h>
index 32ab164..1b935da 100644 (file)
@@ -30,9 +30,6 @@
 
 #ifdef AU_WDMKS
 
-#ifdef __W32__
-#include "interface.h"
-#endif
 #include <stdio.h>
 #include <stdlib.h>
 #ifndef NO_STRING_H
index 12dc54c..13ec861 100644 (file)
@@ -62,7 +62,7 @@ extern int opterr;
 extern int optopt;
 
 #ifndef UTILS_GETOPT_PRIVATE
-extern int getopt(int __argc, char * const *__argv, const char *__shortopts);
+extern int getopt(int argc, char * const *argv, const char *__shortopts);
 #endif /* !UTILS_GETOPT_PRIVATE */
 
 #endif /* <unistd.h> */
@@ -150,10 +150,10 @@ struct option
    arguments to the option '\0'.  This behavior is specific to the GNU
    `getopt'.  */
 
-extern int getopt_long(int __argc, char * const *__argv,
+extern int getopt_long(int argc, char * const *argv,
                        const char *__shortopts,
                        const struct option *__longopts, int *__longind);
-extern int getopt_long_only(int __argc, char * const *__argv,
+extern int getopt_long_only(int argc, char * const *argv,
                             const char *__shortopts,
                             const struct option *__longopts, int *__longind);
 #endif /* !UTILS_GETOPT_PRIVATE */