wprintw(dftwin, "(%s)", prog->comment);
}
#endif
+#ifdef ENABLE_SFZ
+ else if(type == INST_SFZ)
+ {
+ if (prog->name)
+ {
+ waddch(dftwin, ' ');
+ waddstr(dftwin, prog->name);
+ }
+ if (prog->comment != NULL)
+ wprintw(dftwin, "(%s)", prog->comment);
+ }
+#endif
}
}
}
#include "wsgtk_main.h"
#endif
-#ifndef __W32__
#include <stdio.h>
+#ifndef __W32__
#include <termios.h>
//#include <term.h>
#include <unistd.h>
#endif
-#ifdef __GNUC__
-#include <termios.h>
-#endif
-
-#if defined(__W32__) && !defined(__GNUC__)
+#if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
#define HAVE_DOS_KEYBOARD 1
#endif
static int ctl_pass_playing_list(int n, char *args[])
#else
// 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
#endif
{
TIMECAPS tcaps;
#endif /* !IA_W32G_SYN */
-
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-
-void winplaymidi(void) {
-
- if (winplaymidi_sleep_level < 1) {
- winplaymidi_sleep_level = 1;
- }
- if (0 != rtsyn_buf_check()) {
- winplaymidi_sleep_level =0;
- }
- rtsyn_np_play_some_data();
- if (winplaymidi_sleep_level == 1) {
- DWORD ct = GetCurrentTime();
- if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) {
- winplaymidi_active_start_time = ct;
- } else if (ct - winplaymidi_active_start_time > 60000) {
- winplaymidi_sleep_level = 2;
- }
- } else if (winplaymidi_sleep_level == 0) {
- winplaymidi_active_start_time = 0;
- }
-
- rtsyn_play_calculate();
-
- if (winplaymidi_sleep_level >= 2) {
- Sleep(100);
- } else if (winplaymidi_sleep_level > 0) {
- Sleep(1);
- }
-}
-#endif /* IA_W32G_SYN */
-
-
/*
* interface_<id>_loader();
*/
#include "wsgtk_main.h"
#endif
-#ifndef __W32__
#include <stdio.h>
+#ifndef __W32__
#include <termios.h>
//#include <term.h>
#include <unistd.h>
#endif
+#if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
+#define HAVE_DOS_KEYBOARD 1
+#endif
#ifndef __W32__
static struct termios initial_settings, new_settings;
static int ctl_pass_playing_list(int n, char *args[])
#else
// 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
#endif
{
int i, j,devnum,devok;
#endif /* !IA_W32G_SYN */
-
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-
-void winplaymidi(void){
-
- if ( winplaymidi_sleep_level < 1 ) {
- winplaymidi_sleep_level = 1;
- }
- if( 0 != rtsyn_buf_check() ){
- winplaymidi_sleep_level =0;
- }
- rtsyn_play_some_data();
- if ( winplaymidi_sleep_level == 1 ) {
- DWORD ct = GetCurrentTime ();
- if ( winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time ) {
- winplaymidi_active_start_time = ct;
- } else if ( ct - winplaymidi_active_start_time > 60000 ) {
- winplaymidi_sleep_level = 2;
- }
- } else if ( winplaymidi_sleep_level == 0 ) {
- winplaymidi_active_start_time = 0;
- }
-
- rtsyn_play_calculate();
-
- if ( winplaymidi_sleep_level >= 2) {
- Sleep ( 100 );
- } else if ( winplaymidi_sleep_level > 0 ) {
- Sleep ( 1 );
- }
-}
-#endif
-
-
/*
* interface_<id>_loader();
*/
#undef RC_NONE
#endif
#include <windows.h>
+#include <commdlg.h>
+#include <shellapi.h>
#ifdef RC_NONE
#undef RC_NONE
#endif
// (HANDLE)crt_beginthreadex(LPSECURITY_ATTRIBUTES security, DWORD stack_size, LPTHREAD_START_ROUTINE start_address, LPVOID arglist, DWORD initflag, LPDWORD thrdaddr );
#if defined(_MSC_VER) || defined(__WATCOMC__)
#define crt_beginthreadex(security,stack_size,start_address,arglist,initflag,thrdaddr ) \
-(HANDLE)_beginthreadex((void *)security,(unsigned)stack_size,(MSVC_BEGINTHREAD_START_ADDRESS)start_address,(void *)arglist,(unsigned)initflag,(unsigned *)thrdaddr)
+(HANDLE)_beginthreadex((void *)security,(unsigned)stack_size,start_address,(void *)arglist,(unsigned)initflag,(unsigned *)thrdaddr)
#elif defined(_BORLANDC_)
#define crt_beginthreadex(security,stack_size,start_address,arglist,initflag,thrdaddr ) \
(HANDLE)_beginthreadNT((BCC_BEGINTHREAD_START_ADDRESS)start_address,(unsigned)stack_size,(void *)arglist,(void *)security_attrib,(unsigned long)create_flags,(unsigned long *)thread_id)
static ptr_size_t value_thread = 0;
static void w32g_ext_control_sub_thread(int rc, ptr_size_t value);
-static void w32g_ext_control_thread(void)
+static unsigned __stdcall w32g_ext_control_thread(void *param)
{
for(;;){
WaitForSingleObject(hEventTcv, INFINITE); // wait for the thread-start event
ResetEvent(hEventTcv); // reset the thread-start event
thread_finish = 1; // set the thread-finished flag
}
- crt_endthread();
+
+ return 0;
}
static void w32g_uninit_ext_control_thread(void)
hEventTcv = CreateEvent(NULL,FALSE,FALSE,NULL); // reset manual
if(hEventTcv == NULL)
return;
- hThread = crt_beginthreadex(NULL, 0, (LPTHREAD_START_ROUTINE)w32g_ext_control_thread, 0, 0, &ThreadID);
+ hThread = crt_beginthreadex(NULL, 0, w32g_ext_control_thread, 0, 0, &ThreadID);
if(hThread == NULL)
return;
}
#include <process.h>
#include <stddef.h>
#include <windows.h>
+#include <mmsystem.h>
#undef RC_NONE
#include <shlobj.h>
// #include <prsht.h>
}
#ifdef W32GUI_DEBUG
-void DebugThread(void *args)
+void WINAPI DebugThread(void *args)
{
MSG msg;
DebugThreadExit = 0;
DWORD dwThreadID;
if(!DebugThreadExit)
return;
- hDebugThread = crt_beginthreadex(NULL,0,DebugThread,0,0,&dwThreadID);
+ hDebugThread = crt_beginthreadex(NULL,0,(LPTHREAD_START_ROUTINE)DebugThread,0,0,&dwThreadID);
}
#endif
{
char VersionText[2024];
sprintf(VersionText,
-"TiMidity++ %s%s%s" NLS NLS
+"TiMidity++ %s%s %s" NLS NLS
"TiMidity-0.2i by Tuukka Toivonen <tt@cgs.fi>." NLS
"TiMidity Win32 version by Davide Moretti <dave@rimini.com>." NLS
"TiMidity Windows 95 port by Nicolas Witczak." NLS
"TiMidity Win32 GUI by Daisuke Aoki <dai@y7.net>." NLS
" Japanese menu, dialog, etc by Saito <timidity@flashmail.com>." NLS
"TiMidity++ by Masanao Izumo <mo@goice.co.jp>." NLS
-,(strcmp(timidity_version, "current")) ? "version " : "", timidity_version,
-#if defined(_M_X64) || defined(__x86_64__)
-" [x64]"
-#elif defined(_M_IX86) || defined(__i386__)
-" [x86]"
-#else
-""
-#endif
+,(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
);
MessageBox(hParentWnd, VersionText, "Version", MB_OK);
}
{
char TiMidityText[2024];
sprintf(TiMidityText,
-" TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+" TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
" Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>" NLS
" Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
NLS
" along with this program; if not, write to the Free Software" NLS
" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA" NLS
,
-(strcmp(timidity_version, "current")) ? "version " : "", timidity_version
+(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
);
MessageBox(hParentWnd, TiMidityText, "TiMidity++", MB_OK);
}
in++;
i++;
}
- Edit_SetSel(hwnd,-1,-1);
- Edit_ReplaceSel(hwnd,out);
+
+ {
+ int len = GetWindowTextLength(hwnd);
+ Edit_SetSel(hwnd, len, len);
+ Edit_ReplaceSel(hwnd, out);
+ }
}
void PrintfEditCtlWnd(HWND hwnd, char *fmt, ...)
i++;
}
if(IsWindow(hwnd)){
- SendMessage(hwnd, WM_SETREDRAW, 0, 0);
- Edit_SetSel(hwnd,-1,-1);
- SendMessage(hwnd, WM_SETREDRAW, 1, 0);
+ int len = GetWindowTextLength(hwnd);
+ Edit_SetSel(hwnd, len, len);
Edit_ReplaceSel(hwnd,out);
}
}
w32g_get_midi_file_info_post(entry);
{
char *title;
- volatile LVITEM lvi0;
- int len;
+ volatile LVITEM lvi0;
lvi0.iItem = i;
lvi0.iSubItem = 0;
lvi0.mask = LVIF_TEXT;
#ifdef LISTVIEW_PLAYLIST
void w32g_copy_playlist(void)
{
- int i, num, pos, selnum = 0, next;
+ int i, num, pos, selnum = 0;
HWND hList;
PlayListEntry *entry;
void w32g_paste_playlist(int uniq, int refine)
{
- int i, num, pos, select = 0, selnum = 0, skip = 0;
+ int i, num, pos, select = 0, skip = 0;
HWND hList;
PlayListEntry *entry;
- char *title;
struct midi_file_info *info;
if(!(hList = playlist_box()))
#include "resample.h"
#include "mix.h"
#include "thread.h"
+#include "sfz.h"
#include <tchar.h>
#include "w32g.h"
#include "w32g_res.h"
#include "w32g_utl.h"
+#include "w32g_ut2.h"
#include "w32g_pref.h"
///r
#ifdef AU_W32
#include "gogo_a.h"
#endif
+#ifdef AU_FLAC
+#include "flac_a.h"
+#endif
+
/*****************************************************************************************************************************/
/* TiMidity Win32GUI preference / PropertySheet */
-#if !defined(IA_W32G_SYN)
extern void w32g_restart(void);
-#endif
+
extern void set_gogo_opts_use_commandline_options(char *commandline);
extern void restore_voices(int save_voices);
static int vorbisCofigDialog(void);
static int gogoCofigDialog(void);
-static int flacConfigDialog(void);
static int w32_reset_exe_directory(void)
{
* CAUTION: must not be called from MainThread -- dangerous!
*/
extern void OnQuit(void);
+extern void timidity_init_player(void); /* timidity.c */
void PrefSettingApplyReally(void)
{
#ifdef INT_SYNTH
init_int_synth();
#endif // INT_SYNTH
+#ifdef ENABLE_SFZ
+ init_sfz();
+#endif
initialize_resampler_coeffs();
timidity_init_player();
restore_voices(1);
free_special_patch(-1);
tmdy_free_config();
free_soundfonts();
+#ifdef ENABLE_SFZ
+ free_sfz();
+#endif
#ifdef INT_SYNTH
free_int_synth();
#endif // INT_SYNTH
static int pref_channel_page;
static ChannelBitMask channelbitmask;
int i, j, tmp;
- const TCHAR **cb_info;
switch (uMess){
case WM_INITDIALOG:
// BANK
void w32g_setup_doc(int idx);
void w32g_open_doc(int close_if_no_doc);
void w32g_free_doc(void);
-extern void w32g_free_doc(void);
#endif /* ___W32G_SUBWIN_H_ */
HPEN hPen;
HBRUSH hBrush;
HGDIOBJ hgdiobj_hpen, hgdiobj_hbrush;
- RECT rc;
+ //RECT rc;
if (lockflag) TRACER_LOCK();
hPen = CreatePen(PS_SOLID, 1, C_BACK);
#include "readmidi.h"
#include "output.h"
#include "controls.h"
+#include "rtsyn.h"
#ifdef WIN32GCC
WINAPI void InitCommonControls(void);
#include "w32g_utl.h"
#include "w32g_pref.h"
#include "w32g_res.h"
+#include "w32g_int_synth_editor.h"
#ifdef IA_W32G_SYN
TEXT("\83^\83C\83\80\83N\83\8a\83e\83B\83J\83\8b")
};
static const TCHAR *syn_thread_priority_name_en[] = {
- TEXT("idle")
+ TEXT("idle"),
TEXT("lowest"),
TEXT("below normal"),
TEXT("normal"),
#endif /* !TWSYNSRV */
SetPriorityClass(GetCurrentProcess(), processPriority);
SetThreadPriority(w32g_syn.syn_hThread, syn_ThreadPriority);
- result = ctl_pass_playing_list2(w32g_syn_port_num, args);
+ result = ctl->pass_playing_list(w32g_syn_port_num, args);
SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
SetThreadPriority(w32g_syn.syn_hThread, THREAD_PRIORITY_NORMAL);
if (result == 2) {
{
char VersionText[2024];
sprintf(VersionText,
-"TiMidity++ %s%s" NLS NLS
+"TiMidity++ %s%s %s" NLS NLS
"TiMidity-0.2i by Tuukka Toivonen <tt@cgs.fi>." NLS
"TiMidity Win32 version by Davide Moretti <dave@rimini.com>." NLS
"TiMidity Windows 95 port by Nicolas Witczak." NLS
"Twsynth GUI by Daisuke Aoki <dai@y7.net>." NLS
" Japanese menu, dialog, etc by Saito <timidity@flashmail.com>." NLS
"TiMidity++ by Masanao Izumo <mo@goice.co.jp>." NLS
-, (strcmp(timidity_version, "current")) ? "version " : "", timidity_version);
+, (strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string);
MessageBoxA(hParentWnd, VersionText, "Version", MB_OK);
}
{
char TiMidityText[2024];
sprintf(TiMidityText,
-" TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+" TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
" Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>" NLS
" Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
NLS
" along with this program; if not, write to the Free Software" NLS
" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA" NLS
,
-(strcmp(timidity_version, "current")) ? "version " : "", timidity_version
+(strcmp(timidity_version, "current")) ? "version " : "", timidity_version, arch_string
);
MessageBoxA(hParentWnd, TiMidityText, "TiMidity", MB_OK);
}
#endif /* HAVE_SYN_CONSOLE */
+#ifdef IA_W32G_SYN
+static int winplaymidi_sleep_level = 2; /* selects the sleep at the end of winplaymidi(): 0 = none, 1 = Sleep(1), >= 2 = Sleep(100) */
+static DWORD winplaymidi_active_start_time = 0; /* GetCurrentTime() value recorded while at level 1; 0 means "not recorded" */
+/*
+ * Run one iteration of the synthesizer loop: call rtsyn_buf_check(),
+ * rtsyn_play_some_data() and rtsyn_play_calculate(), then sleep for a
+ * duration chosen by winplaymidi_sleep_level.
+ *
+ * The level is 0 when rtsyn_buf_check() returned non-zero on this call,
+ * otherwise at least 1; it is raised to 2 once it has stayed at 1 for
+ * more than 60000 ms.
+ * NOTE(review): the meaning of rtsyn_buf_check()'s return value is not
+ * visible here -- presumably non-zero means data is pending; confirm.
+ */
+void winplaymidi(void) {
+
+ if (winplaymidi_sleep_level < 1) { /* a level of 0 left by the previous call is bumped back to 1 */
+ winplaymidi_sleep_level = 1;
+ }
+ if (0 != rtsyn_buf_check()) { /* non-zero result: drop to level 0 (no sleep this iteration) */
+ winplaymidi_sleep_level = 0;
+ }
+ rtsyn_play_some_data();
+ if (winplaymidi_sleep_level == 1) { /* buffer check returned 0 and we are not yet in long-sleep mode */
+ DWORD ct = GetCurrentTime();
+ if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) { /* start the timer; ct < start presumably guards against tick wraparound */
+ winplaymidi_active_start_time = ct;
+ }
+ else if (ct - winplaymidi_active_start_time > 60000) { /* more than 60 s at level 1: switch to long sleeps */
+ winplaymidi_sleep_level = 2;
+ }
+ }
+ else if (winplaymidi_sleep_level == 0) { /* level 0 clears the timer so the 60 s count restarts later */
+ winplaymidi_active_start_time = 0;
+ }
+
+ rtsyn_play_calculate();
+
+ if (winplaymidi_sleep_level >= 2) { /* long sleep */
+ Sleep(100);
+ }
+ else if (winplaymidi_sleep_level > 0) { /* short sleep; level 0 does not sleep at all */
+ Sleep(1);
+ }
+}
+#endif /* IA_W32G_SYN */
+
+
#ifdef HAVE_SYN_SOUNDSPEC
// ***************************************************************************
extern int INISaveSoundSpecWnd(void);
extern int INILoadSoundSpecWnd(void);
+extern int PauseOldTiMidity(void);
+
#endif /* __W32G_UT2_H__ */
#endif /* SUPPORT_SOUNDSPEC */
#include "wrd.h"
#include "w32g.h"
+#include "w32g_pref.h"
#include "w32g_utl.h"
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
extern int AutosavePlaylist;
extern int PosSizeSave;
extern unsigned char opt_normal_chorus_plus;
-#ifdef AU_LAME
-extern void lame_ConfigDialogInfoLoadINI();
-extern void lame_ConfigDialogInfoSaveINI();
-#endif
///r
//char DefaultPlaylistName[PLAYLIST_MAX][] = {"default.pls"};
#include "wsgtk_main.h"
#endif /* USE_GTK_GUI */
-#ifndef __W32__
#include <stdio.h>
+#ifndef __W32__
#include <termios.h>
//#include <term.h>
#include <unistd.h>
#endif /* __W32__ */
#if defined(__W32__)
+#include <conio.h>
+#define kbhit _kbhit
#define HAVE_DOS_KEYBOARD 1
#endif /* __W32__ */
static int ctl_pass_playing_list(int n, char *args[])
#else
// 0: OK, 2: Require to reset.
-int ctl_pass_playing_list2(int n, char *args[])
+static int ctl_pass_playing_list2(int n, char *args[])
#endif
{
int i, j, devnum, devok;
#endif /* !IA_W32G_SYN */
-#ifdef IA_W32G_SYN
-static int winplaymidi_sleep_level = 2;
-static DWORD winplaymidi_active_start_time = 0;
-
-void winplaymidi(void) {
-
- if (winplaymidi_sleep_level < 1) {
- winplaymidi_sleep_level = 1;
- }
- if (0 != rtsyn_buf_check()) {
- winplaymidi_sleep_level = 0;
- }
- rtsyn_play_some_data();
- if (winplaymidi_sleep_level == 1) {
- DWORD ct = GetCurrentTime();
- if (winplaymidi_active_start_time == 0 || ct < winplaymidi_active_start_time) {
- winplaymidi_active_start_time = ct;
- } else if (ct - winplaymidi_active_start_time > 60000) {
- winplaymidi_sleep_level = 2;
- }
- } else if (winplaymidi_sleep_level == 0) {
- winplaymidi_active_start_time = 0;
- }
-
- rtsyn_play_calculate();
-
- if (winplaymidi_sleep_level >= 2) {
- Sleep(100);
- } else if (winplaymidi_sleep_level > 0) {
- Sleep(1);
- }
-}
-#endif /* IA_W32G_SYN */
-
-
/*
* interface_<id>_loader();
*/
#include <windows.h>
#endif
#include "w32g_dib.h"
+#include "w32g_mag.h"
#include "w32g_wrd.h"
static int wrdt_open(char *dummy);
# define UNICODE
#endif
+#include <stdio.h>
+#include <stdlib.h>
#include <windows.h>
#undef MOUSE_MOVED
#include <curspriv.h>
/* callback routines */
static void CALLBACK winmm_in_callback(HMIDIIN hMidiIn,
- WORD wMsg, DWORD_PTR dwInstance,
- DWORD_PTR dwParam1, DWORD_PTR dwParam2);
+ WORD wMsg, DWORD dwInstance,
+ DWORD dwParam1, DWORD dwParam2);
static void CALLBACK winmm_streamout_callback(HMIDIOUT hmo, UINT wMsg,
- DWORD_PTR dwInstance, DWORD_PTR dwParam1,
- DWORD_PTR dwParam2);
+ DWORD dwInstance, DWORD dwParam1,
+ DWORD dwParam2);
#ifdef USE_SYSEX_BUFFERS
static void CALLBACK winmm_out_callback(HMIDIOUT hmo, UINT wMsg,
- DWORD_PTR dwInstance, DWORD_PTR dwParam1,
- DWORD_PTR dwParam2);
+ DWORD dwInstance, DWORD dwParam1,
+ DWORD dwParam2);
#endif
extern pm_fns_node pm_winmm_in_dictionary;
#include "porttime.h"
#include "windows.h"
+#include <mmsystem.h>
#include "time.h"
// SSE is enabled.
#ifndef _USE_SSE
# define USE_ALLOCA
+# define alloca _alloca
#endif
/* Default to floating point */
/** Obtain one of the modes available */
const SpeexMode * speex_lib_get_mode (int mode);
-#ifndef WIN32
+#ifndef _WIN32
/* We actually override the function in the narrowband case so that we can avoid linking in the wideband stuff */
#define speex_lib_get_mode(mode) ((mode)==SPEEX_MODEID_NB ? &speex_nb_mode : speex_lib_get_mode (mode))
#endif
static int auto_au_output_open(const char *input_filename)
{
- char *output_filename = (char *)safe_malloc(strlen(input_filename) + 5);
- char *ext, *p;
-
- strcpy(output_filename, input_filename);
- if((ext = strrchr(output_filename, '.')) == NULL)
- ext = output_filename + strlen(output_filename);
- else {
- /* strip ".gz" */
- if(strcasecmp(ext, ".gz") == 0) {
- *ext = '\0';
- if((ext = strrchr(output_filename, '.')) == NULL)
- ext = output_filename + strlen(output_filename);
- }
- }
+ char *output_filename = create_auto_output_name(input_filename, ".au", NULL, 0);
- /* replace '.' and '#' before ext */
- for(p = output_filename; p < ext; p++)
- if(*p == '.' || *p == '#')
- *p = '_';
+ if (!output_filename) {
+ return -1;
+ }
- if(*ext && isupper(*(ext + 1)))
- strcpy(ext, ".AU");
- else
- strcpy(ext, ".au");
if(au_output_open(output_filename, input_filename) == -1) {
free(output_filename);
return -1;
extern void safe_exit(int status);
extern const char *timidity_version;
+extern const char *arch_string; /* optcode.c */
extern MBlockList tmpbuffer;
extern char *output_text_code;
#elif defined(IA_W32GUI)
extern ControlMode w32gui_control_mode;
#define DEFAULT_CONTROL_MODE &w32gui_control_mode
-#elif defined(IA_W32G_SYN)
+#elif defined(IA_W32G_SYN) || defined(IA_WINSYN)
extern ControlMode winsyn_control_mode;
#define DEFAULT_CONTROL_MODE &winsyn_control_mode
#else
#ifdef IA_W32G_SYN
&winsyn_control_mode,
#endif /* IA_W32GUI */
-#if !defined(__MACOS__) && !defined(IA_W32GUI) && !defined(IA_W32G_SYN)
+#if !defined(__MACOS__) && !defined(IA_W32GUI) && !defined(IA_W32G_SYN) && !defined(IA_WINSYN)
&dumb_control_mode,
#endif
#ifdef IA_PLUGIN
_mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
_mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
_mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
_mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(count)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(cuont)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(count)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
_mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(mc)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(mc)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
_mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
- vd[0] = _mm_set_pd(lookup2_sine_p(vr[0].m128d_f64[1]), lookup2_sine_p(vr[0].m128d_f64[0])); // lookup2_sine_p(count)
- vd[1] = _mm_set_pd(lookup2_sine_p(vr[1].m128d_f64[1]), lookup2_sine_p(vr[1].m128d_f64[0])); // lookup2_sine_p(cuont)
+ vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
+ vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(count)
vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][vindex[0].m128i_i32[0]]); // v1v2
- vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][vindex[0].m128i_i32[1]]); // v1v2
- vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][vindex[1].m128i_i32[0]]); // v1v2
- vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][vindex[1].m128i_i32[1]]); // v1v2
+ vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
#endif
- vin[0] = _mm_loadu_pd(&info->ptr[vindex[0].m128i_i32[0]]); // v1v2
- vin[1] = _mm_loadu_pd(&info->ptr[vindex[0].m128i_i32[1]]); // v1v2
- vin[2] = _mm_loadu_pd(&info->ptr[vindex[1].m128i_i32[0]]); // v1v2
- vin[3] = _mm_loadu_pd(&info->ptr[vindex[1].m128i_i32[1]]); // v1v2
+ vin[0] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+ vin[1] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+ vin[2] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+ vin[3] = _mm_loadu_pd(&info->ptr[MM_EXTRACT_I32(vindex[1],1)]); // v1v2
vv1[0] = _mm_shuffle_pd(vin[0], vin[1], 0x0); // v1v1
vv1[1] = _mm_shuffle_pd(vin[2], vin[3], 0x0); // v1v1
vv2[0] = _mm_shuffle_pd(vin[0], vin[1], 0x3); // v2v2
#include "playmidi.h"
#include "readmidi.h"
#include "miditrace.h"
+#include "flac_a.h"
#ifdef __W32G__
#include "w32g.h"
+#include "w32g_pref.h"
#endif /* __W32G__ */
static int open_output(void); /* 0=success, 1=warning, -1=fatal error */
int include_enc, exclude_enc;
#ifdef AU_FLAC_DLL
- if (g_load_libFLAC_dll("libFLAC.dll")) {
+ if (g_load_libFLAC_dll()) {
ctl->cmsg(CMSG_ERROR, VERB_NORMAL,
"DLL load failed: %s", "libFLAC.dll, ogg.dll");
return -1;
--- /dev/null
+
+#pragma once
+
+/*
+ * Declarations of the FLAC output option setters.  Nothing is declared
+ * unless AU_FLAC is defined.
+ *
+ * NOTE(review): the definitions are not visible from this header; the
+ * per-function notes below are inferred from the names only -- confirm
+ * against the implementation.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "timidity.h"
+#include "common.h"
+
+#ifdef AU_FLAC
+
+/* Presumably switches encoder verification on/off -- TODO confirm. */
+extern void flac_set_option_verify(int verify);
+/* Presumably sets the padding option -- TODO confirm meaning/units. */
+extern void flac_set_option_padding(int padding);
+/* Presumably selects the encoder compression level -- TODO confirm range. */
+extern void flac_set_compression_level(int compression_level);
+
+#ifdef AU_OGGFLAC
+/* Declared only when AU_OGGFLAC is also defined; presumably selects
+ * Ogg-encapsulated FLAC output -- TODO confirm. */
+extern void flac_set_option_oggflac(int isogg);
+#endif /* AU_OGGFLAC */
+
+#endif /* AU_FLAC */
#include "quantity.h"
#include "freq.h"
#include "support.h"
+#include "sfz.h"
#define INSTRUMENT_HASH_SIZE 128
struct InstrumentCache
#ifdef INT_SYNTH
extern void free_int_synth_file(Instrument *ip);
#endif
+#ifdef ENABLE_SFZ
+ extern void free_sfz_file(Instrument *ip);
+#endif
if (!ip) return;
free_int_synth_file(ip);
break;
#endif
+#ifdef ENABLE_SFZ
+ case INST_SFZ:
+ free_sfz_file(ip);
+ break;
+#endif
}
safe_free(ip);
}
ip = extract_scc_file(bank->tone[prog][elm]->name, bank->tone[prog][elm]->is_preset);
break;
#endif
+#ifdef ENABLE_SFZ
+ case 5: /* sfz extension */
+ ip = extract_sfz_file(bank->tone[prog][elm]->name);
+ break;
+#endif
default:
goto TONEBANK_INSTRUMENT_NULL;
break;
if(bank->tone[j][elm] == NULL)
continue;
ip = bank->tone[j][elm]->instrument;
- if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC) &&
+ if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC || ip->type == INST_SFZ) &&
(i == 0 || !tonebank[0]->tone[j][elm] || ip != tonebank[0]->tone[j][elm]->instrument) )
free_instrument(ip);
bank->tone[j][elm]->instrument = NULL;
if(bank->tone[j][elm] == NULL)
continue;
ip = bank->tone[j][elm]->instrument;
- if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC) &&
+ if(ip && (ip->type == INST_SF2 || ip->type == INST_PCM || ip->type == INST_MMS || ip->type == INST_SCC || ip->type == INST_SFZ) &&
(i == 0 || !drumset[0]->tone[j][elm] || ip != drumset[0]->tone[j][elm]->instrument) )
free_instrument(ip);
bank->tone[j][elm]->instrument = NULL;
///r
#define INST_MMS 4 /* %mms */
#define INST_SCC 5 /* %scc */
+#define INST_SFZ 6 /* %sfz */
/* sfSampleType */
#define SF_SAMPLETYPE_MONO 1
2: %sample // wav,aiff
3: %mms
4: %scc
- 5-255: reserved
+ 5: %sfz
+ 6-255: reserved
*/
int16 amp;
int8 amp_normalize;
vofsi = _mm_srli_epi32(vofs, FRACTION_BITS);
vofsf = _mm_and_si128(vofs, vfmask);
vfp = _mm_mul_ps(_mm_cvtepi32_ps(vofsf), vec_divf); // int32 to float // calc fp
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- ofsp1 = (int32 *)vofsi;
#if defined(IS_RS_DATA_T_DOUBLE)
- tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[0]])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[1]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[2]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[ofsp1[3]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,0)])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+ tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,1)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,2)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,3)])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à
tmp1 = _mm_shuffle_ps(tmp1, tmp2, 0x44);
tmp3 = _mm_shuffle_ps(tmp3, tmp4, 0x44);
#else // defined(IS_RS_DATA_T_FLOAT)
- tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[ofsp1[0]]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[ofsp1[1]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[ofsp1[2]]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[ofsp1[3]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,0)]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+ tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,1)]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,2)]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[MM_EXTRACT_I32(vofsi,3)]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
#endif
-#else
-#if defined(IS_RS_DATA_T_DOUBLE)
- tmp1 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[0]])); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp2 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[1]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[2]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp4 = _mm_cvtpd_ps(_mm_loadu_pd(&rs_buf[vofsi.m128i_i32[3]])); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à
- tmp1 = _mm_shuffle_ps(tmp1, tmp2, 0x44);
- tmp3 = _mm_shuffle_ps(tmp3, tmp4, 0x44);
-#else // defined(IS_RS_DATA_T_FLOAT)
- tmp1 = _mm_loadl_pi(tmp1, (__m64 *)&rs_buf[vofsi.m128i_i32[0]]); // L64bit ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp1 = _mm_loadh_pi(tmp1, (__m64 *)&rs_buf[vofsi.m128i_i32[1]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadl_pi(tmp3, (__m64 *)&rs_buf[vofsi.m128i_i32[2]]); // L64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadh_pi(tmp3, (__m64 *)&rs_buf[vofsi.m128i_i32[3]]); // H64bit \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
-#endif
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
vv1 = _mm_shuffle_ps(tmp1, tmp3, 0x88); // v1[0,1,2,3] // ofsi\82Ív1\82É
vv2 = _mm_shuffle_ps(tmp1, tmp3, 0xdd); // v2[0,1,2,3] // ofsi+1\82Ív2\82É\88Ú\93®
vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1);
vofsf = _mm_and_si128(vofs, vfmask);
vfp1 = _mm_mul_pd(_mm_cvtepi32_pd(vofsf), vec_divf); // int32 to double // calc fp
vfp2 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_shuffle_epi32(vofsf, 0x4E)), vec_divf); // int32 to double // calc fp
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- ofsp1 = (int32 *)vofsi;
- tmp1 = _mm_loadu_pd(&rs_buf[ofsp1[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp2 = _mm_loadu_pd(&rs_buf[ofsp1[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadu_pd(&rs_buf[ofsp1[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp4 = _mm_loadu_pd(&rs_buf[ofsp1[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
-#else
- tmp1 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- tmp2 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp3 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- tmp4 = _mm_loadu_pd(&rs_buf[vofsi.m128i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ tmp1 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+ tmp2 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp3 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ tmp4 = _mm_loadu_pd(&rs_buf[MM_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à
vv11 = _mm_shuffle_pd(tmp1, tmp2, 0x00); // v1[0,1] // ofsi\82Ív1\82É
vv21 = _mm_shuffle_pd(tmp1, tmp2, 0x03); // v2[0,1] // ofsi+1\82Ív2\82É\88Ú\93®
vv12 = _mm_shuffle_pd(tmp3, tmp4, 0x00); // v1[2,3] // ofsi\82Ív1\82É
extern int mms_editor_override;
extern const char *scc_data_editor_load_name(int num);
extern void scc_data_editor_store_name(int num, const char *name);
+extern void scc_data_editor_clear_param(void);
extern void scc_data_editor_set_default_param(int set_num);
extern int scc_data_editor_get_param(int num);
extern void scc_data_editor_set_param(int num, int val);
case INST_SF2:
case INST_MOD:
case INST_PCM:
+#ifdef ENABLE_SFZ
+ case INST_SFZ:
+#endif
if(opt_resample_over_sampling){
int32 c2 = c * opt_resample_over_sampling;
resample_voice(v, sp, c2);
safe_free(sp->data);
sp->data = new_data;
sp->sample_rate = new_rate;
-
- sp->data_length = new_data_length << FRACTION_BITS;
+
+ sp->data_length = (splen_t)new_data_length << FRACTION_BITS;
sp->loop_start = loop_start * (1 << FRACTION_BITS);
sp->loop_end = loop_end * (1 << FRACTION_BITS);
}
#ifdef MYINI_LIBRARY_DEFIND_VAR
+#ifdef _WIN32
+#include <windows.h>
+#else
+
#ifndef INT8
typedef char INT8;
#endif
#endif
#endif
+#endif
#ifndef _TCHAR_DEFINED
#endif /* stdc */
#include "timidity.h"
+#include "common.h"
+
+/*
+ * arch_string: build-time tag naming the target CPU family and the SIMD
+ * level selected through USE_X64_EXT_INTRIN / USE_X86_EXT_INTRIN, for
+ * example "[x64 AVX2]".  It is the empty string when neither IX64CPU nor
+ * IX86CPU is defined.
+ */
+#if defined(IX64CPU)
+#define ARCH_STRING_LEVEL USE_X64_EXT_INTRIN
+#define ARCH_STRING_PICK(x64, x86) x64
+#elif defined(IX86CPU)
+#define ARCH_STRING_LEVEL USE_X86_EXT_INTRIN
+#define ARCH_STRING_PICK(x64, x86) x86
+#endif
+
+const char *arch_string =
+#if !defined(ARCH_STRING_PICK)
+	""
+#elif ARCH_STRING_LEVEL == 9
+	ARCH_STRING_PICK("[x64 AVX2]", "[x86 AVX2]")
+#elif ARCH_STRING_LEVEL == 8
+	ARCH_STRING_PICK("[x64 AVX]", "[x86 AVX]")
+#elif ARCH_STRING_LEVEL == 7
+	ARCH_STRING_PICK("[x64 SSE4.2]", "[x86 SSE4.2]")
+#elif ARCH_STRING_LEVEL == 6
+	ARCH_STRING_PICK("[x64 SSE4.1]", "[x86 SSE4.1]")
+#elif ARCH_STRING_LEVEL == 5
+	ARCH_STRING_PICK("[x64 SSSE3]", "[x86 SSSE3]")
+#elif ARCH_STRING_LEVEL == 4
+	ARCH_STRING_PICK("[x64 SSE3]", "[x86 SSE3]")
+#elif ARCH_STRING_LEVEL == 3
+	ARCH_STRING_PICK("[x64 SSE2]", "[x86 SSE2]")
+#elif ARCH_STRING_LEVEL == 2
+	ARCH_STRING_PICK("[x64 SSE]", "[x86 SSE]")
+#elif ARCH_STRING_LEVEL == 1
+	ARCH_STRING_PICK("[x64 MMX]", "[x86 MMX]")
+#else
+	/* level 0, undefined, or a value not listed above */
+	ARCH_STRING_PICK("[x64]", "[x86]")
+#endif
+;
+
+/* The selector macros are private to this definition. */
+#undef ARCH_STRING_LEVEL
+#undef ARCH_STRING_PICK
/*****************************************************************************/
/*****************************************************************************/
#if (USE_X86_EXT_ASM || USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_ASM || USE_X86_AMD_EXT_INTRIN)
-
+#ifdef __GNUC__
+/*
+ * CPUID(regs, eax): run the CPUID instruction for leaf `eax` (sub-leaf 0)
+ * and store the resulting EAX, EBX, ECX and EDX in regs[0..3].  This is the
+ * same argument order as the __cpuid() intrinsic used on other compilers.
+ *
+ * static: a plain `inline` definition provides no out-of-line symbol under
+ * C99 inline rules, which breaks the link whenever the call is not inlined.
+ */
+static inline void CPUID(int32 *regs, uint32 eax)
+{
+	/* ECX selects the sub-leaf on some leaves; pin it to 0 so the result is deterministic. */
+	uint32 ebx, ecx = 0, edx;
+#if defined(__i386__) && defined(__PIC__)
+	/*
+	 * 32-bit PIC code may reserve EBX for the GOT pointer, so EBX cannot be
+	 * named as an operand there.  Swap it through a scratch register around
+	 * the instruction instead of push/pop (no stack traffic inside the asm).
+	 */
+	__asm__ __volatile__ (
+		"xchgl %%ebx, %1 \n\t"
+		"cpuid \n\t"
+		"xchgl %%ebx, %1 \n\t"
+		: "+a"(eax), "=&r"(ebx), "+c"(ecx), "=d"(edx)
+	);
+#else
+	/* Let the compiler see every register CPUID writes; no manual save/restore needed. */
+	__asm__ __volatile__ (
+		"cpuid \n\t"
+		: "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx)
+	);
+#endif
+	regs[0] = eax;
+	regs[1] = ebx;
+	regs[2] = ecx;
+	regs[3] = edx;
+}
+#else
+/* Other compilers: use the __cpuid(int regs[4], int leaf) intrinsic directly. */
+#define CPUID __cpuid
+#endif
enum{
X86_VENDER_INTEL=0,
X86_VENDER_AMD,
static const char* x86_vendors[] =
{
"GenuineIntel",
- "AuthenticAMD"
+ "AuthenticAMD",
"Unknown ",
};
// \8ag\92£\83t\83\89\83O\8eæ\93¾
static inline int64 xgetbv(int index)
{
+#if defined(__GNUC__) /* GNU-style inline asm path */
+ unsigned int eax, edx; /* low and high 32 bits of the result */
+ __asm__ __volatile__ (
+ "xgetbv \n\t"
+ : "=a"(eax), "=d"(edx) /* outputs are read back from EAX and EDX */
+ : "c"(index) /* the caller's index is passed in ECX */
+ );
+ return (uint64)eax|((uint64)edx<<32); /* combine EDX:EAX into one 64-bit value */
+#else /* non-GNU compilers follow */
#if (USE_X86_EXT_ASM || USE_X86_AMD_EXT_ASM)
uint64 flg = 0;
//_asm {
//return flg;
return 0xFFFFFFFFFFFFFFFF; // asm\82Åxgetbv index \82Ç\82±\81E\81E\82í\82©\82ç\82ñ\82Ì\82Å\83X\83\8b\81[
#elif (USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_INTRIN)
+#if _MSC_VER < 1600 // VC2010 (immintrin.h _xgetbv()
+ return 0xFFFFFFFFFFFFFFFF;
+#else
return _xgetbv(index);
+#endif /* _MSC_VER */
+#endif
#endif
-
}
uint32 flg4; // extended feature flg pg2
memset(vendor, 0, sizeof(vendor));
- __cpuid(reg, 0);
+ CPUID(reg,0);
cmd = reg[0];
((uint32*)vendor)[0] = reg[1];
((uint32*)vendor)[1] = reg[3];
break;
}
if(cmd >= 0x00000001){
- __cpuid(reg, 0x00000001);
+ CPUID(reg,0x00000001);
flg1 = reg[3];
flg2 = reg[2];
}
- __cpuid(reg, 0x80000000);
+ CPUID(reg,0x80000000);
cmd = reg[ 0 ];
if(cmd >= 0x80000001){
- __cpuid(reg, 0x80000001);
+ CPUID(reg,0x80000001);
flg4 = reg[2];
flg3 = reg[3];
}
#ifndef OPTCODE_H_INCLUDED
#define OPTCODE_H_INCLUDED 1
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmacro-redefined"
+#endif
+
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__X86__) || defined(__I86__)
#define IX86CPU 1
#endif
-#if defined(_M_X64) || defined(_AMD64_) || defined(_X64_) || defined(__X64__)
+#if defined(_M_X64) || defined(_AMD64_) || defined(_X64_) || defined(__X64__) || defined(__x86_64__)
#define IX64CPU 1
#undef IX86CPU
#undef IA64CPU
#if defined(USE_SSE5) // _MSC_VER >= 1700 VC2012?
#define USE_X86_AMD_EXT_INTRIN 6
-#eiif defined(USE_SSE4A) // _MSC_VER >= 1600 VC2010?
+#elif defined(USE_SSE4A) // _MSC_VER >= 1600 VC2010?
#define USE_X86_AMD_EXT_INTRIN 5
#elif defined(USE_3DNOW_PRO)
#define USE_X86_AMD_EXT_INTRIN 4
#endif
/* asm/intrin\95s\89Â\8fð\8c\8f \91¼\82É\82 \82ê\82Î\92Ç\89Á */
-#if !defined(IX86CPU)
-#define USE_X86_EXT_ASM 0
-#define USE_X86_AMD_EXT_ASM 0
-#endif
#if !defined(IX64CPU)
-#define USE_X64_EXT_ASM 0
-#define USE_X64_AMD_EXT_ASM 0
+#undef USE_X64_EXT_INTRIN
#define USE_X64_EXT_INTRIN 0
+#undef USE_X64_AMD_EXT_INTRIN
#define USE_X64_AMD_EXT_INTRIN 0
#endif
#if !defined(IX86CPU) && !defined(IX64CPU)
+#undef USE_X86_EXT_INTRIN
#define USE_X86_EXT_INTRIN 0
+#undef USE_X86_AMD_EXT_INTRIN
#define USE_X86_AMD_EXT_INTRIN 0
#endif
-#if defined(__GNUC__)
-#define USE_X86_EXT_INTRIN 0
-#define USE_X86_AMD_EXT_INTRIN 0
-#define USE_X64_EXT_INTRIN 0
-#define USE_X64_AMD_EXT_INTRIN 0
-#endif
-#if defined(__GNUC__)
-#define USE_X86_EXT_INTRIN 0
-#define USE_X86_AMD_EXT_INTRIN 0
-#define USE_X64_EXT_INTRIN 0
-#define USE_X64_AMD_EXT_INTRIN 0
-#endif
-
+/* Always disable inline asm */
+#undef USE_X86_EXT_ASM
+#define USE_X86_EXT_ASM 0
+#undef USE_X86_AMD_EXT_ASM
+#define USE_X86_AMD_EXT_ASM 0
+#undef USE_X64_EXT_ASM
+#define USE_X64_EXT_ASM 0
+#undef USE_X64_AMD_EXT_ASM
+#define USE_X64_AMD_EXT_ASM 0
+#undef SUPPORT_ASM_INTEL
/*****************************************************************************/
/* PowerPC's AltiVec enhancement */
#endif
/*****************************************************************************/
-#if OPT_MODE == 1
+#if OPT_MODE == 1 && USE_X86_EXT_ASM > 0
#ifdef LITTLE_ENDIAN
#define iman_ 0
#if (USE_X86_EXT_INTRIN || USE_X86_AMD_EXT_INTRIN)
#ifdef __GNUC__
-//#if defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 8)
-//#include <avxintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 7)
-//#include <nmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 6)
-//#include <smmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 5)
-//#include <tmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 4)
-//#include <pmmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 3)
-//#include <emmintrin.h>
-//#elif defined(USE_X86_EXT_INTRIN) && (USE_X86_EXT_INTRIN >= 2)
-//#include <xmmintrin.h>
-//#else
-//#include <mmintrin.h>
-//#endif
-//#if defined(USE_X86_AMD_EXT_INTRIN) && (USE_X86_AMD_EXT_INTRIN >= 2)
-//#include <mm3dnow.h>
-//#endif
-#include <immintrin.h>
-
+#include <x86intrin.h>
#elif (_MSC_VER >= 1600) // VC2010(VC10)
#include <intrin.h>
#else // VC2003(VC7) VC2005(VC8) VC2008(VC9)
#define MM_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm_fmadd_pd(v20, v21, _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
#define MM_FMA4_PD(v00, v01, v10, v11, v20, v21, v30, v31) _mm_add_pd(\
_mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21)), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
-#define MM_FMA5_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41) _mm_fmadd_pd(v40, v41, \
- _mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21)), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
+#define MM_FMA5_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41) _mm_add_pd(_mm_fmadd_pd(v40, v41, \
+ _mm_fmadd_pd(v30, v31, _mm_mul_pd(v20, v21))), _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01)) )
#define MM_FMA6_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41, v50, v51) _mm_add_pd(\
_mm_fmadd_pd(v50, v51, _mm_fmadd_pd(v40, v41, _mm_mul_pd(v30, v31))), \
_mm_fmadd_pd(v20, v21, _mm_fmadd_pd(v10, v11, _mm_mul_pd(v00, v01))) )
#define MM_LSU_MUL_PS(ptr, vec_a) _mm_storeu_ps(ptr, _mm_mul_ps(_mm_loadu_ps(ptr), vec_a))
#endif
+#if (USE_X86_EXT_INTRIN >= 1)
+/*
+ * MM_EXTRACT_F32 / MM_EXTRACT_F64 / MM_EXTRACT_I32 / MM256_EXTRACT_I32:
+ * read lane `idx` of a SIMD register as a scalar value.
+ *
+ * Non-MSVC compilers go through shuffle/extract intrinsics, so `idx` must be
+ * an integer constant expression there and `reg` may be evaluated twice.
+ * MSVC reads the member arrays of its vector unions directly.
+ * All arguments and the MSVC expansions are parenthesized so that the macros
+ * remain safe inside larger expressions.
+ */
+#if !(defined(_MSC_VER) || defined(MSC_VER))
+#define MM_EXTRACT_F32(reg,idx) _mm_cvtss_f32(_mm_shuffle_ps((reg),(reg),(idx)))
+#define MM_EXTRACT_F64(reg,idx) _mm_cvtsd_f64(_mm_shuffle_pd((reg),(reg),(idx)))
+#define MM_EXTRACT_I32(reg,idx) _mm_cvtsi128_si32(_mm_shuffle_epi32((reg),(idx)))
+#define MM256_EXTRACT_I32(reg,idx) _mm256_extract_epi32((reg),(idx))
+#else
+#define MM_EXTRACT_F32(reg,idx) ((reg).m128_f32[(idx)])
+#define MM_EXTRACT_F64(reg,idx) ((reg).m128d_f64[(idx)])
+#define MM_EXTRACT_I32(reg,idx) ((reg).m128i_i32[(idx)])
+#define MM256_EXTRACT_I32(reg,idx) ((reg).m256i_i32[(idx)])
+#endif
+#endif // (USE_X86_EXT_INTRIN >= 1)
+
+/*
+ gather and scatter
+*/
+
+#if (USE_X86_EXT_INTRIN >= 9)
+/*
+ * 8-lane 32-bit gather / scatter helpers.
+ *   MM256_I32GATHER_I32(base, offset, scale)
+ *       returns the int32 values found at (char *)base + offset[k] * scale.
+ *   MM256_I32SCATTER_I32(base, offset, val, scale)
+ *       stores val[k] to (char *)base + offset[k] * scale, lane 0 first.
+ * Offsets are signed 32-bit values.  The software paths form the byte
+ * offset with a 32-bit multiply, so offset * scale must fit in an int32.
+ *
+ * NOTE(review): the inner test below repeats the enclosing one, so the
+ * software gather in its #else arm is never compiled as written.
+ */
+#if (USE_X86_EXT_INTRIN >= 9)
+#define MM256_I32GATHER_I32(base, offset, scale) _mm256_i32gather_epi32(base, offset, scale)
+#else
+
+/* Software gather: load eight int32 values from base + offset[k] * scale. */
+static TIMIDITY_FORCEINLINE __m256i mm256_i32gather_i32_impl(const int *base, __m256i offset, int scale)
+{
+	ALIGN32 int32 buf[8];
+	__m256i byte_offset = _mm256_mullo_epi32(offset, _mm256_set1_epi32(scale));
+#ifdef IX64CPU
+	/* Widen each offset to 64 bits with its sign so that negative indices
+	 * address memory below base, as the hardware gather does. */
+	__m256i sign = _mm256_srai_epi32(byte_offset, 31);
+	__m256i vbase = _mm256_set1_epi64x((int64)base);
+	/* The unpack works per 128-bit half: "lo" yields lanes 0,1,4,5 and "hi" lanes 2,3,6,7. */
+	__m256i vptr0145 = _mm256_add_epi64(vbase, _mm256_unpacklo_epi32(byte_offset, sign));
+	__m256i vptr2367 = _mm256_add_epi64(vbase, _mm256_unpackhi_epi32(byte_offset, sign));
+	ALIGN32 const int32 *ptr0145[4];
+	ALIGN32 const int32 *ptr2367[4];
+	_mm256_store_si256((__m256i *)ptr0145, vptr0145);
+	_mm256_store_si256((__m256i *)ptr2367, vptr2367);
+
+	buf[0] = *ptr0145[0];
+	buf[1] = *ptr0145[1];
+	buf[2] = *ptr2367[0];
+	buf[3] = *ptr2367[1];
+	buf[4] = *ptr0145[2];
+	buf[5] = *ptr0145[3];
+	buf[6] = *ptr2367[2];
+	buf[7] = *ptr2367[3];
+#else
+	/* 32-bit target: an address fits in an int32, so buf first holds the
+	 * eight addresses and is then overwritten with the loaded values. */
+	int i;
+	__m256i pointers = _mm256_add_epi32(_mm256_set1_epi32((int32)base), byte_offset);
+	_mm256_store_si256((__m256i *)buf, pointers);
+
+	for (i = 0; i < 8; i++) {
+		buf[i] = *(const int *)buf[i];
+	}
+#endif
+
+	return _mm256_load_si256((const __m256i *)buf);
+}
+
+#define MM256_I32GATHER_I32(base, offset, scale) mm256_i32gather_i32_impl(base, offset, scale)
+#endif // (USE_X86_EXT_INTRIN >= 9)
+
+/* Software scatter: store val[k] to base + offset[k] * scale for k = 0..7, in lane order. */
+static TIMIDITY_FORCEINLINE void mm256_i32scatter_i32_impl(void *base, __m256i offset, __m256i val, int scale)
+{
+	ALIGN32 int32 buf[8];
+	__m256i byte_offset;
+
+	_mm256_store_si256((__m256i *)buf, val);
+	byte_offset = _mm256_mullo_epi32(offset, _mm256_set1_epi32(scale));
+#ifdef IX64CPU
+	{
+		/* Sign-extend the offsets to 64 bits; zero-extension would turn a
+		 * negative index into base + 4 GiB - n. */
+		__m256i sign = _mm256_srai_epi32(byte_offset, 31);
+		__m256i vbase = _mm256_set1_epi64x((int64)base);
+		/* Per 128-bit half: "lo" yields lanes 0,1,4,5 and "hi" lanes 2,3,6,7. */
+		__m256i vptr0145 = _mm256_add_epi64(vbase, _mm256_unpacklo_epi32(byte_offset, sign));
+		__m256i vptr2367 = _mm256_add_epi64(vbase, _mm256_unpackhi_epi32(byte_offset, sign));
+		ALIGN32 int32 *ptr0145[4];
+		ALIGN32 int32 *ptr2367[4];
+		_mm256_store_si256((__m256i *)ptr0145, vptr0145);
+		_mm256_store_si256((__m256i *)ptr2367, vptr2367);
+
+		*ptr0145[0] = buf[0];
+		*ptr0145[1] = buf[1];
+		*ptr2367[0] = buf[2];
+		*ptr2367[1] = buf[3];
+		*ptr0145[2] = buf[4];
+		*ptr0145[3] = buf[5];
+		*ptr2367[2] = buf[6];
+		*ptr2367[3] = buf[7];
+	}
+#else
+	{
+		/* 32-bit target: addresses are computed directly in 32-bit lanes. */
+		int i;
+		__m256i vptr = _mm256_add_epi32(_mm256_set1_epi32((int32)base), byte_offset);
+		ALIGN32 int32 *ptr[8];
+		_mm256_store_si256((__m256i *)ptr, vptr);
+
+		for (i = 0; i < 8; i++) {
+			*ptr[i] = buf[i];
+		}
+	}
+#endif
+}
+
+#define MM256_I32SCATTER_I32(base, offset, val, scale) mm256_i32scatter_i32_impl(base, offset, val, scale)
+
+#endif // (USE_X86_EXT_INTRIN >= 9)
+
+#if (USE_X86_EXT_INTRIN >= 1)
+/*
+ * MM_I32GATHER_I32(base, offset, scale): return the four int32 values found
+ * at (char *)base + offset[k] * scale.  Level 9 builds use the hardware
+ * gather, levels 6..8 use the software version below, and lower levels do
+ * not define the macro.  Offsets are signed in both versions.
+ */
+#if (USE_X86_EXT_INTRIN >= 9)
+#define MM_I32GATHER_I32(base, offset, scale) _mm_i32gather_epi32(base, offset, scale)
+#elif (USE_X86_EXT_INTRIN >= 6)
+
+/* Software gather.  The byte offset is formed with a 32-bit multiply, so
+ * offset * scale must fit in an int32. */
+static TIMIDITY_FORCEINLINE __m128i mm_i32gather_i32_impl(const int *base, __m128i offset, int scale)
+{
+	ALIGN16 int32 buf[4];
+	__m128i byte_offset = _mm_mullo_epi32(offset, _mm_set1_epi32(scale));
+#ifdef IX64CPU
+	/* Widen each offset to 64 bits with its sign so that negative indices
+	 * address memory below base, as _mm_i32gather_epi32 does. */
+	__m128i sign = _mm_srai_epi32(byte_offset, 31);
+	__m128i vbase = _mm_set1_epi64x((int64)base);
+	__m128i vptr01 = _mm_add_epi64(vbase, _mm_unpacklo_epi32(byte_offset, sign));
+	__m128i vptr23 = _mm_add_epi64(vbase, _mm_unpackhi_epi32(byte_offset, sign));
+	ALIGN16 const int32 *ptr01[2];
+	ALIGN16 const int32 *ptr23[2];
+	_mm_store_si128((__m128i *)ptr01, vptr01);
+	_mm_store_si128((__m128i *)ptr23, vptr23);
+
+	buf[0] = *ptr01[0];
+	buf[1] = *ptr01[1];
+	buf[2] = *ptr23[0];
+	buf[3] = *ptr23[1];
+#else
+	/* 32-bit target: an address fits in an int32, so buf first holds the
+	 * four addresses and is then overwritten with the loaded values. */
+	int i;
+	__m128i pointers = _mm_add_epi32(_mm_set1_epi32((int32)base), byte_offset);
+	_mm_store_si128((__m128i *)buf, pointers);
+
+	for (i = 0; i < 4; i++) {
+		buf[i] = *(const int *)buf[i];
+	}
+#endif
+
+	return _mm_load_si128((const __m128i *)buf);
+}
+
+#define MM_I32GATHER_I32(base, offset, scale) mm_i32gather_i32_impl(base, offset, scale)
+#endif // (USE_X86_EXT_INTRIN >= 6)
+#endif // (USE_X86_EXT_INTRIN >= 1)
+
+#if (USE_X86_EXT_INTRIN >= 6)
+
+/*
+ * MM_I32SCATTER_I32(base, offset, val, scale): store val[k] to
+ * (char *)base + offset[k] * scale for k = 0..3, in lane order.
+ * Offsets are signed; the byte offset is formed with a 32-bit multiply, so
+ * offset * scale must fit in an int32.
+ */
+static TIMIDITY_FORCEINLINE void mm_i32scatter_i32_impl(void *base, __m128i offset, __m128i val, int scale)
+{
+	ALIGN16 int32 buf[4];
+	__m128i byte_offset;
+
+	_mm_store_si128((__m128i *)buf, val);
+	byte_offset = _mm_mullo_epi32(offset, _mm_set1_epi32(scale));
+#ifdef IX64CPU
+	{
+		/* Sign-extend the offsets to 64 bits; zero-extension would turn a
+		 * negative index into base + 4 GiB - n. */
+		__m128i sign = _mm_srai_epi32(byte_offset, 31);
+		__m128i vbase = _mm_set1_epi64x((int64)base);
+		__m128i vptr01 = _mm_add_epi64(vbase, _mm_unpacklo_epi32(byte_offset, sign));
+		__m128i vptr23 = _mm_add_epi64(vbase, _mm_unpackhi_epi32(byte_offset, sign));
+		ALIGN16 int32 *ptr01[2];
+		ALIGN16 int32 *ptr23[2];
+		_mm_store_si128((__m128i *)ptr01, vptr01);
+		_mm_store_si128((__m128i *)ptr23, vptr23);
+
+		*ptr01[0] = buf[0];
+		*ptr01[1] = buf[1];
+		*ptr23[0] = buf[2];
+		*ptr23[1] = buf[3];
+	}
+#else
+	{
+		/* 32-bit target: addresses are computed directly in 32-bit lanes. */
+		__m128i vptr = _mm_add_epi32(_mm_set1_epi32((int32)base), byte_offset);
+		ALIGN16 int32 *ptr[4];
+		_mm_store_si128((__m128i *)ptr, vptr);
+
+		*ptr[0] = buf[0];
+		*ptr[1] = buf[1];
+		*ptr[2] = buf[2];
+		*ptr[3] = buf[3];
+	}
+#endif
+}
+
+#define MM_I32SCATTER_I32(base, offset, val, scale) mm_i32scatter_i32_impl(base, offset, val, scale)
+
+#endif // (USE_X86_EXT_INTRIN >= 6)
#define IS_ALIGN(ptr) (!((int32)ptr & (ALIGN_SIZE - 1)))
extern int is_x86ext_available(void);
#define memset switch_memset
#endif /* altivec */
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
#endif /* OPTCODE_H_INCLUDED */
extern PlayMode wasapi_play_mode;
#endif /* AU_WASAPI */
+#ifdef AU_ASIO
+extern PlayMode asio_play_mode;
+#endif /* AU_ASIO */
+
#ifdef AU_PORTAUDIO
#ifndef AU_PORTAUDIO_DLL
extern PlayMode portaudio_play_mode;
&wasapi_play_mode,
#endif /* AU_WASAPI */
+#if defined(AU_ASIO)
+ &asio_play_mode,
+#endif /* AU_ASIO */
+
#if defined(AU_PORTAUDIO)
#ifndef AU_PORTAUDIO_DLL
&portaudio_play_mode,
__m128 vmul = _mm_set1_ps((float)MAX_8BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- cp[i] = (int8)(out[0]);
- cp[i] = (int8)(out[1]);
- cp[i] = (int8)(out[2]);
- cp[i] = (int8)(out[3]);
- }
-#else
- cp[i] = (int8)(vec_f.m128_f32[0]);
- cp[i] = (int8)(vec_f.m128_f32[1]);
- cp[i] = (int8)(vec_f.m128_f32[2]);
- cp[i] = (int8)(vec_f.m128_f32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ /* One output slot per lane: the loop advances i by 4, so lanes 1..3 go to i+1..i+3. */
+ cp[i] = (int8)(MM_EXTRACT_F32(vec_f,0));
+ cp[i + 1] = (int8)(MM_EXTRACT_F32(vec_f,1));
+ cp[i + 2] = (int8)(MM_EXTRACT_F32(vec_f,2));
+ cp[i + 3] = (int8)(MM_EXTRACT_F32(vec_f,3));
}
}
#else
__m128i vex = _mm_set1_epi8(0x80);
for(i = 0; i < c; i += 4){
__m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- cp[i] = 0x80 ^ (uint8)(out[0]);
- cp[i] = 0x80 ^ (uint8)(out[1]);
- cp[i] = 0x80 ^ (uint8)(out[2]);
- cp[i] = 0x80 ^ (uint8)(out[3]);
- }
-#else
- cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[0]);
- cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[1]);
- cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[2]);
- cp[i] = 0x80 ^ (uint8)(vec_f.m128_f32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ /* One output slot per lane: the loop advances i by 4, so lanes 1..3 go to i+1..i+3. */
+ cp[i] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,0));
+ cp[i + 1] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,1));
+ cp[i + 2] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,2));
+ cp[i + 3] = 0x80 ^ (uint8)(MM_EXTRACT_F32(vec_f,3));
}
}
#else
__m256d vmul = _mm256_set1_pd((double)MAX_16BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128i vec0 = _mm256_cvttpd_epi32(_mm256_mul_pd(D256_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- int32 *out = (int32 *)vec0;
- up[i] = AUDIO_S2U(out[0]);
- up[i + 1] = AUDIO_S2U(out[1]);
- up[i + 2] = AUDIO_S2U(out[2]);
- up[i + 3] = AUDIO_S2U(out[3]);
- }
-#else
- up[i] = AUDIO_S2U(vec0.m128i_i32[0]);
- up[i + 1] = AUDIO_S2U(vec0.m128i_i32[1]);
- up[i + 2] = AUDIO_S2U(vec0.m128i_i32[2]);
- up[i + 3] = AUDIO_S2U(vec0.m128i_i32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec0,0));
+ up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec0,1));
+ up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec0,2));
+ up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec0,3));
}
}
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
__m128 vec_f12 = _mm_cvtpd_ps(_mm_load_pd(&lp[i + 2]));
__m128 vec_f1 = _mm_shuffle_ps(vec_f11, vec_f12, 0x44);
__m128i vec_i32 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_MM(vec_f1, gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- int32 *out = (int32 *)vec_i32;
- up[i] = AUDIO_S2U(out[0]);
- up[i + 1] = AUDIO_S2U(out[1]);
- up[i + 2] = AUDIO_S2U(out[2]);
- up[i + 3] = AUDIO_S2U(out[3]);
- }
-#else
- up[i] = AUDIO_S2U(vec_i32.m128i_i32[0]);
- up[i + 1] = AUDIO_S2U(vec_i32.m128i_i32[1]);
- up[i + 2] = AUDIO_S2U(vec_i32.m128i_i32[2]);
- up[i + 3] = AUDIO_S2U(vec_i32.m128i_i32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,0));
+ up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,1));
+ up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,2));
+ up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec_i32,3));
}
}
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_FLOAT)
__m128 vmul = _mm_set1_ps((float)MAX_16BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128i vec0 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- int32 *out = (int32 *)vec0;
- up[i] = AUDIO_S2U(out[0]);
- up[i + 1] = AUDIO_S2U(out[1]);
- up[i + 2] = AUDIO_S2U(out[2]);
- up[i + 3] = AUDIO_S2U(out[3]);
- }
-#else
- up[i] = AUDIO_S2U(vec0.m128i_i32[0]);
- up[i + 1] = AUDIO_S2U(vec0.m128i_i32[1]);
- up[i + 2] = AUDIO_S2U(vec0.m128i_i32[2]);
- up[i + 3] = AUDIO_S2U(vec0.m128i_i32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ up[i] = AUDIO_S2U(MM_EXTRACT_I32(vec0,0));
+ up[i + 1] = AUDIO_S2U(MM_EXTRACT_I32(vec0,1));
+ up[i + 2] = AUDIO_S2U(MM_EXTRACT_I32(vec0,2));
+ up[i + 3] = AUDIO_S2U(MM_EXTRACT_I32(vec0,3));
}
}
#else
__m256d vmul = _mm256_set1_pd((double)MAX_16BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128i vec0 = _mm256_cvttpd_epi32(_mm256_mul_pd(D256_CLIP_INPUT(&lp[i], gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- int32 *out = (int32 *)vec0;
- up[i] = AUDIO_S2A(out[0]);
- up[i + 1] = AUDIO_S2A(out[1]);
- up[i + 2] = AUDIO_S2A(out[2]);
- up[i + 3] = AUDIO_S2A(out[3]);
- }
-#else
- up[i] = AUDIO_S2A(vec0.m128i_i32[0]);
- up[i + 1] = AUDIO_S2A(vec0.m128i_i32[1]);
- up[i + 2] = AUDIO_S2A(vec0.m128i_i32[2]);
- up[i + 3] = AUDIO_S2A(vec0.m128i_i32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ up[i] = AUDIO_S2A(MM_EXTRACT_I32(vec0,0));
+ up[i + 1] = AUDIO_S2A(MM_EXTRACT_I32(vec0,1));
+ up[i + 2] = AUDIO_S2A(MM_EXTRACT_I32(vec0,2));
+ up[i + 3] = AUDIO_S2A(MM_EXTRACT_I32(vec0,3));
}
}
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
__m128 vec_f12 = _mm_cvtpd_ps(_mm_load_pd(&lp[i + 2]));
__m128 vec_f1 = _mm_shuffle_ps(vec_f11, vec_f12, 0x44);
__m128i vec_i32 = _mm_cvttps_epi32(_mm_mul_ps(F128_CLIP_MM(vec_f1, gain), vmul));
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- int32 *out = (int32 *)vec_i32;
- up[i] = AUDIO_S2A(out[0]);
- up[i + 1] = AUDIO_S2A(out[1]);
- up[i + 2] = AUDIO_S2A(out[2]);
- up[i + 3] = AUDIO_S2A(out[3]);
- }
-#else
- up[i] = AUDIO_S2A(vec_i32.m128i_i32[0]);
- up[i + 1] = AUDIO_S2A(vec_i32.m128i_i32[1]);
- up[i + 2] = AUDIO_S2A(vec_i32.m128i_i32[2]);
- up[i + 3] = AUDIO_S2A(vec_i32.m128i_i32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ up[i] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,0));
+ up[i + 1] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,1));
+ up[i + 2] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,2));
+ up[i + 3] = AUDIO_S2A(MM_EXTRACT_I32(vec_i32,3));
}
}
#else
__m128 vmul = _mm_set1_ps((float)MAX_16BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- sp[i] = (int16)(out[0]);
- sp[i] = (int16)(out[1]);
- sp[i] = (int16)(out[2]);
- sp[i] = (int16)(out[3]);
- }
-#else
- sp[i] = (int16)(vec_f.m128_f32[0]);
- sp[i] = (int16)(vec_f.m128_f32[1]);
- sp[i] = (int16)(vec_f.m128_f32[2]);
- sp[i] = (int16)(vec_f.m128_f32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ /* One output slot per lane: the loop advances i by 4, so lanes 1..3 go to i+1..i+3. */
+ sp[i] = (int16)(MM_EXTRACT_F32(vec_f,0));
+ sp[i + 1] = (int16)(MM_EXTRACT_F32(vec_f,1));
+ sp[i + 2] = (int16)(MM_EXTRACT_F32(vec_f,2));
+ sp[i + 3] = (int16)(MM_EXTRACT_F32(vec_f,3));
}
}
#else
__m128 vmul = _mm_set1_ps((float)MAX_24BIT_SIGNED);
for(i = 0; i < c; i += 4){ // 108 inst in loop
__m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- STORE_S24(cp, (int32)(out[0]));
- STORE_S24(cp, (int32)(out[1]));
- STORE_S24(cp, (int32)(out[2]));
- STORE_S24(cp, (int32)(out[3]));
- }
-#else
- STORE_S24(cp, (int32)(vec_f.m128_f32[0]));
- STORE_S24(cp, (int32)(vec_f.m128_f32[1]));
- STORE_S24(cp, (int32)(vec_f.m128_f32[2]));
- STORE_S24(cp, (int32)(vec_f.m128_f32[3]));
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,0)));
+ STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,1)));
+ STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,2)));
+ STORE_S24(cp, (int32)(MM_EXTRACT_F32(vec_f,3)));
}
}
#else
__m128 vmul = _mm_set1_ps((float)MAX_32BIT_SIGNED);
for(i = 0; i < c; i += 4){
__m128 vec_f = _mm_mul_ps(F128_CLIP_INPUT(&lp[i], gain), vmul);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- sp[i] = (int32)(out[0]);
- sp[i] = (int32)(out[1]);
- sp[i] = (int32)(out[2]);
- sp[i] = (int32)(out[3]);
- }
-#else
- sp[i] = (int32)(vec_f.m128_f32[0]);
- sp[i] = (int32)(vec_f.m128_f32[1]);
- sp[i] = (int32)(vec_f.m128_f32[2]);
- sp[i] = (int32)(vec_f.m128_f32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ /* One output slot per lane: the loop advances i by 4, so lanes 1..3 go to i+1..i+3. */
+ sp[i] = (int32)(MM_EXTRACT_F32(vec_f,0));
+ sp[i + 1] = (int32)(MM_EXTRACT_F32(vec_f,1));
+ sp[i + 2] = (int32)(MM_EXTRACT_F32(vec_f,2));
+ sp[i + 3] = (int32)(MM_EXTRACT_F32(vec_f,3));
}
}
#else
__m128 gain = _mm_set1_ps((float)INPUT_GAIN);
for(i = c - 4; i >= 0; i -= 4){
__m128 vec_f = F128_CLIP_INPUT(&lp[i], gain);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_f;
- sp[i] = (double)(out[0]);
- sp[i] = (double)(out[1]);
- sp[i] = (double)(out[2]);
- sp[i] = (double)(out[3]);
- }
-#else
- sp[i] = (double)(vec_f.m128_f32[0]);
- sp[i] = (double)(vec_f.m128_f32[1]);
- sp[i] = (double)(vec_f.m128_f32[2]);
- sp[i] = (double)(vec_f.m128_f32[3]);
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ /* One output slot per lane: the loop steps i by 4, so lanes 1..3 go to i+1..i+3. */
+ sp[i] = (double)(MM_EXTRACT_F32(vec_f,0));
+ sp[i + 1] = (double)(MM_EXTRACT_F32(vec_f,1));
+ sp[i + 2] = (double)(MM_EXTRACT_F32(vec_f,2));
+ sp[i + 3] = (double)(MM_EXTRACT_F32(vec_f,3));
}
}
#elif defined(DATA_T_DOUBLE)
if(!special_patch[j]){
vp->reserve_offset = 0;
}else{
- vp->reserve_offset = special_patch[j]->sample_offset << FRACTION_BITS;
+ vp->reserve_offset = (splen_t)special_patch[j]->sample_offset << FRACTION_BITS;
if(vp->sample->modes & MODES_LOOPING) {
if(vp->reserve_offset > vp->sample->loop_end)
vp->reserve_offset = vp->sample->loop_start;
vevol = _mm_shuffle_ps(vevol, vevol, 0x44);
}
vsp = _mm_mul_ps(_mm_loadu_ps(src), vevol);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vsp;
- *(src++) = out[0];
- *(src++) = out[1];
- }
-#else
- *(src++) = vsp.m128_f32[0];
- *(src++) = vsp.m128_f32[1];
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ *(src++) = MM_EXTRACT_F32(vsp,0);
+ *(src++) = MM_EXTRACT_F32(vsp,1);
}
#else // ! USE_X86_EXT_INTRIN
extern int32 get_current_play_tempo(void);
extern void init_voice(int i);
extern void update_voice(int i);
+
+extern int calc_bend_val(int val);
+extern void kill_all_voices(void);
+
#endif /* ___PLAYMIDI_H_ */
#ifdef __W32__
#include <windows.h>
+#include <mmsystem.h>
#endif /* __W32__ */
#include <portaudio.h>
#ifdef PORTAUDIO_V19
extern void recompute_multi_eq_sd(void);
extern void recompute_mfx_effect_sd(struct mfx_effect_sd_t *st, int marge);
extern void realloc_mfx_effect_sd(struct mfx_effect_sd_t *st, int patch);
+/* recompute_/realloc_ prototypes for the chorus and reverb status; same parameter lists as the mfx_effect_sd pair above */
+extern void recompute_chorus_status_sd(struct mfx_effect_sd_t *st, int marge);
+extern void realloc_chorus_status_sd(struct mfx_effect_sd_t *st, int patch);
+extern void recompute_reverb_status_sd(struct mfx_effect_sd_t *st, int marge);
+extern void realloc_reverb_status_sd(struct mfx_effect_sd_t *st, int patch);
extern void control_effect_sd(MidiEvent *ev);
extern Instrument *recompute_userdrum(int bank, int prog, int elm);
extern void add_channel_layer(int, int);
extern void remove_channel_layer(int);
+extern void readmidi_read_init(void);
extern void free_readmidi(void);
+extern void free_time_segments(void);
+
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif /* ___READMIDI_H_ */
/* Simple linear interpolation */
-static DATA_T resample_linear(const sample_t *src, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_linear(const sample_t *src, splen_t ofs, resample_rec_t *rec)
{
const spos_t ofsi = ofs >> FRACTION_BITS;
fract_t ofsf = ofs & FRACTION_MASK;
{
const int32 *src = (const int32*)srci;
const spos_t ofsi = ofs >> FRACTION_BITS;
-// FLOAT_T v1 = src[ofsi], fp = (ofs & FRACTION_MASK);
-// return (v1 + (FLOAT_T)((int64)(src[ofsi + 1]) - (int64)(src[ofsi])) * fp * div_fraction) * OUT_INT32; // FLOAT_T
#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
- FLOAT_T v1 = src[ofsi], fp = (ofs & FRACTION_MASK);
- return (v1 + (FLOAT_T)((int64)(src[ofsi + 1]) - (int64)(src[ofsi])) * fp * div_fraction) * OUT_INT32; // FLOAT_T
+ FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = (ofs & FRACTION_MASK);
+ return (v1 + (v2 - v1) * fp * div_fraction) * OUT_INT32; // FLOAT_T
#else // DATA_T_IN32
fract_t ofsf = ofs & FRACTION_MASK;
- int32 v1 = src[ofsi], v2 = src[ofsi + 1];
- return v1 + imuldiv_fraction_int32(v2 - v1, ofsf);
+ int32 v1 = src[ofsi] >> 16, v2 = src[ofsi + 1] >> 16;
+ return v1 + imuldiv_fraction(v2 - v1, ofsf);
#endif
}
-static DATA_T resample_linear_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_linear_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
{
const float *src = (const float*)srci;
const spos_t ofsi = ofs >> FRACTION_BITS;
temp1 = (v[1] + v[2]) * DIV_6 * div_fraction;
return temp1 * OUT_INT32; // FLOAT_T
do_linear:
- //v[1] = src[ofsi];
- //v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
- //return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
v[1] = src[ofsi];
- v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
- return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
+ v[2] = src[ofsi + 1];
+ return (v[1] + (v[2] - v[1]) * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
#else // DATA_T_IN32
v[1] = src[ofsi];
v[2] = src[ofsi + 1];
just keep this labeled as resample_lagrange(), even if it really is the
Newton form of the polynomial. */
-static DATA_T resample_lagrange(const sample_t *src, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_lagrange(const sample_t *src, splen_t ofs, resample_rec_t *rec)
{
const spos_t ofsi = ofs >> FRACTION_BITS;
fract_t ofsf = ofs & FRACTION_MASK;
v[3] += v[0];
return v[3] * OUT_INT32;
do_linear:
- //v[1] = src[ofsi];
- //v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
- //return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
v[1] = src[ofsi];
- v[2] = (int64)(src[ofsi + 1]) - (int64)(src[ofsi]);
- return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
+ v[2] = src[ofsi + 1];
+ return (v[1] + (v[2] - v[1]) * (FLOAT_T)ofsf * div_fraction) * OUT_INT32; // FLOAT_T
#else // DATA_T_IN32
v[1] = src[ofsi];
v[2] = src[ofsi + 1];
#endif
}
-static DATA_T resample_lagrange_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
+static inline DATA_T resample_lagrange_float(const sample_t *srci, splen_t ofs, resample_rec_t *rec)
{
const float *src = (const float*)srci;
const spos_t ofsi = ofs >> FRACTION_BITS;
double tmp;
for (i = 0; i < gauss_n; i += 8){
#if (USE_X86_EXT_INTRIN >= 9)
- __m256i vec32 = _mm256_cvtepi16_epi32(_mm256_loadu_si256((__m128i *)&sptr[i])); // low i16*8 > i32*8
+ __m256i vec32 = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&sptr[i])); // low i16*8 > i32*8
__m128i vec1 = _mm256_extracti128_si256(vec32, 0x0);
__m128i vec2 = _mm256_extracti128_si256(vec32, 0x1);
#else
FLOAT_T c,s = 0.0, va = 0.0, vb = 0.0;
if(rec->mode == RESAMPLE_MODE_BIDIR_LOOP){
- //FLOAT_T v1 = src[ofsi];
- //FLOAT_T v2 = src[ofsi + 1];
- //return (v1 + (v2 - v1) * fp) * OUT_INT32;
#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
FLOAT_T v1 = src[ofsi];
FLOAT_T v2 = src[ofsi + 1];
width *= 2;
#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x7)){
- __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+ __m256d sum = _mm256_setzero_pd();
__m128d sum1, sum2;
for (i = 0; i < width; i += 8){
#if (USE_X86_EXT_INTRIN >= 9)
- __m256i vec32 = _mm256_cvtepi16_epi32(_mm256_loadu_si256((__m128i *)&v1[i])); // low i16*8 > i32*8
+ __m256i vec32 = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&v1[i])); // low i16*8 > i32*8
__m128i vec1 = _mm256_extracti128_si256(vec32, 0x0);
__m128i vec2 = _mm256_extracti128_si256(vec32, 0x1);
#else
sum1 = _mm_add_pd(sum1, _mm_shuffle_pd(sum1, sum1, 0x1)); // v0=v0+v1 v1=v1+v0
_mm_store_sd(&sample_sum, sum1);
}else
-#elif (USE_X86_EXT_INTRIN >= 6) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
- if(width >= 16 && !(width & 0x3)){
- __m128d sum = _mm_set_pd(0, 0);
- for (i = 0; i < width; i += 4){
- __m128i vec32l = _mm_cvtepi16_epi32(_mm_loadu_si128((__m128i *)&v1[i])); // low i16*4 > i32*4
- __m128d vecd0 = _mm_cvtepi32_pd(vec32l); // low low i32*2 > d*2
- __m128d vecd2 = _mm_cvtepi32_pd(_mm_shuffle_epi32(vec32l, 0x4e)); // low hi i32*2 > d*2
- sum = MM_FMA_PD(vecd0, _mm_load_pd(&coef[i]), sum);
- sum = MM_FMA_PD(vecd2, _mm_load_pd(&coef[i + 2]), sum);
- }
- sum = _mm_add_pd(sum, _mm_shuffle_pd(sum, sum, 0x1)); // v0=v0+v1 v1=v1+v0
- _mm_store_sd(&sample_sum, sum);
- }else
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x3)){
- __m128d sum1 = _mm_set_pd(0, 0);
- __m128d sum2 = _mm_set_pd(0, 0);
+ __m128d sum1 = _mm_setzero_pd();
+ __m128d sum2 = _mm_setzero_pd();
for (i = 0; i < width; i += 4){
- __m128d vecd0 = _mm_set_pd(v1[i + 1], v1[i]);
- __m128d vecd2 = _mm_set_pd(v1[i + 3], v1[i + 2]);
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 , _mm_ cvtepi16_epi32()
+ __m128i vi16 = _mm_loadu_si128((__m128i *)&v1[i]);
+ __m128i vi32 = _mm_cvtepi16_epi32(vi16);
+#else
+ __m128i vi32 = _mm_set_epi32(v1[i + 3], v1[i + 2], v1[i + 1], v1[i]);
+#endif
+ __m128d vecd0 = _mm_cvtepi32_pd(vi32);
+ __m128d vecd2 = _mm_cvtepi32_pd(_mm_shuffle_epi32(vi32, 0x4E)); // swap lo64 hi64
sum1 = MM_FMA_PD(vecd0, _mm_load_pd(&coef[i]), sum1);
sum2 = MM_FMA_PD(vecd2, _mm_load_pd(&coef[i + 2]), sum2);
}
width *= 2;
#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x7)){
- __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+ __m256d sum = _mm256_setzero_pd();
__m128d sum1, sum2;
for (i = 0; i < width; i += 8){
#if (USE_X86_EXT_INTRIN >= 9)
}else
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x3)){
- __m128d sum1 = _mm_set_pd(0, 0);
- __m128d sum2 = _mm_set_pd(0, 0);
+ __m128d sum1 = _mm_setzero_pd();
+ __m128d sum2 = _mm_setzero_pd();
for (i = 0; i < width; i += 4){
__m128i vec32i0 = _mm_loadu_si128((__m128i *)&v1[i]);
sum1 = MM_FMA_PD(_mm_cvtepi32_pd(vec32i0), _mm_load_pd(&coef[i]), sum1);
width *= 2;
#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x7)){
- __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+ __m256d sum = _mm256_setzero_pd();
__m128d sum1, sum2;
for (i = 0; i < width; i += 8){
__m256 vecf = _mm256_loadu_ps(&v1[i]);
}else
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x3)){
- __m128d sum1 = _mm_set_pd(0, 0);
- __m128d sum2 = _mm_set_pd(0, 0);
+ __m128d sum1 = _mm_setzero_pd();
+ __m128d sum2 = _mm_setzero_pd();
for (i = 0; i < width; i += 4){
__m128 vecf0 = _mm_loadu_ps(&v1[i]);
sum1 = MM_FMA_PD(_mm_cvtps_pd(vecf0), _mm_load_pd(&coef[i]), sum1);
width *= 2;
#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x7)){
- __m256d sum = _mm256_set_pd(0, 0, 0, 0);
+ __m256d sum = _mm256_setzero_pd();
__m128d sum1, sum2;
for (i = 0; i < width; i += 8){
sum = MM256_FMA_PD(_mm256_loadu_pd(&v1[i]), _mm256_load_pd(&coef[i]), sum);
}else
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
if(width >= 16 && !(width & 0x3)){
- __m128d sum1 = _mm_set_pd(0, 0);
- __m128d sum2 = _mm_set_pd(0, 0);
+ __m128d sum1 = _mm_setzero_pd();
+ __m128d sum2 = _mm_setzero_pd();
for (i = 0; i < width; i += 4){
sum1 = MM_FMA_PD(_mm_loadu_pd(&v1[i]), _mm_load_pd(&coef[i]), sum1);
sum2 = MM_FMA_PD(_mm_loadu_pd(&v1[i + 2]), _mm_load_pd(&coef[i + 2]), sum2);
/*************** optimize linear resample *****************/
#if defined(PRECALC_LOOPS)
-//#define LO_LOOP_CALC // interpolation sample loop calc
#define LO_OPTIMIZE_INCREMENT
static inline DATA_T resample_linear_single(Voice *vp)
{
-#ifdef LO_LOOP_CALC // interpolation sample loop calc
-/*
-\95â\8a®\93_\83\8b\81[\83v\90Ü\82è\95Ô\82µ\91Î\89\9e
-\82¾\82ª\8dÅ\93K\89»\82È\82Ì\82É\95\89\89×\82Ì\96â\91è\82ª\81E\81E
-SF2\8ed\97l\8f\80\8b\92(\83\8b\81[\83v\91O\8cã4\83T\83\93\83v\83\8b) \82Ü\82½\82Í PAT(\83\8b\81[\83v\91O\8cã1\83T\83\93\83v\83\8b\81H) \82Å\82 \82ê\82Î\82»\82à\82»\82à\95s\97v\82È\82à\82Ì
-*/
- sample_t *src = vp->sample->data;
- const resample_rec_t *resrc = &vp->resrc;
- const fract_t ofsf = resrc->offset & FRACTION_MASK;
- const spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
- const spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
- const spos_t ofsi = resrc->offset >> FRACTION_BITS;
- spos_t ofsi2 = ofsi + 1;
- int32 v1, v2;
-
- switch(resrc->mode){
- case RESAMPLE_MODE_PLAIN:
- // safe end+128 sample
- break;
- case RESAMPLE_MODE_LOOP:
- if(ofsi2 >= ofsle)
- ofsi2 = ofsi2 - (ofsle - ofsls);
- break;
- case RESAMPLE_MODE_BIDIR_LOOP:
- if(resrc->increment >= 0){
- if(ofsi2 >= ofsle)
- ofsi2 = (ofsle << 1) - ofsi2;
- }
- break;
- }
- v1 = src[ofsi];
- v2 = src[ofsi2];
-#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
- return ((FLOAT_T)v1 + (FLOAT_T)(v2 - v1) * (FLOAT_T)ofsf * div_fraction) * OUT_INT16;
-#else // DATA_T_IN32
- return (v1 + imuldiv_fraction((v2 - v1), ofsf);
-#endif
-#else
sample_t *src = vp->sample->data;
const fract_t ofsf = vp->resrc.offset & FRACTION_MASK;
const spos_t ofsi = vp->resrc.offset >> FRACTION_BITS;
#else // DATA_T_IN32
return (v1 + imuldiv_fraction((v2 - v1), ofsf));
#endif
-#endif // LO_LOOP_CALC
}
-#if 0// (USE_X86_EXT_INTRIN >= 9)
+#if (USE_X86_EXT_INTRIN >= 9)
// offset:int32*8, resamp:float*8
// \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
int32 start_offset = (int32)(resrc->offset - prec_offset); // (offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
int32 inc = resrc->increment;
- __m256i vint = _mm256_set_epi32(inc * 7, inc * 6, inc * 5, inc * 4, inc * 3, inc * 2, inc, 0)
+ __m256i vinit = _mm256_set_epi32(inc * 7, inc * 6, inc * 5, inc * 4, inc * 3, inc * 2, inc, 0);
__m256i vofs = _mm256_add_epi32(_mm256_set1_epi32(start_offset), vinit);
__m256i vinc = _mm256_set1_epi32(inc * 8), vfmask = _mm256_set1_epi32((int32)FRACTION_MASK);
__m256 vec_divo = _mm256_set1_ps(DIV_15BIT), vec_divf = _mm256_set1_ps(div_fraction);
for(i = 0; i < count; i += 8) {
__m256i vofsi1 = _mm256_srli_epi32(vofs, FRACTION_BITS);
__m256i vofsi2 = _mm256_add_epi32(vofsi1, vvar1);
- int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128si256(vofsi1, 0x0));
- __m256i vin1 = _mm256_loadu_si256((__m256i *)&src[ofs0]); // int16*16
- __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_epi32());
+ int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128_si256(vofsi1, 0x0));
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofs0]); // int16*16
+ __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_si256());
__m256i vofsub1 = _mm256_sub_epi32(vofsi1, vofsib);
__m256i vofsub2 = _mm256_sub_epi32(vofsi2, vofsib);
__m256 vvf1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vin1)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
dest += 8;
#else // DATA_T_IN32
__m256 vec_out = MM256_FMA_PS(_mm256_sub_ps(vv2, vv1), _mm256_mul_ps(vfp, vec_divf), vv1);
- _mm256_storeu_si256(__m256i *)dest, _mm256_cvtps_epi32(vec_out));
+ _mm256_storeu_si256((__m256i *)dest, _mm256_cvtps_epi32(vec_out));
dest += 8;
#endif
vofs = _mm256_add_epi32(vofs, vinc);
for(; i < count; i += 8) {
__m256i vofsi = _mm256_srli_epi32(vofs, FRACTION_BITS);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- int32 *ofsp = (int32 *)vofsi;
- __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofsp[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- __m128i vin2 = _mm_loadu_si128((__m128i *)&src[ofsp[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin3 = _mm_loadu_si128((__m128i *)&src[ofsp[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin4 = _mm_loadu_si128((__m128i *)&src[ofsp[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin5 = _mm_loadu_si128((__m128i *)&src[ofsp[4]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin6 = _mm_loadu_si128((__m128i *)&src[ofsp[5]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin7 = _mm_loadu_si128((__m128i *)&src[ofsp[6]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin8 = _mm_loadu_si128((__m128i *)&src[ofsp[7]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+#if 1
+ __m256i vsrc01 = MM256_I32GATHER_I32((const int*)src, vofsi, 2);
+ __m256i vsrc0 = _mm256_srai_epi32(_mm256_slli_epi32(vsrc01, 16), 16);
+ __m256i vsrc1 = _mm256_srai_epi32(vsrc01, 16);
+ __m256 vv1 = _mm256_cvtepi32_ps(vsrc0);
+ __m256 vv2 = _mm256_cvtepi32_ps(vsrc1);
#else
- __m128i vin1 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- __m128i vin2 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin3 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin4 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin5 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[4]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin6 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[5]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin7 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[6]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin8 = _mm_loadu_si128((__m128i *)&src[vofsi.m256i_i32[7]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
-#endif
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+ __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,4)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,5)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,6)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,7)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
__m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21]e96,[v12v22]e96 to [v11v12v21v22]e64
__m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23]e96,[v14v24]e96 to [v13v14v23v24]e64
__m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // \93¯\82¶
__m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // \93¯\82¶
- __m128i vi1234 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
- __m128i vi5678 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26]e64,[v17v18,v27v28]e64 to [v15v16v17v18,v25v26v27v28]e0
+ __m128i vin1234 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
+ __m128i vin5678 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26]e64,[v17v18,v27v28]e64 to [v15v16v17v18,v25v26v27v28]e0
__m256i viall = MM256_SET2X_SI256(vin1234, vin5678); // 256bit =128bit+128bit
__m256i vsi16_1 = _mm256_permute4x64_epi64(viall, 0xD8); // v1\82ðL128bit\82É\82Ü\82Æ\82ß
__m256i vsi16_2 = _mm256_permute4x64_epi64(viall, 0x8D); // v2\82ðL128bit\82É\82Ü\82Æ\82ß
- __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vsi16_1)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
- __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vsi16_2)); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+ __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256(vsi16_1, 0))); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+ __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256(vsi16_2, 0))); // int16 to float (float\95Ï\8a·\82ÅH128bit\82Í\8fÁ\82¦\82é
+#endif
__m256 vfp = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(vofs, vfmask)), vec_divf);
#if defined(DATA_T_DOUBLE)
__m256 vec_out = _mm256_mul_ps(MM256_FMA_PS(_mm256_sub_ps(vv2, vv1), _mm256_mul_ps(vfp, vec_divf), vv1), vec_divo);
#endif
vofs = _mm256_add_epi32(vofs, vinc);
}
- resrc->offset = prec_offset + (splen_t)(vofs.m256i_i32[0]);
+ resrc->offset = prec_offset + (splen_t)(MM256_EXTRACT_I32(vofs, 0));
*out_count = i;
return dest;
}
dest += 4;
#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
__m128 vec_out = _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1), vec_divo);
- _mm256_storeu_ps(dest, vec_out);
+ _mm_storeu_ps(dest, vec_out);
dest += 4;
#else // DATA_T_IN32
__m128 vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1);
}else
#if 0 // 2set
if(inc < opt_inc2){ // 2\83Z\83b\83g
- const __m128i vvar4 = _mm_set1_epi32(4);
+ const __m128i vvar3 = _mm_set1_epi32(3);
for(i = 0; i < count; i += 4) {
__m128i vofsi1 = _mm_srli_epi32(vofs, FRACTION_BITS);
__m128i vofsi2 = _mm_add_epi32(vofsi1, vvar1);
__m128i vofsib = _mm_shuffle_epi32(vofsi1, 0x0);
__m128i vofsub1 = _mm_sub_epi32(vofsi1, vofsib);
__m128i vofsub2 = _mm_sub_epi32(vofsi2, vofsib);
- __m128i vrmg1 = _mm_cmpgt_epi32(vofsub1, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\92´\89ß\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
- __m128i vrmg2 = _mm_cmpgt_epi32(vofsub2, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\92´\89ß\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
- __m128i vrme1 = _mm_cmpeq_epi32(vofsub1, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\93¯\93\99\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
- __m128i vrme2 = _mm_cmpeq_epi32(vofsub2, vvar4); // \83I\83t\83Z\83b\83g\8d·\82ª4\93¯\93\99\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
- __m128i vrm1 = _mm_or_si128(vrmg1, vrme1); // 4\88È\8fã\82É\82·\82é\82½\82ß\82É\83}\83X\83N\8d\87\90¬
- __m128i vrm2 = _mm_or_si128(vrmg2, vrme2); // 4\88È\8fã\82É\82·\82é\82½\82ß\82É\83}\83X\83N\8d\87\90¬
+ __m128i vrm1 = _mm_cmpgt_epi32(vofsub1, vvar3); // \83I\83t\83Z\83b\83g\8d·\82ª4\88È\8fã\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
+ __m128i vrm2 = _mm_cmpgt_epi32(vofsub2, vvar3); // \83I\83t\83Z\83b\83g\8d·\82ª4\88È\8fã\82Ì\8fð\8c\8f\82Å\83}\83X\83N\8dì\90¬
// src2 offset\82ª\89º\88Ê2bit\82Ì\82Ý\97L\8cø\82Å\82 \82ê\82Î4\82ð\92´\82¦\82é\95\94\95ª\82É\83}\83X\83N\95s\97v\82Ì\82Í\82¸
__m128 vv11 = _mm_permutevar_ps(vvf1, vofsub1); // v1 ofsi
__m128 vv12 = _mm_permutevar_ps(vvf2, vofsub1); // v1 ofsi
}else
#endif // 2set
-// x86\82¾\82Æ\82Ù\82Æ\82ñ\82Ç\95Ï\82í\82ç\82È\82¢ x64\82¾\82Æ\82â\82â\91¬\82¢
+// Almost no difference on x86; slightly faster on x64 (about 1.5%)
#elif (USE_X86_EXT_INTRIN >= 5) && defined(IX64CPU)
// \8dÅ\93K\89»\83\8c\81[\83g = (\83\8d\81[\83h\83f\81[\83^\90\94 - \8f\89\8aú\83I\83t\83Z\83b\83g\8f¬\90\94\95\94\82Ì\8dÅ\91å\92l(1\96¢\96\9e) - \95â\8aÔ\83|\83C\83\93\83g\90\94(linear\82Í1) ) / \83I\83t\83Z\83b\83g\83f\81[\83^\90\94
// \83\8d\81[\83h\83f\81[\83^\90\94\82Í_mm_shuffle_epi8\88µ\82¦\82é\82Ìint16\82Ì8\83Z\83b\83g\82É\82È\82é (=int8*16)
#endif
vofs = _mm_add_epi32(vofs, vinc);
}
- }
+ }else
#endif
#endif // LO_OPTIMIZE_INCREMENT
const __m128 vec_divo = _mm_set1_ps(DIV_15BIT);
for(; i < count; i += 4) {
__m128i vofsi = _mm_srli_epi32(vofs, FRACTION_BITS);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- int32 *ofsp = (int32 *)vofsi;
- __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofsp[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- __m128i vin2 = _mm_loadu_si128((__m128i *)&src[ofsp[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin3 = _mm_loadu_si128((__m128i *)&src[ofsp[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin4 = _mm_loadu_si128((__m128i *)&src[ofsp[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
-#else
- __m128i vin1 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[0]]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
- __m128i vin2 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[1]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin3 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[2]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
- __m128i vin4 = _mm_loadu_si128((__m128i *)&src[vofsi.m128i_i32[3]]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
-#endif
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,0)]); // ofsi\82Æofsi+1\82ð\83\8d\81[\83h
+ __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,1)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,2)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi,3)]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
__m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21]e96,[v12v22]e96 to [v11v12v21v22]e64
__m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23]e96,[v14v24]e96 to [v13v14v23v24]e64
__m128i vi16 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22]e64,[v13v14,v23v24]e64 to [v11v12v13v14,v21v22v23v24]e0
vofs = _mm_add_epi32(vofs, vinc);
}
}
- resrc->offset = prec_offset + (splen_t)(vofs.m128i_i32[0]);
+ resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs,0));
*out_count = i;
return dest;
}
vv2 = _mm_cvt_si2ss(vv2, src[++ofsi]), vv2 = _mm_shuffle_ps(vv2, vv2, 0x1b);
#if defined(DATA_T_DOUBLE)
vec_out = _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), _mm_mul_ps(vfp, vec_divf), vv1), vec_divo);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vec_out;
- *dest++ = (DATA_T)out[0];
- *dest++ = (DATA_T)out[1];
- *dest++ = (DATA_T)out[2];
- *dest++ = (DATA_T)out[3];
- }
-#else
- *dest++ = (DATA_T)vec_out.m128_f32[0];
- *dest++ = (DATA_T)vec_out.m128_f32[1];
- *dest++ = (DATA_T)vec_out.m128_f32[2];
- *dest++ = (DATA_T)vec_out.m128_f32[3];
-#endif
+ *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,0);
+ *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,1);
+ *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,2);
+ *dest++ = (DATA_T)MM_EXTRACT_F32(vec_out,3);
#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
_mm_storeu_ps(dest, _mm_mul_ps(MM_FMA_PS(_mm_sub_ps(vv2, vv1), _mm_mul_ps(vfp, vec_divf), vv1), vec_divo));
dest += 4;
}
#else // normal
-
+// The in-loop offset arithmetic is done in int32 range, so (sample_increment * (req_count+1)) must stay below int32 max
static inline DATA_T *resample_linear_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
{
int32 i;
- resample_rec_t *resrc = &vp->resrc;
- sample_t *src = vp->sample->data;
+ resample_rec_t *resrc = &vp->resrc;
+ splen_t prec_offset = resrc->offset & INTEGER_MASK;
+ sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+ const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
+ int32 ofs = (int32)(resrc->offset & FRACTION_MASK);
+ const int32 inc = resrc->increment;
for(i = 0; i < req_count; i++) {
- spos_t ofsi = resrc->offset >> FRACTION_BITS;
- fract_t ofsf = resrc->offset & FRACTION_MASK;
+ int32 ofsi = ofs >> FRACTION_BITS;
+ int32 ofsf = ofs & FRACTION_MASK;
int32 v1 = src[ofsi];
int32 v2 = src[ofsi + 1];
// *dest++ = ((FLOAT_T)v1 + (FLOAT_T)(v2 - v1) * (FLOAT_T)ofsf * div_fraction) * OUT_INT16;
#else
*dest++ = (v1 + imuldiv_fraction((v2 - v1), ofsf);
#endif
- resrc->offset += resrc->increment;
+ ofs += inc;
}
+ resrc->offset = prec_offset + (splen_t)ofs;
*out_count = i;
return dest;
}
#endif
-
-
static void lo_rs_plain(Voice *vp, DATA_T *dest, int32 count)
{
/* Play sample until end, then free the voice. */
resample_rec_t *resrc = &vp->resrc;
int32 i = 0, j;
-#ifdef LO_LOOP_CALC
- resrc->mode = RESAMPLE_MODE_PLAIN;
-#endif
if (resrc->increment < 0) resrc->increment = -resrc->increment; /* In case we're coming out of a bidir loop */
j = PRECALC_LOOP_COUNT(resrc->offset, resrc->data_length, resrc->increment) + 2; // safe end+128 sample
if (j > count) {j = count;}
resample_rec_t *resrc = &vp->resrc;
int32 i = 0, j;
-#ifdef LO_LOOP_CALC
- resrc->mode = RESAMPLE_MODE_LOOP;
-#endif
j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // 2point interpolation
if (j > count) {j = count;}
else if(j < 0) {j = 0;}
resample_rec_t *resrc = &vp->resrc;
int32 i = 0, j = 0;
-#ifdef LO_LOOP_CALC
- resrc->mode = RESAMPLE_MODE_BIDIR_LOOP;
-#endif
if (resrc->increment > 0){
j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // 2point interpolation
if (j > count) {j = count;}
lo_rs_loop(vp, ptr, count); /* loop */
}
}
-
#endif /* optimize linear resample */
+/*************** optimize linear float resample *****************/
+#if defined(PRECALC_LOOPS)
+#define LO_OPTIMIZE_INCREMENT
+static inline DATA_T resample_linear_float_single(Voice *vp) // one linearly interpolated sample at vp->resrc.offset; the offset is only read here, not advanced
+{
+ const float *src = (const float*)vp->sample->data; // the sample data is read as float
+ const fract_t ofsf = vp->resrc.offset & FRACTION_MASK; // fractional part of the fixed-point offset
+ const spos_t ofsi = vp->resrc.offset >> FRACTION_BITS; // integer sample index
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+ FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = ofsf;
+ return (v1 + (v2 - v1) * fp * div_fraction); // FLOAT_T; div_fraction presumably is 1 / (1 << FRACTION_BITS) -- TODO confirm
+#else // DATA_T_INT32
+ int32 v1 = (int32)(src[ofsi] * M_16BIT), v2 = (int32)(src[ofsi + 1] * M_16BIT); // NOTE(review): the SSE resample_linear_float_multi() scales its int32 output by M_15BIT, this path by M_16BIT -- confirm which is intended
+ return v1 + imuldiv_fraction(v2 - v1, ofsf);
+#endif
+}
-
-/*************** resampling with fixed increment *****************/
-///r
-static void rs_plain_c(int v, DATA_T *ptr, int32 count)
+#if (USE_X86_EXT_INTRIN >= 3)
+// SSE variant of the float-sample linear resampler: 4 int32 offsets and 4 float results per iteration
+// The in-loop offset arithmetic is done in int32 range, so (sample_increment * (req_count+1)) must stay below int32 max
+static inline DATA_T *resample_linear_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
{
- Voice *vp = &voice[v];
- DATA_T *dest = ptr + vp->resrc.buffer_offset;
- cache_t *src = (cache_t *)vp->sample->data;
- int32 count2 = count;
- splen_t ofs, i, le;
-
- le = vp->sample->loop_end >> FRACTION_BITS;
- ofs = vp->resrc.offset >> FRACTION_BITS;
+ resample_rec_t *resrc = &vp->resrc;
+ int32 i = 0;
+ const uint32 req_count_mask = ~(0x3);
+ const int32 count = req_count & req_count_mask; // req_count rounded down to a multiple of 4; the remainder is not produced here (*out_count reports how many were)
+ splen_t prec_offset = resrc->offset & INTEGER_MASK; // base offset (presumably the integer part -- confirm INTEGER_MASK); src is rebased to it below
+ float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS);
+ const int32 start_offset = (int32)(resrc->offset - prec_offset); // remaining offset relative to the base, kept in int32 range (for SIMD)
+ const int32 inc = resrc->increment;
+ __m128i vofs = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0)); // per-lane offsets: start, start+inc, start+2*inc, start+3*inc
+ const __m128i vinc = _mm_set1_epi32(inc * 4), vfmask = _mm_set1_epi32((int32)FRACTION_MASK); // every lane advances by 4 steps per iteration
+ const __m128 vec_divf = _mm_set1_ps(div_fraction);
+ const __m128 vec_divo = _mm_set1_ps(M_15BIT); // only used as the output multiplier in the int32 branch below
+ for(; i < count; i += 4) {
+ __m128i vofsi = _mm_srli_epi32(vofs, FRACTION_BITS); // sample index of each lane (logical right shift)
+ __m128 vin1 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,0)]); // load src[ofsi] and src[ofsi+1] (4 floats are read, only the first 2 are used) [v11v12v13v14]
+ __m128 vin2 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,1)]); // same for the next sample [v21v22v23v24]
+ __m128 vin3 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,2)]); // same for the next sample [v31v32v33v34]
+ __m128 vin4 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi,3)]); // same for the next sample [v41v42v43v44]
+ __m128 vin12 = _mm_shuffle_ps(vin1, vin2, 0x44); // [v11,v12,v21,v22]
+ __m128 vin34 = _mm_shuffle_ps(vin3, vin4, 0x44); // [v31,v32,v41,v42]
+ __m128 vv1 = _mm_shuffle_ps(vin12, vin34, 0x88); // [v11,v21,v31,v41]
+ __m128 vv2 = _mm_shuffle_ps(vin12, vin34, 0xDD); // [v12,v22,v32,v42]
+ __m128 vfp = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(vofs, vfmask)), vec_divf); // fractional part of each offset, scaled by div_fraction
+ __m128 vec_out = MM_FMA_PS(_mm_sub_ps(vv2, vv1), vfp, vv1); // presumably (v2 - v1) * fp + v1, assuming MM_FMA_PS(a, b, c) is a * b + c -- TODO confirm
+#if defined(DATA_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 8)
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(vec_out)); // widen the 4 floats to 4 doubles and store
+ dest += 4;
+#else
+ _mm_storeu_pd(dest, _mm_cvtps_pd(vec_out)); // low two results as doubles
+ dest += 2;
+ _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vec_out, vec_out))); // high two results as doubles
+ dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+ _mm_storeu_ps(dest, vec_out);
+ dest += 4;
+#else // DATA_T_INT32
+ _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm_mul_ps(vec_out, vec_divo))); // scale by M_15BIT, convert to int32; NOTE(review): the scalar float paths in this file scale by M_16BIT -- confirm which is intended
+ dest += 4;
+#endif
+ vofs = _mm_add_epi32(vofs, vinc);
+ }
+ resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs,0)); // write back the first lane (presumably lane 0 of vofs), i.e. the offset of the next unprocessed sample
+ *out_count = i; // number of samples written
+ return dest; // points just past the last written sample
+}
- i = ofs + count2;
- if(i > le)
- i = le;
- count2 = i - ofs;
+#else // normal
+// Scalar fallback of the float-sample linear resampler, built when USE_X86_EXT_INTRIN < 3.
+// Writes req_count interpolated samples to dest, stores the count in *out_count and returns the advanced dest.
+// The in-loop offset arithmetic is done in int32 range, so (sample_increment * (req_count+1)) must stay below int32 max
+static inline DATA_T *resample_linear_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+ int32 i;
+ resample_rec_t *resrc = &vp->resrc;
+ splen_t prec_offset = resrc->offset & INTEGER_MASK; // base offset; src is rebased to it below
+ float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS);
+ int32 ofs = (int32)(resrc->offset & FRACTION_MASK); // running offset relative to the base, starts at the fractional part
+ const int32 inc = resrc->increment;
- for (i = 0; i < count2; i++) {
- dest[i] = src[i + ofs];
+ for(i = 0; i < req_count; i++) {
+ int32 ofsi = ofs >> FRACTION_BITS; // sample index relative to src
+ int32 ofsf = ofs & FRACTION_MASK; // fractional position between the two samples
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+ FLOAT_T v1 = src[ofsi], v2 = src[ofsi + 1], fp = ofsf;
+ *dest++ = (v1 + (v2 - v1) * fp * div_fraction); // FLOAT_T
+#else // DATA_T_INT32
+ int32 v1 = (int32)(src[ofsi] * M_16BIT), v2 = (int32)(src[ofsi + 1] * M_16BIT);
+ *dest++ = v1 + imuldiv_fraction(v2 - v1, ofsf);
+#endif
+ ofs += inc;
}
- for (; i < count; i++) {
- vp->finish_voice = 1;
- dest[i] = 0;
- }
- ofs += count2;
- vp->resrc.offset = ofs << FRACTION_BITS;
+ resrc->offset = prec_offset + (splen_t)ofs; // write the advanced offset back to the voice
+ *out_count = i;
+ return dest;
}
-///r
-static void rs_plain(int v, DATA_T *ptr, int32 count)
-{
- /* Play sample until end, then free the voice. */
- Voice *vp = &voice[v];
- DATA_T *dest = ptr;
- sample_t *src = vp->sample->data;
- int data_type = vp->sample->data_type;
- splen_t
- ofs = vp->resrc.offset,
- ls = 0,
- le = vp->sample->data_length;
- int32 incr = vp->resrc.increment;
-#ifdef PRECALC_LOOPS
- int32 i = 0, j;
#endif
- if(vp->cache && incr == (1 << FRACTION_BITS)){
- rs_plain_c(v, ptr, count);
- return;
- }
+static void lo_rs_plain_float(Voice *vp, DATA_T *dest, int32 count)
+{
+ /* Play sample until end, then flag the voice as finished.
+  * Non-looping playback of count output samples: j samples (bounded by count) are
+  * requested from resample_linear_float_multi(), whatever it did not produce up to j
+  * is generated one sample at a time, and the remainder of the buffer is filled
+  * with 0 while vp->finish_voice is set. */
+ resample_rec_t *resrc = &vp->resrc;
+ int32 i = 0, j;
-#ifdef PRECALC_LOOPS
- if (incr < 0) incr = -incr; /* In case we're coming out of a bidir loop */
- /* Precalc how many times we should go through the loop.
- NOTE: Assumes that incr > 0 and that ofs <= le */
- j = PRECALC_LOOP_COUNT(ofs, le, incr);
- if (j > count) {j = count;}
+ if (resrc->increment < 0) resrc->increment = -resrc->increment; /* In case we're coming out of a bidir loop */
+ j = PRECALC_LOOP_COUNT(resrc->offset, resrc->data_length, resrc->increment) + 2; // safe end+128 sample -- NOTE(review): the +2 presumably relies on padding after data_length; confirm
+ if (j > count) {j = count;}
else if(j < 0) {j = 0;}
- for(i = 0; i < j; i++) {
- RESAMPLATION;
- ofs += incr;
- }
- for (; i < count; i++) {
- *dest++ = 0;
- vp->finish_voice = 1;
- }
-#else /* PRECALC_LOOPS */
- while (count--)
- {
- if (ofs >= le){
- *dest++ = 0;
- vp->finish_voice = 1;
- }else {
- RESAMPLATION;
- ofs += incr;
- }
+ dest = resample_linear_float_multi(vp, dest, j, &i); // bulk part; i receives the number of samples produced
+ for(; i < j; i++) { // finish what the bulk routine left over, one sample per iteration
+ *dest++ = resample_linear_float_single(vp);
+ resrc->offset += resrc->increment;
}
-#endif /* PRECALC_LOOPS */
-
- vp->resrc.offset = ofs; /* Update offset */
+ for(; i < count; i++) { *dest++ = 0; vp->finish_voice = 1;} // past the end of the sample: output silence and mark the voice finished
}
-static void rs_loop_c(Voice *vp, DATA_T *ptr, int32 count)
-{
- splen_t
- ofs = vp->resrc.offset >> FRACTION_BITS,
- le = vp->sample->loop_end >> FRACTION_BITS,
- ll = le - (vp->sample->loop_start >> FRACTION_BITS);
- DATA_T *dest = ptr;
- cache_t *src = (cache_t *)vp->sample->data;
- int32 i, j;
+static void lo_rs_loop_float(Voice *vp, DATA_T *dest, int32 count)
+{
+	/*
+	 * Forward-loop playback: render `count` samples, jumping back by the loop
+	 * length every time the position reaches loop_end.
+	 * The stretch that is guaranteed not to touch loop_end (2 samples of margin
+	 * for the 2-point interpolation) is handed to the bulk routine; everything
+	 * else is produced one sample at a time with the wrap check.
+	 */
+	resample_rec_t *rec = &vp->resrc;
+	int32 done = 0;
+	int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 2;
+
+	/* clamp to the request first, then to zero (same order as before) */
+	bulk = (bulk > count) ? count : ((bulk < 0) ? 0 : bulk);
+
+	dest = resample_linear_float_multi(vp, dest, bulk, &done);
+
+	while (done < count) {
+		*dest++ = resample_linear_float_single(vp);
+		rec->offset += rec->increment;
+		/* A single subtraction is enough only while the loop is longer than one increment. */
+		if (rec->offset >= rec->loop_end)
+			rec->offset -= rec->loop_end - rec->loop_start;
+		done++;
+	}
+}
-// ERROR loop_start = 4215529472
- if(ll < 0)
- {
- vp->sample->loop_start = 0;
- ll = le - (vp->sample->loop_start >> FRACTION_BITS);
- }
+static void lo_rs_bidir_float(Voice *vp, DATA_T *dest, int32 count)
+{ /* Play a bidirectional (ping-pong) loop for count output samples.
+   * While moving forward, the stretch that stays clear of loop_end is rendered by
+   * resample_linear_float_multi(); the rest is rendered one sample at a time, and
+   * the position is mirrored and the increment negated at loop_end / loop_start. */
+ resample_rec_t *resrc = &vp->resrc;
+ int32 i = 0, j = 0;
- while(count){
- while(ofs >= le)
- ofs -= ll;
+ if (resrc->increment > 0){ // the bulk routine is only used while moving forward
+ j = PRECALC_LOOP_COUNT(resrc->offset, resrc->loop_end, resrc->increment) - 2; // 2point interpolation
+ if (j > count) {j = count;}
+ else if(j < 0) {j = 0;}
+ dest = resample_linear_float_multi(vp, dest, j, &i); // i receives the number of samples produced
+ }
+ for(; i < count; i++) {
+ *dest++ = resample_linear_float_single(vp);
+ resrc->offset += resrc->increment;
+ if(resrc->increment > 0){
+ if(resrc->offset >= resrc->loop_end){
+ resrc->offset = (resrc->loop_end << 1) - resrc->offset; // mirror the overshoot around loop_end
+ resrc->increment = -resrc->increment; // continue backwards
+ }
+ }else{
+ if(resrc->offset <= resrc->loop_start){
+ resrc->offset = (resrc->loop_start << 1) - resrc->offset; // mirror the undershoot around loop_start
+ resrc->increment = -resrc->increment; // continue forwards
+ }
+ }
+ }
+}
+
+static inline void resample_voice_linear_float_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{
+	/*
+	 * Pick the playback routine for this voice:
+	 *  - plain (no loop) once plain_flag is set, or as soon as the voice is
+	 *    off/dying without an envelope, or is off and uses a release sample;
+	 *    in the latter two cases plain_flag is set so the choice sticks,
+	 *  - otherwise the ping-pong loop or the forward loop, depending on the
+	 *    sample modes.
+	 */
+	const int sample_modes = vp->sample->modes;
+
+	if (!vp->resrc.plain_flag) {
+		const int off_without_env = !(sample_modes & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE));
+		const int off_release_sample = (sample_modes & MODES_RELEASE) && (vp->status & VOICE_OFF);
+
+		if (!off_without_env && !off_release_sample) {
+			/* still looping */
+			if (sample_modes & MODES_PINGPONG)
+				lo_rs_bidir_float(vp, ptr, count); /* Bidirectional loop */
+			else
+				lo_rs_loop_float(vp, ptr, count); /* loop */
+			return;
+		}
+		vp->resrc.plain_flag = 1; /* lock no loop */
+	}
+	lo_rs_plain_float(vp, ptr, count); /* no loop */
+}
+#endif /* optimize linear float resample */
+
+/*************** optimize lagrange resample ***********************/
+#if defined(PRECALC_LOOPS)
+#define LAO_OPTIMIZE_INCREMENT
+
+#if 0 // timidity41-eddb86e
+#if USE_X86_EXT_INTRIN >= 8
+
+// caller must check offsets to ensure lagrange interpolation is applicable
+// TODO: use newton interpolation
+/*
+ * Compute 8 consecutive output samples with 4-point Lagrange interpolation using
+ * 256-bit vectors.
+ *
+ * vp    : voice; vp->resrc.offset / increment select the source positions and
+ *         vp->sample->data supplies the samples
+ * dest  : output buffer; 8 DATA_T values are stored
+ * i     : running output index of the caller; advanced by 8
+ * count : not used by this routine
+ *
+ * Returns dest advanced by 8; resrc->offset is advanced by 8 increments.
+ * No bounds checks are done here (see the note above).
+ */
+static DATA_T *resample_multi_lagrange_m256(Voice *vp, DATA_T *dest, int32 *i, int32 count)
+{
+	resample_rec_t *resrc = &vp->resrc;
+
+	// base position is moved one sample before the integer part of the offset
+	splen_t prec_offset = (resrc->offset & INTEGER_MASK) - (1 << FRACTION_BITS);
+	sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+	int32 start_offset = (int32)(resrc->offset - prec_offset); // keeps the offset arithmetic in the int32 range (for SIMD)
+
+	// per-lane offsets: start_offset + lane * increment
+	__m256i vindices = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+	__m256i vofs = _mm256_add_epi32(_mm256_set1_epi32(start_offset), _mm256_mullo_epi32(vindices, _mm256_set1_epi32(resrc->increment)));
+	__m256i vofsi = _mm256_srai_epi32(vofs, FRACTION_BITS);
+
+	// each 32-bit gather (scale 2) picks up two adjacent samples; the 16-bit
+	// shifts below assume a 16-bit sample_t
+	// src[ofsi-1], src[ofsi]
+	__m256i vinm10 = MM256_I32GATHER_I32((const int *)src, _mm256_sub_epi32(vofsi, _mm256_set1_epi32(1)), 2);
+	// src[ofsi+1], src[ofsi+2]
+	__m256i vin12 = MM256_I32GATHER_I32((const int *)src, _mm256_add_epi32(vofsi, _mm256_set1_epi32(1)), 2);
+
+	// (int32)src[ofsi-1]
+	__m256i vinm1 = _mm256_srai_epi32(_mm256_slli_epi32(vinm10, 16), 16);
+	// (int32)src[ofsi]
+	__m256i vin0 = _mm256_srai_epi32(vinm10, 16);
+	// (int32)src[ofsi+1]
+	__m256i vin1 = _mm256_srai_epi32(_mm256_slli_epi32(vin12, 16), 16);
+	// (int32)src[ofsi+2]
+	__m256i vin2 = _mm256_srai_epi32(vin12, 16);
+
+	__m256 vec_divf = _mm256_set1_ps(div_fraction);
+
+	// (float)(ofs - ofsi)
+	__m256 vfofsf = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_and_si256(vofs, _mm256_set1_epi32(FRACTION_MASK))), vec_divf);
+
+	// (float)(int32)src[ofsi-1]
+	__m256 vfinm1 = _mm256_cvtepi32_ps(vinm1);
+	// (float)(int32)src[ofsi]
+	__m256 vfin0 = _mm256_cvtepi32_ps(vin0);
+	// (float)(int32)src[ofsi+1]
+	__m256 vfin1 = _mm256_cvtepi32_ps(vin1);
+	// (float)(int32)src[ofsi+2]
+	__m256 vfin2 = _mm256_cvtepi32_ps(vin2);
+
+	__m256 v1 = _mm256_set1_ps(1.0f);
+
+	// nodes are x1 = -1, x2 = 0, x3 = 1, x4 = 2 and x is the fractional position
+	// x - x1
+	__m256 vfofsfm1 = _mm256_add_ps(vfofsf, v1);
+	// x - x2
+	// __m256 vfofsf0 = vfofsf;
+
+	// x - x3
+	__m256 vfofsf1 = _mm256_sub_ps(vfofsf, v1);
+	// x - x4
+	__m256 vfofsf2 = _mm256_sub_ps(vfofsf1, v1);
+
+	// (x - x2)(x - x3)(x - x4) / (x1 - x2)(x1 - x3)(x1 - x4)
+	// = (x - x2)(x - x3)(x - x4) * (-1/6)
+	__m256 vfcoefm1 = _mm256_mul_ps(_mm256_mul_ps(vfofsf, vfofsf1), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(-1.0f / 6.0f)));
+
+	// (x - x1)(x - x3)(x - x4) / (x2 - x1)(x2 - x3)(x2 - x4)
+	// = (x - x1)(x - x3)(x - x4) * (1/2)
+	__m256 vfcoef0 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf1), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(1.0f / 2.0f)));
+
+	// (x - x1)(x - x2)(x - x4) / (x3 - x1)(x3 - x2)(x3 - x4)
+	// = (x - x1)(x - x2)(x - x4) * (-1/2)
+	__m256 vfcoef1 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf), _mm256_mul_ps(vfofsf2, _mm256_set1_ps(-1.0f / 2.0f)));
+
+	// (x - x1)(x - x2)(x - x3) / (x4 - x1)(x4 - x2)(x4 - x3)
+	// = (x - x1)(x - x2)(x - x3) * (1/6)
+	__m256 vfcoef2 = _mm256_mul_ps(_mm256_mul_ps(vfofsfm1, vfofsf), _mm256_mul_ps(vfofsf1, _mm256_set1_ps(1.0f / 6.0f)));
+
+	// weighted sum of the four taps
+#if USE_X86_EXT_INTRIN >= 9
+	__m256 vresult = _mm256_add_ps(
+		_mm256_fmadd_ps(vfinm1, vfcoefm1, _mm256_mul_ps(vfin0, vfcoef0)),
+		_mm256_fmadd_ps(vfin1, vfcoef1, _mm256_mul_ps(vfin2, vfcoef2))
+	);
+#else
+	__m256 vresult = _mm256_add_ps(
+		_mm256_add_ps(_mm256_mul_ps(vfinm1, vfcoefm1), _mm256_mul_ps(vfin0, vfcoef0)),
+		_mm256_add_ps(_mm256_mul_ps(vfin1, vfcoef1), _mm256_mul_ps(vfin2, vfcoef2))
+	);
+#endif
+
+#if defined(DATA_T_DOUBLE)
+	vresult = _mm256_mul_ps(vresult, _mm256_set1_ps(OUT_INT16));
+	_mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vresult, 0)));
+	_mm256_storeu_pd(dest + 4, _mm256_cvtps_pd(_mm256_extractf128_ps(vresult, 1)));
+#elif defined(DATA_T_FLOAT)
+	vresult = _mm256_mul_ps(vresult, _mm256_set1_ps(OUT_INT16));
+	_mm256_storeu_ps(dest, vresult);
+#else
+	// integer output: the store intrinsic takes a __m256i pointer
+	_mm256_storeu_si256((__m256i *)dest, _mm256_cvtps_epi32(vresult));
+#endif
+
+	dest += 8;
+	resrc->offset += resrc->increment * 8;
+	*i += 8;
+	return dest;
+}
+
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+
+// caller must check offsets to ensure lagrange interpolation is applicable
+// TODO: use newton interpolation
+/*
+ * Compute 4 consecutive output samples with 4-point Lagrange interpolation using
+ * 128-bit vectors.
+ *
+ * vp    : voice; vp->resrc.offset / increment select the source positions and
+ *         vp->sample->data supplies the samples
+ * dest  : output buffer; 4 DATA_T values are stored
+ * i     : running output index of the caller; advanced by 4
+ * count : not used by this routine
+ *
+ * Returns dest advanced by 4; resrc->offset is advanced by 4 increments.
+ * No bounds checks are done here (see the note above).
+ */
+static DATA_T *resample_multi_lagrange_m128(Voice *vp, DATA_T *dest, int32 *i, int32 count)
+{
+	resample_rec_t *resrc = &vp->resrc;
+
+	// base position is moved one sample before the integer part of the offset
+	splen_t prec_offset = (resrc->offset & INTEGER_MASK) - (1 << FRACTION_BITS);
+	sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+	int32 start_offset = (int32)(resrc->offset - prec_offset); // keeps the offset arithmetic in the int32 range (for SIMD)
+
+	// per-lane offsets: start_offset + lane * increment
+	__m128i vindices = _mm_set_epi32(3, 2, 1, 0);
+	__m128i vofs = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_mullo_epi32(vindices, _mm_set1_epi32(resrc->increment)));
+	__m128i vofsi = _mm_srai_epi32(vofs, FRACTION_BITS);
+
+	// each 32-bit gather (scale 2) picks up two adjacent samples; the 16-bit
+	// shifts below assume a 16-bit sample_t
+	// src[ofsi-1], src[ofsi]
+	__m128i vinm10 = MM_I32GATHER_I32((const int *)src, _mm_sub_epi32(vofsi, _mm_set1_epi32(1)), 2);
+	// src[ofsi+1], src[ofsi+2]
+	__m128i vin12 = MM_I32GATHER_I32((const int *)src, _mm_add_epi32(vofsi, _mm_set1_epi32(1)), 2);
+
+	// (int32)src[ofsi-1]
+	__m128i vinm1 = _mm_srai_epi32(_mm_slli_epi32(vinm10, 16), 16);
+	// (int32)src[ofsi]
+	__m128i vin0 = _mm_srai_epi32(vinm10, 16);
+	// (int32)src[ofsi+1]
+	__m128i vin1 = _mm_srai_epi32(_mm_slli_epi32(vin12, 16), 16);
+	// (int32)src[ofsi+2]
+	__m128i vin2 = _mm_srai_epi32(vin12, 16);
+
+	__m128 vec_divf = _mm_set1_ps(div_fraction);
+
+	// (float)(ofs - ofsi)
+	__m128 vfofsf = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(vofs, _mm_set1_epi32(FRACTION_MASK))), vec_divf);
+
+	// (float)(int32)src[ofsi-1]
+	__m128 vfinm1 = _mm_cvtepi32_ps(vinm1);
+	// (float)(int32)src[ofsi]
+	__m128 vfin0 = _mm_cvtepi32_ps(vin0);
+	// (float)(int32)src[ofsi+1]
+	__m128 vfin1 = _mm_cvtepi32_ps(vin1);
+	// (float)(int32)src[ofsi+2]
+	__m128 vfin2 = _mm_cvtepi32_ps(vin2);
+
+	__m128 v1 = _mm_set1_ps(1.0f);
+
+	// nodes are x1 = -1, x2 = 0, x3 = 1, x4 = 2 and x is the fractional position
+	// x - x1
+	__m128 vfofsfm1 = _mm_add_ps(vfofsf, v1);
+	// x - x2
+	// __m128 vfofsf0 = vfofsf;
+
+	// x - x3
+	__m128 vfofsf1 = _mm_sub_ps(vfofsf, v1);
+	// x - x4
+	__m128 vfofsf2 = _mm_sub_ps(vfofsf1, v1);
+
+	// (x - x2)(x - x3)(x - x4) / (x1 - x2)(x1 - x3)(x1 - x4)
+	// = (x - x2)(x - x3)(x - x4) * (-1/6)
+	__m128 vfcoefm1 = _mm_mul_ps(_mm_mul_ps(vfofsf, vfofsf1), _mm_mul_ps(vfofsf2, _mm_set1_ps(-1.0f / 6.0f)));
+
+	// (x - x1)(x - x3)(x - x4) / (x2 - x1)(x2 - x3)(x2 - x4)
+	// = (x - x1)(x - x3)(x - x4) * (1/2)
+	__m128 vfcoef0 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf1), _mm_mul_ps(vfofsf2, _mm_set1_ps(1.0f / 2.0f)));
+
+	// (x - x1)(x - x2)(x - x4) / (x3 - x1)(x3 - x2)(x3 - x4)
+	// = (x - x1)(x - x2)(x - x4) * (-1/2)
+	__m128 vfcoef1 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf), _mm_mul_ps(vfofsf2, _mm_set1_ps(-1.0f / 2.0f)));
+
+	// (x - x1)(x - x2)(x - x3) / (x4 - x1)(x4 - x2)(x4 - x3)
+	// = (x - x1)(x - x2)(x - x3) * (1/6)
+	__m128 vfcoef2 = _mm_mul_ps(_mm_mul_ps(vfofsfm1, vfofsf), _mm_mul_ps(vfofsf1, _mm_set1_ps(1.0f / 6.0f)));
+
+	// weighted sum of the four taps
+#if USE_X86_EXT_INTRIN >= 9
+	__m128 vresult = _mm_add_ps(
+		_mm_fmadd_ps(vfinm1, vfcoefm1, _mm_mul_ps(vfin0, vfcoef0)),
+		_mm_fmadd_ps(vfin1, vfcoef1, _mm_mul_ps(vfin2, vfcoef2))
+	);
+#else
+	__m128 vresult = _mm_add_ps(
+		_mm_add_ps(_mm_mul_ps(vfinm1, vfcoefm1), _mm_mul_ps(vfin0, vfcoef0)),
+		_mm_add_ps(_mm_mul_ps(vfin1, vfcoef1), _mm_mul_ps(vfin2, vfcoef2))
+	);
+#endif
+
+#if defined(DATA_T_DOUBLE)
+	vresult = _mm_mul_ps(vresult, _mm_set1_ps(OUT_INT16));
+	_mm_storeu_pd(dest, _mm_cvtps_pd(vresult));
+	_mm_storeu_pd(dest + 2, _mm_cvtps_pd(_mm_movehl_ps(vresult, vresult)));
+#elif defined(DATA_T_FLOAT)
+	vresult = _mm_mul_ps(vresult, _mm_set1_ps(OUT_INT16));
+	_mm_storeu_ps(dest, vresult);
+#else
+	// integer output: the store intrinsic takes a __m128i pointer
+	_mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vresult));
+#endif
+
+	dest += 4;
+	resrc->offset += resrc->increment * 4;
+	*i += 4;
+	return dest;
+}
+
+#endif
+
+/*
+ * Render `count` output samples for a voice with Lagrange interpolation,
+ * using the vector helpers where 8 or 4 samples in a row are known to be safe
+ * and scalar code elsewhere.
+ *
+ * vp    : voice; vp->resrc (offset, increment, mode, loop points) is updated
+ * dest  : output buffer for `count` DATA_T values
+ * count : number of output samples to produce
+ *
+ * Plain mode: the first and the last sample interval are interpolated linearly
+ * (they lack a neighbour on one side); once the end of the data is reached the
+ * rest of the buffer is zero-filled and vp->finish_voice is set.
+ * Loop modes: positions whose four taps fit inside the loop use the fast paths,
+ * the remaining positions are rendered one by one with the loop handling.
+ */
+static void resample_lagrange_multi2(Voice *vp, DATA_T *dest, int32 count)
+{
+	const sample_t *src = vp->sample->data;
+	resample_rec_t *resrc = &vp->resrc;
+	spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
+	spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
+	spos_t ofsend = resrc->data_length >> FRACTION_BITS;
+	int32 i = 0;
+
+	if (resrc->mode == RESAMPLE_MODE_PLAIN) {
+		// plain playback always runs forward
+		if (resrc->increment < 0) {
+			resrc->increment = -resrc->increment;
+		}
+
+		// interpolate [0, 1] linearly
+		while (i < count && (resrc->offset >> FRACTION_BITS) < 1) {
+			*dest++ = resample_linear(src, resrc->offset, resrc);
+			resrc->offset += resrc->increment;
+			i++;
+		}
+
+		// lagrange interpolation
+#if USE_X86_EXT_INTRIN >= 8
+		while (count - i >= 8) {
+			// !(ofsi + 2 < ofsend)
+			if (((resrc->offset + resrc->increment * 7) >> FRACTION_BITS) + 2 >= ofsend) {
+				break;
+			}
+
+			dest = resample_multi_lagrange_m256(vp, dest, &i, count);
+		}
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+		while (count - i >= 4) {
+			// !(ofsi + 2 < ofsend)
+			if (((resrc->offset + resrc->increment * 3) >> FRACTION_BITS) + 2 >= ofsend) {
+				break;
+			}
+
+			dest = resample_multi_lagrange_m128(vp, dest, &i, count);
+		}
+#endif
+
+		while (i < count && (resrc->offset >> FRACTION_BITS) + 2 < ofsend) {
+			*dest++ = resample_lagrange(src, resrc->offset, resrc);
+			resrc->offset += resrc->increment;
+			i++;
+		}
+
+		// interpolate [ofsend - 2, ofsend - 1] linearly
+		// (the condition used to be the head test "< 1", so this tail never ran)
+		while (i < count && (resrc->offset >> FRACTION_BITS) + 1 < ofsend) {
+			*dest++ = resample_linear(src, resrc->offset, resrc);
+			resrc->offset += resrc->increment;
+			i++;
+		}
+
+		// past the end of the data: silence, and mark the voice as finished
+		if (i < count) {
+			memset(dest, 0, (count - i) * sizeof(DATA_T));
+			resrc->offset += resrc->increment * (count - i);
+			vp->finish_voice = 1;
+		}
+	} else {
+		while (i < count) {
+			// interpolate [0, 1] linearly
+			while (i < count && (resrc->offset >> FRACTION_BITS) < 1) {
+				*dest++ = resample_linear(src, resrc->offset, resrc);
+				resrc->offset += resrc->increment;
+				i++;
+			}
+
+			// fast paths: only while all taps of the first and last lane stay inside the loop
+#if USE_X86_EXT_INTRIN >= 8
+			while (count - i >= 8) {
+				spos_t ofs0i = resrc->offset >> FRACTION_BITS;
+				spos_t ofs7i = (resrc->offset + resrc->increment * 7) >> FRACTION_BITS;
+
+				if (resrc->increment > 0 ? ofsle <= ofs7i + 2 : ofs7i - 1 < ofsls || ofsle <= ofs0i + 2) {
+					break;
+				}
+
+				dest = resample_multi_lagrange_m256(vp, dest, &i, count);
+			}
+#endif
+
+#if USE_X86_EXT_INTRIN >= 6
+			while (count - i >= 4) {
+				spos_t ofs0i = resrc->offset >> FRACTION_BITS;
+				spos_t ofs3i = (resrc->offset + resrc->increment * 3) >> FRACTION_BITS;
+
+				if (resrc->increment > 0 ? ofsle <= ofs3i + 2 : ofs3i - 1 < ofsls || ofsle <= ofs0i + 2) {
+					break;
+				}
+
+				dest = resample_multi_lagrange_m128(vp, dest, &i, count);
+			}
+#endif
+
+			// scalar samples while the taps are still inside the loop
+			while (i < count) {
+				spos_t ofsi = resrc->offset >> FRACTION_BITS;
+
+				if (resrc->increment > 0 ? ofsle <= ofsi + 2 : ofsi - 1 < ofsls || ofsle <= ofsi + 2) {
+					break;
+				}
+
+				*dest++ = resample_lagrange(src, resrc->offset, resrc);
+				resrc->offset += resrc->increment;
+				i++;
+			}
+
+			// scalar samples next to a loop boundary; stops as soon as the taps
+			// are inside the loop again (the complement of the test above)
+			while (i < count) {
+				spos_t ofsi = resrc->offset >> FRACTION_BITS;
+
+				if (resrc->increment > 0 ? ofsi + 2 < ofsle : ofsls <= ofsi - 1 && ofsi + 2 < ofsle) {
+					break;
+				}
+
+				*dest++ = resample_lagrange(src, resrc->offset, resrc);
+				resrc->offset += resrc->increment;
+				i++;
+
+				// loop handling: jump back for a forward loop, reverse direction for a bidirectional loop
+				if (resrc->loop_end < resrc->offset) {
+					if (resrc->mode == RESAMPLE_MODE_LOOP) {
+						resrc->offset -= resrc->loop_end - resrc->loop_start;
+					} else if (resrc->mode == RESAMPLE_MODE_BIDIR_LOOP && resrc->increment > 0) {
+						resrc->increment = -resrc->increment;
+					}
+				} else if (resrc->mode == RESAMPLE_MODE_BIDIR_LOOP && resrc->increment < 0 && resrc->offset < resrc->loop_start) {
+					resrc->increment = -resrc->increment;
+				}
+			}
+		}
+	}
+}
+#endif // timidity41-eddb86e
+
+static inline DATA_T resample_lagrange_single(Voice *vp)
+{ /* Compute one output sample at vp->resrc.offset from four neighbouring taps
+   * (src[ofsi-1] .. src[ofsi+2]); vp->resrc is only read, never modified here.
+   * When ofsi < 1 in the forward cases the result is a 2-point linear interpolation.
+   * In the loop modes, taps that do not fit inside the loop are collected by
+   * walking the loop (wrapping or changing direction) instead of being read directly.
+   * NOTE(review): the third-difference term below is scaled by 1/6 and then again
+   * by 1/2 -- confirm this matches the intended interpolation weights. */
+ sample_t *src = vp->sample->data;
+ const resample_rec_t *resrc = &vp->resrc;
+ fract_t ofsf = resrc->offset & FRACTION_MASK; // fractional part of the position
+ const spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
+ const spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
+ const spos_t ofsi = resrc->offset >> FRACTION_BITS; // integer part of the position
+ spos_t ofstmp, len;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+ FLOAT_T v[4], tmp;
+#else // DATA_T_IN32
+ int32 v[4], tmp;
+#endif
+ int32 i, dir;
+
+ switch(resrc->mode){
+ case RESAMPLE_MODE_PLAIN:
+ if(ofsi < 1)
+ goto do_linear;
+ break; // normal
+ case RESAMPLE_MODE_LOOP:
+ if(ofsi < ofsls){ // still before the loop
+ if(ofsi < 1)
+ goto do_linear;
+ if((ofsi + 2) < ofsle)
+ break; // normal
+ }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+ break; // normal
+ len = ofsle - ofsls; // loop_length
+ ofstmp = ofsi - 1;
+ if(ofstmp < ofsls) {ofstmp += len;} // if loop_length == data_length need
+ for(i = 0; i < 4; i++){ // collect the four taps, wrapping around the loop
+ v[i] = src[ofstmp];
+ if((++ofstmp) > ofsle) {ofstmp -= len;} // -= loop_length , jump loop_start
+ }
+ goto loop_ofs;
+ break;
+ case RESAMPLE_MODE_BIDIR_LOOP:
+ if(resrc->increment >= 0){ // normal dir
+ if(ofsi < ofsls){
+ if(ofsi < 1)
+ goto do_linear;
+ if((ofsi + 2) < ofsle)
+ break; // normal
+ }else if(((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+ break; // normal
+ dir = 1;
+ ofstmp = ofsi - 1;
+ if(ofstmp < ofsls){ // if loop_length == data_length need
+ ofstmp = (ofsls << 1) - ofstmp;
+ dir = -1;
+ }
+ }else{ // reverse dir
+ dir = -1;
+ ofstmp = ofsi + 1;
+ if(ofstmp > ofsle){ // if loop_length == data_length need
+ ofstmp = (ofsle << 1) - ofstmp;
+ dir = 1;
+ }
+ ofsf = mlt_fraction - ofsf; // taps are collected in reverse order, so the fraction is mirrored
+ }
+ for(i = 0; i < 4; i++){ // collect the four taps, turning around at the loop points
+ v[i] = src[ofstmp];
+ ofstmp += dir;
+ if(dir < 0){ // -
+ if(ofstmp <= ofsls) {dir = 1;}
+ }else{ // +
+ if(ofstmp >= ofsle) {dir = -1;}
+ }
+ }
+ goto loop_ofs;
+ break;
+ }
+normal_ofs: // reached by falling out of the switch; no goto targets this label in this function
+ v[0] = src[ofsi - 1];
+ v[1] = src[ofsi];
+ v[2] = src[ofsi + 1];
+ v[3] = src[ofsi + 2];
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+loop_ofs: // v[0..3] hold the taps; evaluate the cubic by nesting the third, second and first differences
+ ofsf += mlt_fraction; // position measured from v[0]
+ tmp = v[1] - v[0];
+ v[3] += -3 * v[2] + 3 * v[1] - v[0];
+ v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+ v[3] += v[2] - v[1] - tmp;
+ v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+ v[3] += tmp;
+ v[3] *= (FLOAT_T)ofsf * div_fraction;
+ v[3] += v[0];
+ return v[3] * OUT_INT16;
+do_linear: // 2-point fallback between src[ofsi] and src[ofsi + 1]
+ v[1] = src[ofsi];
+ v[2] = (int32)(src[ofsi + 1]) - (int32)(src[ofsi]);
+ return (v[1] + v[2] * (FLOAT_T)ofsf * div_fraction) * OUT_INT16; // FLOAT_T
+#else // DATA_T_IN32
+loop_ofs: // same evaluation as the floating-point branch, in fixed point
+ ofsf += mlt_fraction;
+ tmp = v[1] - v[0];
+ v[3] += -3*v[2] + 3*v[1] - v[0];
+ v[3] = imuldiv_fraction(v[3], (ofsf - ml2_fraction) / 6);
+ v[3] += v[2] - v[1] - tmp;
+ v[3] = imuldiv_fraction(v[3], (ofsf - mlt_fraction) >> 1);
+ v[3] += tmp;
+ v[3] = imuldiv_fraction(v[3], ofsf);
+ v[3] += v[0];
+ return v[3];
+do_linear: // 2-point fallback between src[ofsi] and src[ofsi + 1]
+ v[1] = src[ofsi];
+ v[2] = src[ofsi + 1];
+ return v[1] + imuldiv_fraction(v[2] - v[1], ofsf);
+#endif
+}
+
+#if 0 //(USE_X86_EXT_INTRIN >= 9) // untested, unknown whether it works; broadcast may have needed to be set1...
+// offset:int32*8, resamp:float*8
+/* Keep the offset arithmetic inside the loop within the int32 range , (sample_increment * (req_count+1)) < int32 max
+ * Vector variant that produces req_count rounded down to a multiple of 8 samples,
+ * stores the advanced position in resrc->offset, the number of samples produced in
+ * *out_count, and returns the advanced dest.
+ * NOTE(review): this block is compiled out and does not look buildable as written:
+ * the first branch below uses vv0 without declaring it and applies 128-bit _mm_
+ * intrinsics to 256-bit vectors, and the _mm256_broadcastd_* calls are given scalar
+ * arguments -- verify before enabling. */
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+ resample_rec_t *resrc = &vp->resrc;
+ int32 i = 0;
+ const int32 req_count_mask = ~(0x7);
+ const int32 count = req_count & req_count_mask; // only whole groups of 8 are processed
+ splen_t prec_offset = resrc->offset & INTEGER_MASK;
+ sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+ const int32 start_offset = (int32)(resrc->offset - prec_offset); // keeps the offset arithmetic in the int32 range (for SIMD)
+ const int32 inc = resrc->increment;
+ const __m256i vinc = _mm256_broadcastd_epi32(inc * 8), vfmask = _mm256_broadcastd_epi32((int32)FRACTION_MASK);
+ __m256i vofs = _mm256_add_epi32(_mm256_broadcastd_epi32(start_offset), _mm256_set_epi32(inc*7,inc*6,inc*5,inc*4,inc*3,inc*2,inc,0));
+ const __m256 vdivf = _mm256_broadcastd_ps(div_fraction);
+ const __m256 vfrac_6 = _mm256_broadcastd_ps(div_fraction * DIV_6);
+ const __m256 vfrac_2 = _mm256_broadcastd_ps(div_fraction * DIV_2);
+ const __m256 v3n = _mm256_broadcastd_ps(-3);
+ const __m256 v3p = _mm256_broadcastd_ps(3);
+ const __m256i vfrac = _mm256_broadcastd_epi32(mlt_fraction);
+ const __m256i vfrac2 = _mm256_broadcastd_epi32(ml2_fraction);
+ const __m256 vec_divo = _mm256_broadcastd_ps(DIV_15BIT);
+#ifdef LAO_OPTIMIZE_INCREMENT
+ // optimization rate = (number of loaded samples - maximum initial offset fraction (less than 1) - number of interpolation points (3 for lagrange)) / number of offsets
+ // there is no permutevar for int16, so the loaded data is the 8-element 32bit (int32/float) set after conversion
+ const int32 opt_inc1 = (1 << FRACTION_BITS) * (8 - 1 - 3) / 8; // (float*8) * 1 set
+ if(inc < opt_inc1){ // 1 set
+ const __m256i vvar1n = _mm256_broadcastd_epi32(-1);
+ const __m256i vvar1 = _mm256_broadcastd_epi32(1);
+ const __m256i vvar2 = _mm256_broadcastd_epi32(2);
+ for(i = 0; i < count; i += 8) {
+ __m256i vofsi2 = _mm256_srli_epi32(vofs, FRACTION_BITS); // ofsi
+ __m256i vofsi1 = _mm256_add_epi32(vofsi2, vvar1n); // ofsi-1
+ __m256i vofsi3 = _mm256_add_epi32(vofsi2, vvar1); // ofsi+1
+ __m256i vofsi4 = _mm256_add_epi32(vofsi2, vvar2); // ofsi+2
+ int32 ofs0 = _mm_cvtsi128_si32(_mm256_extracti128_si256(vofsi1, 0x0));
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[ofs0]); // int16*8
+ __m256i vofsib = _mm256_permutevar8x32_epi32(vofsi1, _mm256_setzero_si256());
+ __m256i vofsub1 = _mm256_sub_epi32(vofsi1, vofsib);
+ __m256i vofsub2 = _mm256_sub_epi32(vofsi2, vofsib);
+ __m256i vofsub3 = _mm256_sub_epi32(vofsi3, vofsib);
+ __m256i vofsub4 = _mm256_sub_epi32(vofsi4, vofsib);
+ __m256 vvf1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vin1)); // int16 to float (i16*8->i32*8->f32*8
+ __m256 vv1 = _mm256_permutevar8x32_ps(vvf1, vofsub1); // v1 ofsi-1
+ __m256 vv2 = _mm256_permutevar8x32_ps(vvf1, vofsub2); // v2 ofsi
+ __m256 vv3 = _mm256_permutevar8x32_ps(vvf1, vofsub3); // v3 ofsi+1
+ __m256 vv4 = _mm256_permutevar8x32_ps(vvf1, vofsub4); // v4 ofsi+2
+ // the rest is the same as the normal path
+ __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction; NOTE(review): 128-bit intrinsics on 256-bit vectors
+ __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0]; NOTE(review): vv0 is not declared in this branch (the taps here are vv1..vv4)
+ __m256 vtmp1, vtmp2, vtmp3, vtmp4;
+ vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+ vtmp1 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac2)), vfrac_6); // tmp1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+ vtmp2 = _mm256_sub_ps(_mm256_sub_ps(vv2, vv1), vtmp); // tmp2 = v[2] - v[1] - tmp;
+ vtmp3 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac)), vfrac_2); // tmp3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+ vtmp4 = _mm256_mul_ps(_mm256_cvtepi32_ps(vofsf), vdivf); // tmp4 = (FLOAT_T)ofsf * div_fraction;
+ vv3 = MM256_FMA_PS(vv3, vtmp1, vtmp2); // v[3] = v[3] * tmp1 + tmp2
+ vv3 = MM256_FMA_PS(vv3, vtmp3, vtmp); // v[3] = v[3] * tmp3 + tmp;
+ vv3 = MM256_FMA_PS(vv3, vtmp4, vv0); // v[3] = v[3] * tmp4 + vv0;
+#if defined(DATA_T_DOUBLE)
+ vv3 = _mm256_mul_ps(vv3, vec_divo);
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x0)));
+ dest += 4;
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x1)));
+ dest += 4;
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+ _mm256_storeu_ps(dest, _mm256_mul_ps(vv3, vec_divo));
+ dest += 8;
+#else // DATA_T_IN32
+ _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm256_extractf128_ps(vv3, 0x0)));
+ dest += 4;
+ _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(_mm256_extractf128_ps(vv3, 0x1)));
+ dest += 4;
+#endif
+ vofs = _mm256_add_epi32(vofs, vinc); // ofs += inc;
+ }
+ }else
+#endif // LAO_OPTIMIZE_INCREMENT
+ for(; i < count; i += 8) {
+ __m256i vofsi = _mm256_srli_epi32(vofs, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,0) - 1]); // load ofsi-1~ofsi+2
+ __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,1) - 1]); // same for the following samples
+ __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,2) - 1]); // same for the following samples
+ __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,3) - 1]); // same for the following samples
+ __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,4) - 1]); // same for the following samples
+ __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,5) - 1]); // same for the following samples
+ __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,6) - 1]); // same for the following samples
+ __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM256_EXTRACT_I32(vofsi,7) - 1]); // same for the following samples
+ __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21v31v41],[v12v22v32v42] to [v11v12v21v22v31v32v41v42]
+ __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23v33v43],[v14v24v34v44] to [v13v14v23v24v33v34v43v44]
+ __m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // [v15v25v35v45],[v16v26v36v46] to [v15v16v25v26v35v36v45v46]
+ __m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // [v17v27v37v47],[v18v28v38v48] to [v17v18v27v28v37v38v47v48]
+ __m128i vin1121 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22],[v13v14,v23v24] to [v11v12v13v14,v21v22v23v24]
+ __m128i vin3141 = _mm_unpackhi_epi32(vin12, vin34); // [v31v32,v41v42],[v33v34v,43v44] to [v31v32v33v34,v41v42v43v44]
+ __m128i vin1525 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26],[v17v18,v27v28] to [v15v16v17v18,v25v26v27v28]
+ __m128i vin3545 = _mm_unpackhi_epi32(vin56, vin78); // [v35v36,v45v46],[v37v38v,47v48] to [v35v36v37v38,v45v46v47v48]
+ __m128i vi16_1 = _mm_unpacklo_epi64(vin1121, vin1525); // [v11v12v13v14,v21v22v23v24],[v15v16v17v18,v25v26v27v28] to [v11v12v13v14v15v16v17v18]
+ __m128i vi16_2 = _mm_unpackhi_epi64(vin1121, vin1525); // [v11v12v13v14,v21v22v23v24],[v15v16v17v18,v25v26v27v28] to [v21v22v23v24v25v26v27v28]
+ __m128i vi16_3 = _mm_unpacklo_epi64(vin3141, vin3545); // [v31v32v33v34,v41v42v43v44],[v35v36v37v38,v45v46v47v48] to [v31v32v33v34v35v36v37v38]
+ __m128i vi16_4 = _mm_unpackhi_epi64(vin3141, vin3545); // [v31v32v33v34,v41v42v43v44],[v35v36v37v38,v45v46v47v48] to [v41v42v43v44v45v46v47v48]
+ __m256 vv0 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_1)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+ __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_2)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+ __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_3)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+ __m256 vv3 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_4)); // int16 to float (16bit*8 -> 32bit*8 > float*8
+ __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction; NOTE(review): 128-bit intrinsics on 256-bit vectors
+ __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0];
+ __m256 vtmp1, vtmp2, vtmp3, vtmp4;
+ vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+ vtmp1 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac2)), vfrac_6); // tmp1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+ vtmp2 = _mm256_sub_ps(_mm256_sub_ps(vv2, vv1), vtmp); // tmp2 = v[2] - v[1] - tmp;
+ vtmp3 = _mm256_mul_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vofsf, vfrac)), vfrac_2); // tmp3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+ vtmp4 = _mm256_mul_ps(_mm256_cvtepi32_ps(vofsf), vdivf); // tmp4 = (FLOAT_T)ofsf * div_fraction;
+ vv3 = MM256_FMA_PS(vv3, vtmp1, vtmp2); // v[3] = v[3] * tmp1 + tmp2
+ vv3 = MM256_FMA_PS(vv3, vtmp3, vtmp); // v[3] = v[3] * tmp3 + tmp;
+ vv3 = MM256_FMA_PS(vv3, vtmp4, vv0); // v[3] = v[3] * tmp4 + vv0;
+#if defined(DATA_T_DOUBLE)
+ vv3 = _mm256_mul_ps(vv3, vec_divo);
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x0)));
+ dest += 4;
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(_mm256_extractf128_ps(vv3, 0x1)));
+ dest += 4;
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+ _mm256_storeu_ps(dest, _mm256_mul_ps(vv3, vec_divo));
+ dest += 8;
+#else // DATA_T_IN32
+ _mm256_storeu_si256((__m256i *)dest, _mm256_cvtps_epi32(vv3));
+ dest += 8;
+#endif
+ vofs = _mm256_add_epi32(vofs, vinc); // ofs += inc;
+ }
+ resrc->offset = prec_offset + (splen_t)(MM256_EXTRACT_I32(vofs,0)); // lane 0 holds the position of the next unprocessed sample
+ *out_count = i;
+ return dest;
+}
+
+#elif (USE_X86_EXT_INTRIN >= 3)
+// offset:int32*4*2, resamp:float*4*2 2set 15.51s (1set 16.08s
+// Keep the offset arithmetic inside the loop within the int32 range , (sample_increment * (req_count+1)) < int32 max
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+ resample_rec_t *resrc = &vp->resrc;
+ int32 i = 0;
+ const int32 req_count_mask = ~(0x7);
+ const int32 count = req_count & req_count_mask;
+ splen_t prec_offset = resrc->offset & INTEGER_MASK;
+ sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
+ const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
+ const int32 inc = resrc->increment;
+ const __m128i vinc = _mm_set1_epi32(inc * 8), vfmask = _mm_set1_epi32((int32)FRACTION_MASK);
+ __m128i vofs1 = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0));
+ __m128i vofs2 = _mm_add_epi32(vofs1, _mm_set1_epi32(inc * 4));
+ const __m128 vdivf = _mm_set1_ps(div_fraction);
+ const __m128 vfrac_6 = _mm_set1_ps(div_fraction * DIV_6);
+ const __m128 vfrac_2 = _mm_set1_ps(div_fraction * DIV_2);
+ const __m128 v3n = _mm_set1_ps(-3);
+ const __m128 v3p = _mm_set1_ps(3);
+ const __m128i vfrac = _mm_set1_epi32(mlt_fraction);
+ const __m128i vfrac2 = _mm_set1_epi32(ml2_fraction);
+ const __m128 vec_divo = _mm_set1_ps(DIV_15BIT);
+ for(; i < count; i += 8) {
+ __m128i vofsi1 = _mm_srli_epi32(vofs1, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+ __m128i vofsi2 = _mm_srli_epi32(vofs2, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+ __m128i vin1 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,0) - 1]); // ofsi-1~ofsi+2\82ð\83\8d\81[\83h
+ __m128i vin2 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,1) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin3 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,2) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin4 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi1,3) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin5 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,0) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin6 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,1) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin7 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,2) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin8 = _mm_loadu_si128((__m128i *)&src[MM_EXTRACT_I32(vofsi2,3) - 1]); // \8e\9f\8eü\83T\83\93\83v\83\8b\82à\93¯\82¶
+ __m128i vin12 = _mm_unpacklo_epi16(vin1, vin2); // [v11v21v31v41],[v12v22v32v42] to [v11v12v21v22v31v32v41v42]
+ __m128i vin34 = _mm_unpacklo_epi16(vin3, vin4); // [v13v23v33v43],[v14v24v34v44] to [v13v14v23v24v33v34v43v44]
+ __m128i vin56 = _mm_unpacklo_epi16(vin5, vin6); // [v15v25v35v45],[v16v26v36v46] to [v15v16v25v26v35v36v45v46]
+ __m128i vin78 = _mm_unpacklo_epi16(vin7, vin8); // [v17v27v37v47],[v18v28v38v48] to [v17v18v27v28v37v38v47v48]
+ __m128i vi16_1 = _mm_unpacklo_epi32(vin12, vin34); // [v11v12,v21v22],[v13v14,v23v24] to [v11v12v13v14,v21v22v23v24]
+ __m128i vi16_2 = _mm_unpackhi_epi32(vin12, vin34); // [v31v32,v41v42],[v33v34v,43v44] to [v31v32v33v34,v41v42v43v44]
+ __m128i vi16_3 = _mm_unpacklo_epi32(vin56, vin78); // [v15v16,v25v26],[v17v18,v27v28] to [v15v16v17v18,v25v26v27v28]
+ __m128i vi16_4 = _mm_unpackhi_epi32(vin56, vin78); // [v35v36,v45v46],[v37v38v,47v48] to [v35v36v37v38,v45v46v47v48]
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 , _mm_ cvtepi16_epi32()
+ __m128i vi16_1_2 = _mm_shuffle_epi32(vi16_1, 0x4e); // ofsi+0\82ÍL64bit\82Ö
+ __m128i vi16_2_2 = _mm_shuffle_epi32(vi16_2, 0x4e); // ofsi+2\82ÍL64bit\82Ö
+ __m128i vi16_3_2 = _mm_shuffle_epi32(vi16_3, 0x4e); // ofsi+0\82ÍL64bit\82Ö
+ __m128i vi16_4_2 = _mm_shuffle_epi32(vi16_4, 0x4e); // ofsi+2\82ÍL64bit\82Ö
+ __m128 vv01 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_1)); // int16 to float
+ __m128 vv11 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_1_2)); // int16 to float
+ __m128 vv21 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_2)); // int16 to float
+ __m128 vv31 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_2_2)); // int16 to float
+ __m128 vv02 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_3)); // int16 to float
+ __m128 vv12 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_3_2)); // int16 to float
+ __m128 vv22 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_4)); // int16 to float
+ __m128 vv32 = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(vi16_4_2)); // int16 to float
+#else
+ __m128i sign1 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_1);
+ __m128i sign2 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_2);
+ __m128i sign3 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_3);
+ __m128i sign4 = _mm_cmpgt_epi16(_mm_setzero_si128(), vi16_4);
+ __m128 vv01 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_1, sign1)); // int16 to float
+ __m128 vv11 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_1, sign1)); // int16 to float
+ __m128 vv21 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_2, sign2)); // int16 to float
+ __m128 vv31 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_2, sign2)); // int16 to float
+ __m128 vv02 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_3, sign3)); // int16 to float
+ __m128 vv12 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_3, sign3)); // int16 to float
+ __m128 vv22 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(vi16_4, sign4)); // int16 to float
+ __m128 vv32 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(vi16_4, sign4)); // int16 to float
+#endif
+ __m128i vofsf1 = _mm_add_epi32(_mm_and_si128(vofs1, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+ __m128i vofsf2 = _mm_add_epi32(_mm_and_si128(vofs2, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+ __m128 vtmp1 = _mm_sub_ps(vv11, vv01); // tmp = v[1] - v[0];
+ __m128 vtmp2 = _mm_sub_ps(vv12, vv02); // tmp = v[1] - v[0];
+ __m128 vtmpx11, vtmpx12, vtmpx21, vtmpx22, vtmpx31, vtmpx32, vtmpx41, vtmpx42;
+ __m128 vtmpi1, vtmpi2;
+ vv31 = _mm_add_ps(vv31, _mm_sub_ps(MM_FMA2_PS(vv21, v3n, vv11, v3p), vv01)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+ vv32 = _mm_add_ps(vv32, _mm_sub_ps(MM_FMA2_PS(vv22, v3n, vv12, v3p), vv02)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+ vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac2));
+ vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac2));
+ vtmpx11 = _mm_mul_ps(vtmpi1, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+ vtmpx12 = _mm_mul_ps(vtmpi2, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+ vtmpx21 = _mm_sub_ps(_mm_sub_ps(vv21, vv11), vtmp1); // tmpx2 = v[2] - v[1] - tmp);
+ vtmpx22 = _mm_sub_ps(_mm_sub_ps(vv22, vv12), vtmp2); // tmpx2 = v[2] - v[1] - tmp);
+ vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac));
+ vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac));
+ vtmpx31 = _mm_mul_ps(vtmpi1, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+ vtmpx32 = _mm_mul_ps(vtmpi2, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+ vtmpi1 = _mm_cvtepi32_ps(vofsf1);
+ vtmpi2 = _mm_cvtepi32_ps(vofsf2);
+ vtmpx41 = _mm_mul_ps(vtmpi1, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+ vtmpx42 = _mm_mul_ps(vtmpi2, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+ vv31 = MM_FMA_PS(vv31, vtmpx11, vtmpx21); // v[3] = v[3] * tmpx1 + tmpx2
+ vv32 = MM_FMA_PS(vv32, vtmpx12, vtmpx22); // v[3] = v[3] * tmp1 + tmp2
+ vv31 = MM_FMA_PS(vv31, vtmpx31, vtmp1); // v[3] = v[3] * tmpx3 + tmp;
+ vv32 = MM_FMA_PS(vv32, vtmpx32, vtmp2); // v[3] = v[3] * tmpx3 + tmp;
+ vv31 = MM_FMA_PS(vv31, vtmpx41, vv01); // v[3] = v[3] * tmpx4 + vv0;
+ vv32 = MM_FMA_PS(vv32, vtmpx42, vv02); // v[3] = v[3] * tmpx4 + vv0;
+#if defined(DATA_T_DOUBLE)
+ vv31 = _mm_mul_ps(vv31, vec_divo);
+ vv32 = _mm_mul_ps(vv32, vec_divo);
+#if (USE_X86_EXT_INTRIN >= 8)
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv31));
+ dest += 4;
+ _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv32));
+ dest += 4;
+#else
+ _mm_storeu_pd(dest, _mm_cvtps_pd(vv31));
+ dest += 2;
+ _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv31, vv31)));
+ dest += 2;
+ _mm_storeu_pd(dest, _mm_cvtps_pd(vv32));
+ dest += 2;
+ _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv32, vv32)));
+ dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+ _mm_storeu_ps(dest, _mm_mul_ps(vv31, vec_divo));
+ dest += 4;
+ _mm_storeu_ps(dest, _mm_mul_ps(vv32, vec_divo));
+ dest += 4;
+#else // DATA_T_IN32
+ _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv31));
+ dest += 4;
+ _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv32));
+ dest += 4;
+#endif
+ vofs1 = _mm_add_epi32(vofs1, vinc); // ofs += inc;
+ vofs2 = _mm_add_epi32(vofs2, vinc); // ofs += inc;
+ }
+ resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs1,0));
+ *out_count = i;
+ return dest;
+}
+
+#else // not use MMX/SSE/AVX
+// Keep the offset arithmetic inside the loop within the int32 range; requires (sample_increment * (req_count+1)) < int32 max
+/*
+ * Scalar bulk resampler for 4-point Lagrange interpolation.
+ *
+ * Produces req_count output samples starting at vp->resrc.offset, advancing
+ * by vp->resrc.increment per sample.  Each output reads the four source
+ * samples at positions idx-1 .. idx+2 around the current position, so the
+ * sample before the current one must be readable.
+ *
+ * The position is tracked as an int32 relative to the whole-sample part of
+ * the starting offset; the absolute offset is written back on exit.
+ *
+ * dest      - output cursor
+ * req_count - number of samples to produce
+ * out_count - receives the number of samples actually written
+ * returns   - the output cursor advanced past the written samples
+ */
+static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    const splen_t base = rec->offset & INTEGER_MASK;        /* whole-sample part of the start offset */
+    sample_t *in = vp->sample->data + (base >> FRACTION_BITS);
+    const int32 step = rec->increment;
+    int32 pos = (int32)(rec->offset & FRACTION_MASK);       /* position relative to base */
+    int32 done = 0;
+
+    while (done < req_count) {
+        const int32 idx = pos >> FRACTION_BITS;
+        int32 frac = pos & FRACTION_MASK;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+        FLOAT_T s0, s1, s2, acc, d10;
+
+        s0 = in[idx - 1];
+        s1 = in[idx];
+        s2 = in[idx + 1];
+        acc = in[idx + 2];
+        frac += mlt_fraction;
+        d10 = s1 - s0;
+        acc += -3 * s2 + 3 * s1 - s0;
+        acc *= (FLOAT_T)(frac - ml2_fraction) * DIV_6 * div_fraction;
+        acc += s2 - s1 - d10;
+        acc *= (FLOAT_T)(frac - mlt_fraction) * DIV_2 * div_fraction;
+        acc += d10;
+        acc *= (FLOAT_T)frac * div_fraction;
+        acc += s0;
+        *dest++ = acc * OUT_INT16;
+#else // DATA_T_INT32
+        int32 s0, s1, s2, acc, d10;
+
+        s0 = in[idx - 1];
+        s1 = in[idx];
+        s2 = in[idx + 1];
+        acc = in[idx + 2];
+        frac += mlt_fraction;
+        d10 = s1 - s0;
+        acc += -3*s2 + 3*s1 - s0;
+        acc = imuldiv_fraction(acc, (frac - ml2_fraction) / 6);
+        acc += s2 - s1 - d10;
+        acc = imuldiv_fraction(acc, (frac - mlt_fraction) >> 1);
+        acc += d10;
+        acc = imuldiv_fraction(acc, frac);
+        acc += s0;
+        *dest++ = acc;
+#endif
+        pos += step;
+        done++;
+    }
+
+    rec->offset = base + (splen_t)pos;
+    *out_count = done;
+    return dest;
+}
+#endif
+
+/*
+ * Non-looping playback of an int16 sample with Lagrange interpolation:
+ * render samples until the precomputed end of the data, then pad the rest
+ * of the request with zeros and flag the voice as finished.
+ */
+static void lao_rs_plain(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+    int32 limit;
+
+    /* A voice that just left a bidirectional loop may still run backwards. */
+    if (rec->increment < 0)
+        rec->increment = -rec->increment;
+
+    /* Precomputed step count to data_length plus 4, clamped to the request. */
+    limit = PRECALC_LOOP_COUNT(rec->offset, rec->data_length, rec->increment) + 4;
+    if (limit > count)
+        limit = count;
+    else if (limit < 0)
+        limit = 0;
+
+    /* The bulk path is only used once the position is at least one whole sample in. */
+    if ((rec->offset >> FRACTION_BITS) >= 1)
+        dest = resample_lagrange_multi(vp, dest, limit, &done);
+
+    /* Whatever the bulk path left over is rendered one sample at a time. */
+    while (done < limit) {
+        *dest++ = resample_lagrange_single(vp);
+        rec->offset += rec->increment;
+        done++;
+    }
+
+    /* Past the end: emit silence and mark the voice as finished. */
+    if (done < count) {
+        vp->finish_voice = 1;
+        do {
+            *dest++ = 0;
+        } while (++done < count);
+    }
+}
+
+/*
+ * Forward-looping playback of an int16 sample with Lagrange interpolation:
+ * play up to the loop end, jump back by the loop length and continue.
+ */
+static void lao_rs_loop(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+
+    if ((rec->offset >> FRACTION_BITS) >= 1) {
+        /* Stop the bulk path 4 steps short of the loop end (4-point interpolation). */
+        int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+        if (bulk > count)
+            bulk = count;
+        else if (bulk < 0)
+            bulk = 0;
+        dest = resample_lagrange_multi(vp, dest, bulk, &done);
+    }
+
+    while (done < count) {
+        *dest++ = resample_lagrange_single(vp);
+        rec->offset += rec->increment;
+        /* Wrap around; this relies on the loop being longer than one increment. */
+        if (rec->offset >= rec->loop_end)
+            rec->offset -= rec->loop_end - rec->loop_start;
+        done++;
+    }
+}
+
+/*
+ * Bidirectional (ping-pong) looping playback of an int16 sample with
+ * Lagrange interpolation: the position is reflected at loop_end and at
+ * loop_start, and the sign of the increment flips at each reflection.
+ */
+static void lao_rs_bidir(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+
+    /* The bulk path only handles forward motion away from the sample start. */
+    if ((rec->offset >> FRACTION_BITS) >= 1 && rec->increment > 0) {
+        /* Stop 4 steps short of the loop end (4-point interpolation). */
+        int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+        if (bulk > count)
+            bulk = count;
+        else if (bulk < 0)
+            bulk = 0;
+        dest = resample_lagrange_multi(vp, dest, bulk, &done);
+    }
+
+    while (done < count) {
+        *dest++ = resample_lagrange_single(vp);
+        rec->offset += rec->increment;
+
+        if (rec->increment > 0 && rec->offset >= rec->loop_end) {
+            /* Ran past the loop end while moving forward: reflect and reverse. */
+            rec->offset = (rec->loop_end << 1) - rec->offset;
+            rec->increment = -rec->increment;
+        } else if (rec->increment <= 0 && rec->offset <= rec->loop_start) {
+            /* Ran past the loop start while moving backward: reflect and reverse. */
+            rec->offset = (rec->loop_start << 1) - rec->offset;
+            rec->increment = -rec->increment;
+        }
+        done++;
+    }
+}
+
+/*
+ * Select the playback routine (plain / bidirectional loop / forward loop)
+ * for an int16 sample, record the choice in vp->resrc.mode and render
+ * `count` samples into ptr.
+ *
+ * Once plain playback has been chosen because the voice is ending,
+ * vp->resrc.plain_flag is set so that later calls stay on the plain path.
+ */
+static inline void resample_voice_lagrange_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{
+    int mode = vp->sample->modes;
+
+    if (!vp->resrc.plain_flag) {
+        /* No envelope and the voice is off or dying. */
+        const int ending_without_env = !(mode & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE));
+        /* Release-sample mode and the voice is off. */
+        const int releasing = (mode & MODES_RELEASE) && (vp->status & VOICE_OFF);
+
+        if (ending_without_env || releasing)
+            vp->resrc.plain_flag = 1;   /* lock onto the non-looping path */
+    }
+
+    if (vp->resrc.plain_flag) {
+        vp->resrc.mode = RESAMPLE_MODE_PLAIN;
+        lao_rs_plain(vp, ptr, count);
+        return;
+    }
+
+    if (mode & MODES_PINGPONG) {
+        vp->resrc.mode = RESAMPLE_MODE_BIDIR_LOOP;
+        lao_rs_bidir(vp, ptr, count);
+        return;
+    }
+
+    vp->resrc.mode = RESAMPLE_MODE_LOOP;
+    lao_rs_loop(vp, ptr, count);
+}
+#endif /* optimize lagrange resample */
+
+
+/*************** optimize lagrange float resample ***********************/
+#if defined(PRECALC_LOOPS)
+
+/*
+ * Interpolate ONE output sample from float sample data at the voice's
+ * current position (vp->resrc.offset).  The offset itself is not advanced;
+ * the caller does that.
+ *
+ * Depending on vp->resrc.mode the four interpolation taps are gathered
+ *  - directly from src[ofsi-1 .. ofsi+2] when that window does not touch a
+ *    loop boundary,
+ *  - by wrapping around the loop (RESAMPLE_MODE_LOOP), or
+ *  - by reflecting at the loop ends (RESAMPLE_MODE_BIDIR_LOOP).
+ * When the position is inside the very first sample (ofsi < 1) there is no
+ * preceding tap, so plain linear interpolation between src[ofsi] and
+ * src[ofsi+1] is used instead.
+ *
+ * Returns the interpolated sample:
+ *  - float/double DATA_T: unscaled, the same scale that
+ *    resample_lagrange_float_multi writes for float sample data;
+ *  - int32 DATA_T: scaled by M_15BIT.
+ */
+static inline DATA_T resample_lagrange_float_single(Voice *vp)
+{
+    float *src = (float *)vp->sample->data;
+    const resample_rec_t *resrc = &vp->resrc;
+    fract_t ofsf = resrc->offset & FRACTION_MASK;
+    const spos_t ofsls = resrc->loop_start >> FRACTION_BITS;
+    const spos_t ofsle = resrc->loop_end >> FRACTION_BITS;
+    const spos_t ofsi = resrc->offset >> FRACTION_BITS;
+    spos_t ofstmp, len;
+    FLOAT_T v[4];
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+    FLOAT_T tmp;
+#else
+    int32 vi[4], tmpi;
+#endif
+    int32 i, dir;
+
+    switch (resrc->mode) {
+    case RESAMPLE_MODE_PLAIN:
+        if (ofsi < 1)
+            goto do_linear;
+        break; // normal
+    case RESAMPLE_MODE_LOOP:
+        if (ofsi < ofsls) {
+            if (ofsi < 1)
+                goto do_linear;
+            if ((ofsi + 2) < ofsle)
+                break; // normal
+        } else if (((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+            break; // normal
+        /* The 4-tap window touches a loop boundary: gather taps with wrap-around. */
+        len = ofsle - ofsls; // loop_length
+        ofstmp = ofsi - 1;
+        if (ofstmp < ofsls) {ofstmp += len;} // needed if loop_length == data_length
+        for (i = 0; i < 4; i++) {
+            v[i] = src[ofstmp];
+            if ((++ofstmp) > ofsle) {ofstmp -= len;} // -= loop_length, jump to loop_start
+        }
+        goto loop_ofs;
+    case RESAMPLE_MODE_BIDIR_LOOP:
+        if (resrc->increment >= 0) { // normal dir
+            if (ofsi < ofsls) {
+                if (ofsi < 1)
+                    goto do_linear;
+                if ((ofsi + 2) < ofsle)
+                    break; // normal
+            } else if (((ofsi + 2) < ofsle) && ((ofsi - 1) >= ofsls))
+                break; // normal
+            dir = 1;
+            ofstmp = ofsi - 1;
+            if (ofstmp < ofsls) { // needed if loop_length == data_length
+                ofstmp = (ofsls << 1) - ofstmp;
+                dir = -1;
+            }
+        } else { // reverse dir
+            dir = -1;
+            ofstmp = ofsi + 1;
+            if (ofstmp > ofsle) { // needed if loop_length == data_length
+                ofstmp = (ofsle << 1) - ofstmp;
+                dir = 1;
+            }
+            /* Taps are gathered back-to-front, so mirror the fraction as well. */
+            ofsf = mlt_fraction - ofsf;
+        }
+        /* Gather taps, turning around whenever a loop end is reached. */
+        for (i = 0; i < 4; i++) {
+            v[i] = src[ofstmp];
+            ofstmp += dir;
+            if (dir < 0) { // -
+                if (ofstmp <= ofsls) {dir = 1;}
+            } else { // +
+                if (ofstmp >= ofsle) {dir = -1;}
+            }
+        }
+        goto loop_ofs;
+    default:
+        break;
+    }
+
+    /* Window lies entirely inside contiguous data. */
+    v[0] = src[ofsi - 1];
+    v[1] = src[ofsi];
+    v[2] = src[ofsi + 1];
+    v[3] = src[ofsi + 2];
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+loop_ofs:
+    ofsf += mlt_fraction;
+    tmp = v[1] - v[0];
+    v[3] += -3 * v[2] + 3 * v[1] - v[0];
+    v[3] *= (FLOAT_T)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+    v[3] += v[2] - v[1] - tmp;
+    v[3] *= (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+    v[3] += tmp;
+    v[3] *= (FLOAT_T)ofsf * div_fraction;
+    v[3] += v[0];
+    /* Float sample data is returned as is (no OUT_INT16 factor), matching
+       what resample_lagrange_float_multi stores for the same data. */
+    return v[3];
+do_linear:
+    /* The samples are floats: take the difference in floating point.  An
+       integer cast here would truncate the fractional part of the samples. */
+    v[1] = src[ofsi];
+    v[2] = (FLOAT_T)src[ofsi + 1] - v[1];
+    return v[1] + v[2] * (FLOAT_T)ofsf * div_fraction; // FLOAT_T
+#else // DATA_T_INT32
+loop_ofs:
+    vi[0] = v[0] * M_15BIT;
+    vi[1] = v[1] * M_15BIT;
+    vi[2] = v[2] * M_15BIT;
+    vi[3] = v[3] * M_15BIT;
+    ofsf += mlt_fraction;
+    tmpi = vi[1] - vi[0];
+    vi[3] += -3*vi[2] + 3*vi[1] - vi[0];
+    vi[3] = imuldiv_fraction(vi[3], (ofsf - ml2_fraction) / 6);
+    vi[3] += vi[2] - vi[1] - tmpi;
+    vi[3] = imuldiv_fraction(vi[3], (ofsf - mlt_fraction) >> 1);
+    vi[3] += tmpi;
+    vi[3] = imuldiv_fraction(vi[3], ofsf);
+    vi[3] += vi[0];
+    return vi[3];
+do_linear:
+    /* Scale both taps to the integer range first, then interpolate between them. */
+    vi[1] = src[ofsi] * M_15BIT;
+    vi[2] = src[ofsi + 1] * M_15BIT;
+    return vi[1] + imuldiv_fraction(vi[2] - vi[1], ofsf);
+#endif
+}
+
+#if (USE_X86_EXT_INTRIN >= 3)
+// offset:int32*4*2, resamp:float*4*2 2set
+// Keep the offset arithmetic inside the loop within the int32 range; requires (sample_increment * (req_count+1)) < int32 max
+/*
+ * SIMD bulk resampler for float sample data using 4-point Lagrange
+ * interpolation.  Processes 8 output samples per iteration as two groups of
+ * four lanes.
+ *
+ * Only whole groups of 8 are produced: req_count is rounded down to a
+ * multiple of 8 and the caller renders the remainder (see *out_count).
+ * Offsets are tracked as int32 relative to the whole-sample part of the
+ * starting offset, so increment * (req_count + 1) must fit in int32.
+ * Each lane loads src[ofsi-1 .. ofsi+2], so the sample before the current
+ * position must be readable.
+ *
+ * dest      - output cursor
+ * req_count - number of samples requested
+ * out_count - receives the number of samples actually written
+ * returns   - the output cursor advanced past the written samples
+ */
+static inline DATA_T *resample_lagrange_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+    resample_rec_t *resrc = &vp->resrc;
+    int32 i = 0;
+    const int32 req_count_mask = ~(0x7);
+    const int32 count = req_count & req_count_mask; // whole groups of 8 only
+    splen_t prec_offset = resrc->offset & INTEGER_MASK;
+    float *src = (float *)vp->sample->data + (prec_offset >> FRACTION_BITS);
+    const int32 start_offset = (int32)(resrc->offset - prec_offset); // keep offset arithmetic in int32 range (for SIMD)
+    const int32 inc = resrc->increment;
+    const __m128i vinc = _mm_set1_epi32(inc * 8), vfmask = _mm_set1_epi32((int32)FRACTION_MASK);
+    __m128i vofs1 = _mm_add_epi32(_mm_set1_epi32(start_offset), _mm_set_epi32(inc * 3, inc * 2, inc, 0));
+    __m128i vofs2 = _mm_add_epi32(vofs1, _mm_set1_epi32(inc * 4));
+    const __m128 vdivf = _mm_set1_ps(div_fraction);
+    const __m128 vfrac_6 = _mm_set1_ps(div_fraction * DIV_6);
+    const __m128 vfrac_2 = _mm_set1_ps(div_fraction * DIV_2);
+    const __m128 v3n = _mm_set1_ps(-3);
+    const __m128 v3p = _mm_set1_ps(3);
+    const __m128i vfrac = _mm_set1_epi32(mlt_fraction);
+    const __m128i vfrac2 = _mm_set1_epi32(ml2_fraction);
+    const __m128 vec_divo = _mm_set1_ps(M_15BIT); // output scale, applied for int32 DATA_T only
+    for (; i < count; i += 8) {
+        __m128i vofsi1 = _mm_srli_epi32(vofs1, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+        __m128i vofsi2 = _mm_srli_epi32(vofs2, FRACTION_BITS); // ofsi = ofs >> FRACTION_BITS
+        __m128 vin1 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,0) - 1]); // load src[ofsi-1 .. ofsi+2] [v11v12v13v14]
+        __m128 vin2 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,1) - 1]); // same for the following output samples [v21v22v23v24]
+        __m128 vin3 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,2) - 1]); // [v31v32v33v34]
+        __m128 vin4 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi1,3) - 1]); // [v41v42v43v44]
+        __m128 vin5 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,0) - 1]); // [v51v52v53v54]
+        __m128 vin6 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,1) - 1]); // [v61v62v63v64]
+        __m128 vin7 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,2) - 1]); // [v71v72v73v74]
+        __m128 vin8 = _mm_loadu_ps(&src[MM_EXTRACT_I32(vofsi2,3) - 1]); // [v81v82v83v84]
+        /* Transpose so that each vector holds one tap for four output samples. */
+        __m128 vin12a = _mm_shuffle_ps(vin1, vin2, 0x44); // [v11,v12,v21,v22]
+        __m128 vin12b = _mm_shuffle_ps(vin1, vin2, 0xEE); // [v13,v14,v23,v24]
+        __m128 vin34a = _mm_shuffle_ps(vin3, vin4, 0x44); // [v31,v32,v41,v42]
+        __m128 vin34b = _mm_shuffle_ps(vin3, vin4, 0xEE); // [v33,v34,v43,v44]
+        __m128 vin56a = _mm_shuffle_ps(vin5, vin6, 0x44); // [v51,v52,v61,v62]
+        __m128 vin56b = _mm_shuffle_ps(vin5, vin6, 0xEE); // [v53,v54,v63,v64]
+        __m128 vin78a = _mm_shuffle_ps(vin7, vin8, 0x44); // [v71,v72,v81,v82]
+        __m128 vin78b = _mm_shuffle_ps(vin7, vin8, 0xEE); // [v73,v74,v83,v84]
+        __m128 vv01 = _mm_shuffle_ps(vin12a, vin34a, 0x88); // [v11,v21,v31,v41]
+        __m128 vv11 = _mm_shuffle_ps(vin12a, vin34a, 0xDD); // [v12,v22,v32,v42]
+        __m128 vv21 = _mm_shuffle_ps(vin12b, vin34b, 0x88); // [v13,v23,v33,v43]
+        __m128 vv31 = _mm_shuffle_ps(vin12b, vin34b, 0xDD); // [v14,v24,v34,v44]
+        __m128 vv02 = _mm_shuffle_ps(vin56a, vin78a, 0x88); // [v51,v61,v71,v81]
+        __m128 vv12 = _mm_shuffle_ps(vin56a, vin78a, 0xDD); // [v52,v62,v72,v82]
+        __m128 vv22 = _mm_shuffle_ps(vin56b, vin78b, 0x88); // [v53,v63,v73,v83]
+        __m128 vv32 = _mm_shuffle_ps(vin56b, vin78b, 0xDD); // [v54,v64,v74,v84]
+        __m128i vofsf1 = _mm_add_epi32(_mm_and_si128(vofs1, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+        __m128i vofsf2 = _mm_add_epi32(_mm_and_si128(vofs2, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+        __m128 vtmp1 = _mm_sub_ps(vv11, vv01); // tmp = v[1] - v[0];
+        __m128 vtmp2 = _mm_sub_ps(vv12, vv02); // tmp = v[1] - v[0];
+        __m128 vtmpx11, vtmpx12, vtmpx21, vtmpx22, vtmpx31, vtmpx32, vtmpx41, vtmpx42;
+        __m128 vtmpi1, vtmpi2;
+        vv31 = _mm_add_ps(vv31, _mm_sub_ps(MM_FMA2_PS(vv21, v3n, vv11, v3p), vv01)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+        vv32 = _mm_add_ps(vv32, _mm_sub_ps(MM_FMA2_PS(vv22, v3n, vv12, v3p), vv02)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
+        vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac2));
+        vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac2));
+        vtmpx11 = _mm_mul_ps(vtmpi1, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+        vtmpx12 = _mm_mul_ps(vtmpi2, vfrac_6); // tmpx1 = (float)(ofsf - ml2_fraction) * DIV_6 * div_fraction;
+        vtmpx21 = _mm_sub_ps(_mm_sub_ps(vv21, vv11), vtmp1); // tmpx2 = v[2] - v[1] - tmp;
+        vtmpx22 = _mm_sub_ps(_mm_sub_ps(vv22, vv12), vtmp2); // tmpx2 = v[2] - v[1] - tmp;
+        vtmpi1 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf1, vfrac));
+        vtmpi2 = _mm_cvtepi32_ps(_mm_sub_epi32(vofsf2, vfrac));
+        vtmpx31 = _mm_mul_ps(vtmpi1, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+        vtmpx32 = _mm_mul_ps(vtmpi2, vfrac_2); // tmpx3 = (FLOAT_T)(ofsf - mlt_fraction) * DIV_2 * div_fraction;
+        vtmpi1 = _mm_cvtepi32_ps(vofsf1);
+        vtmpi2 = _mm_cvtepi32_ps(vofsf2);
+        vtmpx41 = _mm_mul_ps(vtmpi1, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+        vtmpx42 = _mm_mul_ps(vtmpi2, vdivf); // tmpx4 = (FLOAT_T)ofsf * div_fraction;
+        vv31 = MM_FMA_PS(vv31, vtmpx11, vtmpx21); // v[3] = v[3] * tmpx1 + tmpx2
+        vv32 = MM_FMA_PS(vv32, vtmpx12, vtmpx22); // v[3] = v[3] * tmpx1 + tmpx2
+        vv31 = MM_FMA_PS(vv31, vtmpx31, vtmp1); // v[3] = v[3] * tmpx3 + tmp;
+        vv32 = MM_FMA_PS(vv32, vtmpx32, vtmp2); // v[3] = v[3] * tmpx3 + tmp;
+        vv31 = MM_FMA_PS(vv31, vtmpx41, vv01); // v[3] = v[3] * tmpx4 + vv0;
+        vv32 = MM_FMA_PS(vv32, vtmpx42, vv02); // v[3] = v[3] * tmpx4 + vv0;
+#if defined(DATA_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 8)
+        _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv31));
+        dest += 4;
+        _mm256_storeu_pd(dest, _mm256_cvtps_pd(vv32));
+        dest += 4;
+#else
+        _mm_storeu_pd(dest, _mm_cvtps_pd(vv31));
+        dest += 2;
+        _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv31, vv31)));
+        dest += 2;
+        _mm_storeu_pd(dest, _mm_cvtps_pd(vv32));
+        dest += 2;
+        _mm_storeu_pd(dest, _mm_cvtps_pd(_mm_movehl_ps(vv32, vv32)));
+        dest += 2;
+#endif
+#elif defined(DATA_T_FLOAT) // DATA_T_FLOAT
+        _mm_storeu_ps(dest, vv31);
+        dest += 4;
+        _mm_storeu_ps(dest, vv32);
+        dest += 4;
+#else // DATA_T_INT32
+        /* Scale by M_15BIT before converting to integers.  The constant is
+           declared above as vec_divo; the former name vdivo was never declared. */
+        vv31 = _mm_mul_ps(vv31, vec_divo);
+        vv32 = _mm_mul_ps(vv32, vec_divo);
+        _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv31));
+        dest += 4;
+        _mm_storeu_si128((__m128i *)dest, _mm_cvtps_epi32(vv32));
+        dest += 4;
+#endif
+        vofs1 = _mm_add_epi32(vofs1, vinc); // ofs += inc * 8;
+        vofs2 = _mm_add_epi32(vofs2, vinc); // ofs += inc * 8;
+    }
+    /* Lane 0 of vofs1 is the offset of the next sample that was not rendered. */
+    resrc->offset = prec_offset + (splen_t)(MM_EXTRACT_I32(vofs1,0));
+    *out_count = i;
+    return dest;
+}
+
+#else // not use MMX/SSE/AVX
+// Keep the offset arithmetic inside the loop within the int32 range; requires (sample_increment * (req_count+1)) < int32 max
+/*
+ * Scalar bulk resampler for float sample data using 4-point Lagrange
+ * interpolation.
+ *
+ * Produces req_count output samples starting at vp->resrc.offset, advancing
+ * by vp->resrc.increment per sample.  Each output reads the four source
+ * samples at positions idx-1 .. idx+2, so the sample before the current
+ * position must be readable.  For int32 DATA_T the taps are scaled by
+ * M_15BIT before interpolating; for float/double DATA_T the result is
+ * stored unscaled.
+ *
+ * dest      - output cursor
+ * req_count - number of samples to produce
+ * out_count - receives the number of samples actually written
+ * returns   - the output cursor advanced past the written samples
+ */
+static inline DATA_T *resample_lagrange_float_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    const splen_t base = rec->offset & INTEGER_MASK;        /* whole-sample part of the start offset */
+    float *in = (float *)vp->sample->data + (base >> FRACTION_BITS);
+    const int32 step = rec->increment;
+    int32 pos = (int32)(rec->offset & FRACTION_MASK);       /* position relative to base */
+    int32 done = 0;
+
+    while (done < req_count) {
+        const int32 idx = pos >> FRACTION_BITS;
+        int32 frac = pos & FRACTION_MASK;
+#if defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)
+        FLOAT_T s0, s1, s2, acc, d10;
+
+        s0 = in[idx - 1];
+        s1 = in[idx];
+        s2 = in[idx + 1];
+        acc = in[idx + 2];
+        frac += mlt_fraction;
+        d10 = s1 - s0;
+        acc += -3 * s2 + 3 * s1 - s0;
+        acc *= (FLOAT_T)(frac - ml2_fraction) * DIV_6 * div_fraction;
+        acc += s2 - s1 - d10;
+        acc *= (FLOAT_T)(frac - mlt_fraction) * DIV_2 * div_fraction;
+        acc += d10;
+        acc *= (FLOAT_T)frac * div_fraction;
+        acc += s0;
+        *dest++ = acc;
+#else // DATA_T_INT32
+        int32 s0, s1, s2, acc, d10;
+
+        s0 = in[idx - 1] * M_15BIT;
+        s1 = in[idx] * M_15BIT;
+        s2 = in[idx + 1] * M_15BIT;
+        acc = in[idx + 2] * M_15BIT;
+        frac += mlt_fraction;
+        d10 = s1 - s0;
+        acc += -3*s2 + 3*s1 - s0;
+        acc = imuldiv_fraction(acc, (frac - ml2_fraction) / 6);
+        acc += s2 - s1 - d10;
+        acc = imuldiv_fraction(acc, (frac - mlt_fraction) >> 1);
+        acc += d10;
+        acc = imuldiv_fraction(acc, frac);
+        acc += s0;
+        *dest++ = acc;
+#endif
+        pos += step;
+        done++;
+    }
+
+    rec->offset = base + (splen_t)pos;
+    *out_count = done;
+    return dest;
+}
+#endif
+
+/*
+ * Non-looping playback of a float sample with Lagrange interpolation:
+ * render samples until the precomputed end of the data, then pad the rest
+ * of the request with zeros and flag the voice as finished.
+ */
+static void lao_rs_plain_float(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+    int32 limit;
+
+    /* A voice that just left a bidirectional loop may still run backwards. */
+    if (rec->increment < 0)
+        rec->increment = -rec->increment;
+
+    /* Precomputed step count to data_length plus 4, clamped to the request. */
+    limit = PRECALC_LOOP_COUNT(rec->offset, rec->data_length, rec->increment) + 4;
+    if (limit > count)
+        limit = count;
+    else if (limit < 0)
+        limit = 0;
+
+    /* The bulk path is only used once the position is at least one whole sample in. */
+    if ((rec->offset >> FRACTION_BITS) >= 1)
+        dest = resample_lagrange_float_multi(vp, dest, limit, &done);
+
+    /* Whatever the bulk path left over is rendered one sample at a time. */
+    while (done < limit) {
+        *dest++ = resample_lagrange_float_single(vp);
+        rec->offset += rec->increment;
+        done++;
+    }
+
+    /* Past the end: emit silence and mark the voice as finished. */
+    if (done < count) {
+        vp->finish_voice = 1;
+        do {
+            *dest++ = 0;
+        } while (++done < count);
+    }
+}
+
+/*
+ * Forward-looping playback of a float sample with Lagrange interpolation:
+ * play up to the loop end, jump back by the loop length and continue.
+ */
+static void lao_rs_loop_float(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+
+    if ((rec->offset >> FRACTION_BITS) >= 1) {
+        /* Stop the bulk path 4 steps short of the loop end (4-point interpolation). */
+        int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+        if (bulk > count)
+            bulk = count;
+        else if (bulk < 0)
+            bulk = 0;
+        dest = resample_lagrange_float_multi(vp, dest, bulk, &done);
+    }
+
+    while (done < count) {
+        *dest++ = resample_lagrange_float_single(vp);
+        rec->offset += rec->increment;
+        /* Wrap around; this relies on the loop being longer than one increment. */
+        if (rec->offset >= rec->loop_end)
+            rec->offset -= rec->loop_end - rec->loop_start;
+        done++;
+    }
+}
+
+/*
+ * Bidirectional (ping-pong) looping playback of a float sample with
+ * Lagrange interpolation: the position is reflected at loop_end and at
+ * loop_start, and the sign of the increment flips at each reflection.
+ */
+static void lao_rs_bidir_float(Voice *vp, DATA_T *dest, int32 count)
+{
+    resample_rec_t *rec = &vp->resrc;
+    int32 done = 0;
+
+    /* The bulk path only handles forward motion away from the sample start. */
+    if ((rec->offset >> FRACTION_BITS) >= 1 && rec->increment > 0) {
+        /* Stop 4 steps short of the loop end (4-point interpolation). */
+        int32 bulk = PRECALC_LOOP_COUNT(rec->offset, rec->loop_end, rec->increment) - 4;
+
+        if (bulk > count)
+            bulk = count;
+        else if (bulk < 0)
+            bulk = 0;
+        dest = resample_lagrange_float_multi(vp, dest, bulk, &done);
+    }
+
+    while (done < count) {
+        *dest++ = resample_lagrange_float_single(vp);
+        rec->offset += rec->increment;
+
+        if (rec->increment > 0 && rec->offset >= rec->loop_end) {
+            /* Ran past the loop end while moving forward: reflect and reverse. */
+            rec->offset = (rec->loop_end << 1) - rec->offset;
+            rec->increment = -rec->increment;
+        } else if (rec->increment <= 0 && rec->offset <= rec->loop_start) {
+            /* Ran past the loop start while moving backward: reflect and reverse. */
+            rec->offset = (rec->loop_start << 1) - rec->offset;
+            rec->increment = -rec->increment;
+        }
+        done++;
+    }
+}
+
+/*
+ * Select the playback routine (plain / bidirectional loop / forward loop)
+ * for a float sample, record the choice in vp->resrc.mode and render
+ * `count` samples into ptr.
+ *
+ * Once plain playback has been chosen because the voice is ending,
+ * vp->resrc.plain_flag is set so that later calls stay on the plain path.
+ */
+static inline void resample_voice_lagrange_float_optimize(Voice *vp, DATA_T *ptr, int32 count)
+{
+    int mode = vp->sample->modes;
+
+    if (!vp->resrc.plain_flag) {
+        /* No envelope and the voice is off or dying. */
+        const int ending_without_env = !(mode & MODES_ENVELOPE) && (vp->status & (VOICE_OFF | VOICE_DIE));
+        /* Release-sample mode and the voice is off. */
+        const int releasing = (mode & MODES_RELEASE) && (vp->status & VOICE_OFF);
+
+        if (ending_without_env || releasing)
+            vp->resrc.plain_flag = 1;   /* lock onto the non-looping path */
+    }
+
+    if (vp->resrc.plain_flag) {
+        vp->resrc.mode = RESAMPLE_MODE_PLAIN;
+        lao_rs_plain_float(vp, ptr, count);
+        return;
+    }
+
+    if (mode & MODES_PINGPONG) {
+        vp->resrc.mode = RESAMPLE_MODE_BIDIR_LOOP;
+        lao_rs_bidir_float(vp, ptr, count);
+        return;
+    }
+
+    vp->resrc.mode = RESAMPLE_MODE_LOOP;
+    lao_rs_loop_float(vp, ptr, count);
+}
+#endif /* optimize lagrange float resample */
+
+
+
+/*************** resampling with fixed increment *****************/
+///r
+/*
+ * Non-looping playback straight from the voice's cache data: samples are
+ * copied one-to-one (no interpolation) into ptr + vp->resrc.buffer_offset.
+ * Copying stops at the sample's loop_end position; the rest of the request
+ * is zero-filled and the voice is flagged as finished.  The voice offset is
+ * advanced by the number of samples copied.
+ */
+static void rs_plain_c(int v, DATA_T *ptr, int32 count)
+{
+    Voice *vp = &voice[v];
+    DATA_T *out = ptr + vp->resrc.buffer_offset;
+    cache_t *cache = (cache_t *)vp->sample->data;
+    const splen_t end = vp->sample->loop_end >> FRACTION_BITS;
+    splen_t pos = vp->resrc.offset >> FRACTION_BITS;
+    splen_t n;
+    int32 avail;
+
+    /* Number of samples that can be copied before reaching `end`. */
+    n = pos + count;
+    if (n > end)
+        n = end;
+    avail = n - pos;
+
+    for (n = 0; n < avail; n++)
+        out[n] = cache[n + pos];
+
+    /* Ran out of data: pad with silence and mark the voice as finished. */
+    if (n < count)
+        vp->finish_voice = 1;
+    for (; n < count; n++)
+        out[n] = 0;
+
+    pos += avail;
+    vp->resrc.offset = pos << FRACTION_BITS;
+}
+///r
+static void rs_plain(int v, DATA_T *ptr, int32 count) /* render `count` samples of voice[v] into ptr without looping */
+{
+ /* Play the sample until its end (data_length); once past the end, output
+    zeros and set vp->finish_voice. */
+ Voice *vp = &voice[v];
+ DATA_T *dest = ptr;
+ sample_t *src = vp->sample->data; /* not referenced by name below; presumably read by the RESAMPLATION macro - confirm */
+ int data_type = vp->sample->data_type; /* likewise presumably consumed by RESAMPLATION - confirm */
+ splen_t
+ ofs = vp->resrc.offset,
+ ls = 0,
+ le = vp->sample->data_length;
+ int32 incr = vp->resrc.increment;
+#ifdef PRECALC_LOOPS
+ int32 i = 0, j;
+#endif
+
+ if(vp->cache && incr == (1 << FRACTION_BITS)){ /* cache present and the step is exactly one sample */
+ rs_plain_c(v, ptr, count); /* hand over to the cache copy routine */
+ return;
+ }
+
+#ifdef PRECALC_LOOPS
+ if (incr < 0) incr = -incr; /* In case we're coming out of a bidir loop (local copy only; vp->resrc.increment is not written here) */
+ /* Precalc how many times we should go through the loop.
+ NOTE: Assumes that incr > 0 and that ofs <= le */
+ j = PRECALC_LOOP_COUNT(ofs, le, incr);
+ if (j > count) {j = count;} /* never render more than requested */
+ else if(j < 0) {j = 0;}
+ for(i = 0; i < j; i++) {
+ RESAMPLATION; /* presumably writes one interpolated sample through dest - see the macro definition */
+ ofs += incr;
+ }
+ for (; i < count; i++) { /* past the end of the data: pad with silence */
+ *dest++ = 0;
+ vp->finish_voice = 1;
+ }
+#else /* PRECALC_LOOPS */
+ while (count--)
+ {
+ if (ofs >= le){ /* past the end of the data: pad with silence */
+ *dest++ = 0;
+ vp->finish_voice = 1;
+ }else {
+ RESAMPLATION; /* presumably writes one interpolated sample through dest - see the macro definition */
+ ofs += incr;
+ }
+ }
+#endif /* PRECALC_LOOPS */
+
+ vp->resrc.offset = ofs; /* Update offset */
+}
+static void rs_loop_c(Voice *vp, DATA_T *ptr, int32 count)
+{
+ splen_t
+ ofs = vp->resrc.offset >> FRACTION_BITS,
+ le = vp->sample->loop_end >> FRACTION_BITS,
+ ll = le - (vp->sample->loop_start >> FRACTION_BITS);
+
+ DATA_T *dest = ptr;
+ cache_t *src = (cache_t *)vp->sample->data;
+ int32 i, j;
+
+// ERROR loop_start = 4215529472
+ if(ll < 0)
+ {
+ vp->sample->loop_start = 0;
+ ll = le - (vp->sample->loop_start >> FRACTION_BITS);
+ }
+
+ while(count){
+ while(ofs >= le)
+ ofs -= ll;
/* Precalc how many times we should go through the loop */
i = le - ofs;
if(i > count)
{
Voice *vp = &voice[v];
int mode;
- int32 i;
+ int32 i = 0;
int32 a;
if(!opt_resample_over_sampling && vp->sample->sample_rate == play_mode->rate &&
/* Let the caller know how much data we had left */
count2 = (int32)((vp->sample->data_length >> FRACTION_BITS) - ofs);
}else
- vp->resrc.offset += (count2 << FRACTION_BITS);
+ vp->resrc.offset += ((splen_t)count2 << FRACTION_BITS);
switch(vp->sample->data_type){
case SAMPLE_TYPE_INT16:
vp->resrc.increment = (vp->resrc.increment >= 0) ? a : -a;
#if defined(PRECALC_LOOPS)
- if(opt_resample_type == RESAMPLE_LINEAR && vp->sample->data_type == SAMPLE_TYPE_INT16){
- resample_voice_linear_optimize(vp, ptr, count);
- return;
+ if(opt_resample_type == RESAMPLE_LINEAR){
+ if(vp->sample->data_type == SAMPLE_TYPE_INT16){
+ resample_voice_linear_optimize(vp, ptr, count);
+ return;
+ }else if(vp->sample->data_type == SAMPLE_TYPE_FLOAT && !opt_pre_resamplation){
+ resample_voice_linear_float_optimize(vp, ptr, count);
+ return;
+ }
+ } else if (opt_resample_type == RESAMPLE_LAGRANGE){
+ if(vp->sample->data_type == SAMPLE_TYPE_INT16){
+ resample_voice_lagrange_optimize(vp, ptr, count);
+ return;
+ }else if(vp->sample->data_type == SAMPLE_TYPE_FLOAT && !opt_pre_resamplation){
+ resample_voice_lagrange_float_optimize(vp, ptr, count);
+ return;
+ }
}
#endif
--- /dev/null
+// SFZ Support Routines for TiMidity++
+// Copyright (c) 2018 Starg <https://osdn.net/projects/timidity41>
+
+extern "C"
+{
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "timidity.h"
+#include "common.h"
+#include "controls.h"
+#include "tables.h"
+
+#include "sfz.h"
+
+// smplfile.c
+Instrument *extract_sample_file(char *sample_file);
+}
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+
+#include <algorithm>
+#include <exception>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <sstream>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace TimSFZ
+{
+
+using namespace std::string_literals;
+using namespace std::string_view_literals;
+
+// Deleter for std::unique_ptr<timidity_file, ...>: hands the handle back to close_file().
+struct TFFileCloser
+{
+    void operator()(timidity_file* pFile) const
+    {
+        // Nothing to release for a null handle.
+        if (pFile == nullptr)
+        {
+            return;
+        }
+
+        ::close_file(pFile);
+    }
+};
+
+// Deleter for std::unique_ptr<Instrument, ...>: releases the instrument with free_instrument().
+struct InstrumentDeleter
+{
+    void operator()(Instrument* pInstrument) const
+    {
+        // Nothing to release for a null instrument.
+        if (pInstrument == nullptr)
+        {
+            return;
+        }
+
+        ::free_instrument(pInstrument);
+    }
+};
+
+// Reads the complete content of `url` through open_file()/tf_getc() and returns it as a string.
+// Throws std::runtime_error when the file cannot be opened.
+std::string ReadEntireFile(std::string url)
+{
+    std::unique_ptr<timidity_file, TFFileCloser> pFile(::open_file(url.data(), 1, OF_NORMAL));
+
+    if (pFile == nullptr)
+    {
+        throw std::runtime_error("unable to open '"s + url + "'");
+    }
+
+    std::string content;
+
+    // Pull one character at a time until tf_getc() reports EOF.
+    for (int ch = tf_getc(pFile.get()); ch != EOF; ch = tf_getc(pFile.get()))
+    {
+        content.push_back(static_cast<char>(ch));
+    }
+
+    return content;
+}
+
+// Resolves `relPath` against the directory part of `base`.
+//
+// The directory part is everything before the last '/' or '\\' in `base`; it is joined to `relPath`
+// with a single '/'. When `base` contains no separator at all (a bare file name), `relPath` is
+// returned unchanged so that it stays relative to the current directory instead of being turned
+// into "/relPath" at the filesystem root.
+std::string ConstructPath(std::string_view base, std::string_view relPath)
+{
+    const std::size_t lastPathDelimiterOffset = base.find_last_of("/\\");
+
+    if (lastPathDelimiterOffset == base.npos)
+    {
+        return std::string(relPath);
+    }
+
+    return std::string(base.substr(0, lastPathDelimiterOffset)).append("/").append(relPath);
+}
+
+// Describes one input file by its path.
+// NOTE(review): no use of this type is visible in this part of the file - confirm it is still needed.
+struct FileInfo
+{
+ std::string FilePath;
+};
+
+// Source position used for diagnostics: which file, and which line inside it.
+struct FileLocationInfo
+{
+ // Index of the file; the preprocessor uses it to look up the file name it recorded.
+ std::size_t FileID;
+ std::uint32_t Line; // 1-based
+};
+
+// Text storage that remembers where each part of its content came from.
+//
+// The text itself lives in m_Text. m_Locations records, for a number of offsets in m_Text, the
+// file/line of the character at that offset; the location of any other offset is derived by
+// counting '\n' characters from the closest preceding record.
+class TextBuffer
+{
+public:
+    TextBuffer() = default;
+
+    // Creates a buffer holding `str`, whose first character is located at `loc`.
+    TextBuffer(std::string str, FileLocationInfo loc)
+        : m_Text(std::move(str)), m_Locations{PartLocationInfo{0, loc}}
+    {
+    }
+
+    // Non-owning window (offset + length) into a TextBuffer.
+    // A View stores a raw pointer to its buffer, so the buffer must outlive the View and must not
+    // be moved while the View is in use.
+    class View
+    {
+        friend class TextBuffer;
+
+        View(const TextBuffer* pBuffer, std::size_t offset, std::size_t length)
+            : m_pBuffer(pBuffer), m_Offset(offset), m_Length(length)
+        {
+        }
+
+    public:
+        // Empty view that is not attached to any buffer.
+        View() : m_pBuffer(nullptr), m_Offset(0), m_Length(0)
+        {
+        }
+
+        View(const View&) = default;
+        View& operator=(const View&) = default;
+
+        bool IsEmpty() const
+        {
+            return m_Length == 0;
+        }
+
+        std::size_t GetLength() const
+        {
+            return m_Length;
+        }
+
+        // Shrinks the view from its end; `len` must not exceed the current length.
+        void SetLength(std::size_t len)
+        {
+            assert(len <= m_Length);
+            m_Length = len;
+        }
+
+        // Character at position `i` of the view. Not checked against the view length.
+        char operator[](std::size_t i) const
+        {
+            return (*m_pBuffer)[m_Offset + i];
+        }
+
+        // Same as operator[], with `i` defaulting to the first character.
+        char Peek(std::size_t i = 0) const
+        {
+            return (*m_pBuffer)[m_Offset + i];
+        }
+
+        // Character at position `i`, or `defaultValue` when `i` is outside the view.
+        char PeekOr(std::size_t i = 0, char defaultValue = '\0') const
+        {
+            return i < m_Length ? (*m_pBuffer)[m_Offset + i] : defaultValue;
+        }
+
+        std::string ToString() const
+        {
+            return std::string(ToStringView());
+        }
+
+        // The returned string_view points into the buffer's storage.
+        std::string_view ToStringView() const
+        {
+            return std::string_view(m_pBuffer->m_Text.data() + m_Offset, m_Length);
+        }
+
+        // Drops `count` characters from the front of the view.
+        void Advance(std::size_t count = 1)
+        {
+            assert(count <= m_Length);
+            m_Offset += count;
+            m_Length -= count;
+        }
+
+        // File/line of the character at position `i` of the view.
+        FileLocationInfo GetLocationInfo(std::size_t i = 0) const
+        {
+            return m_pBuffer->GetLocationInfo(m_Offset + i);
+        }
+
+    private:
+        const TextBuffer* m_pBuffer;
+        std::size_t m_Offset;
+        std::size_t m_Length;
+    };
+
+    // View covering the whole buffer.
+    View GetView() const
+    {
+        return View(this, 0, m_Text.size());
+    }
+
+    // View covering `length` characters starting at `offset`.
+    View GetView(std::size_t offset, std::size_t length) const
+    {
+        return View(this, offset, length);
+    }
+
+    char operator[](std::size_t offset) const
+    {
+        return m_Text[offset];
+    }
+
+private:
+    struct PartLocationInfo
+    {
+        std::size_t Offset; // offset in m_Text
+        FileLocationInfo FirstLocation;
+    };
+
+    // Returns the last location record whose Offset is <= `offset`.
+    // At least one record must exist at or before `offset`.
+    auto FindMatchingLocationInfo(std::size_t offset) const
+    {
+        auto it = std::upper_bound(
+            m_Locations.begin(),
+            m_Locations.end(),
+            offset,
+            [] (auto&& a, auto&& b)
+            {
+                return a < b.Offset;
+            }
+        );
+
+        assert(m_Locations.begin() < it);
+        return std::prev(it);
+    }
+
+public:
+    // File/line of the character at `offset`: the closest preceding record, advanced by the
+    // number of '\n' characters between that record and `offset`.
+    FileLocationInfo GetLocationInfo(std::size_t offset) const
+    {
+        auto it = FindMatchingLocationInfo(offset);
+        auto loc = it->FirstLocation;
+        loc.Line += static_cast<std::uint32_t>(
+            std::count(m_Text.begin() + it->Offset, m_Text.begin() + offset, '\n')
+        );
+        return loc;
+    }
+
+    // Appends a character without adding a location record.
+    void Append(char c)
+    {
+        m_Text.append(1, c);
+    }
+
+    // Appends text without adding a location record.
+    void Append(std::string_view s)
+    {
+        m_Text.append(s);
+    }
+
+    // Appends text whose first character is located at `loc`.
+    void Append(std::string_view str, FileLocationInfo loc)
+    {
+        PartLocationInfo partLoc{m_Text.size(), loc};
+        m_Text.append(str);
+        m_Locations.push_back(partLoc);
+    }
+
+    // Appends the text covered by `view` (which must belong to another buffer) together with
+    // the location records that describe it.
+    void Append(const View& view)
+    {
+        assert(this != view.m_pBuffer);
+        const TextBuffer& src = *view.m_pBuffer;
+        const std::size_t viewEnd = view.m_Offset + view.m_Length;
+        auto it = src.FindMatchingLocationInfo(view.m_Offset);
+
+        // Record for the first appended character: the source record covering the start of the
+        // view, moved forward by the lines that lie between that record and the view start.
+        PartLocationInfo firstPart = *it;
+        firstPart.Offset = m_Text.size();
+        firstPart.FirstLocation.Line += static_cast<std::uint32_t>(
+            std::count(src.m_Text.begin() + it->Offset, src.m_Text.begin() + view.m_Offset, '\n')
+        );
+        m_Locations.push_back(firstPart);
+
+        // Carry over the remaining records that start inside the view. Their new offset is their
+        // distance from the view start added to the current end of this buffer. Records at or
+        // past the end of the view describe text that is not copied; adding them would break the
+        // ordering of m_Locations on the next append.
+        for (auto next = std::next(it); next != src.m_Locations.end() && next->Offset < viewEnd; ++next)
+        {
+            PartLocationInfo part = *next;
+            part.Offset = m_Text.size() + (next->Offset - view.m_Offset);
+            m_Locations.push_back(part);
+        }
+
+        m_Text.append(src.m_Text, view.m_Offset, view.m_Length);
+    }
+
+private:
+    std::string m_Text;
+    std::vector<PartLocationInfo> m_Locations; // must be sorted according to Offset
+};
+
+// Error raised by the SFZ preprocessor/parser. what() reads "<file>(<line>): <message>\n".
+class ParserException : public std::runtime_error
+{
+public:
+    ParserException(std::string_view fileName, std::uint32_t line, std::string_view msg)
+        : runtime_error(FormatErrorMessage(fileName, line, msg))
+    {
+    }
+
+private:
+    // Builds the text returned by what().
+    std::string FormatErrorMessage(std::string_view fileName, std::uint32_t line, std::string_view msg)
+    {
+        std::ostringstream formatted;
+        formatted << fileName;
+        formatted << "(" << line << "): ";
+        formatted << msg << "\n";
+        return formatted.str();
+    }
+};
+
+// Small matchers shared by the preprocessor and the parser.
+//
+// Every matcher takes the current input as a TextBuffer::View. Unless stated otherwise, a
+// matcher that returns true has consumed the matched text from `view`, and one that returns
+// false has left `view` untouched.
+class BasicParser
+{
+public:
+    // True when no input is left.
+    bool EndOfInput(TextBuffer::View& view)
+    {
+        return view.IsEmpty();
+    }
+
+    // Consumes one line terminator: "\n", "\r\n" or a lone "\r".
+    // A lone "\r" has to be accepted here because it is excluded from both SpaceChar and
+    // NonSpaceChar; otherwise no matcher could ever consume it and callers would get stuck.
+    bool EndOfLine(TextBuffer::View& view)
+    {
+        return Char(view, '\n') || String(view, "\r\n") || Char(view, '\r');
+    }
+
+    // Consumes the next character when `pred` accepts it.
+    template<typename T>
+    bool CharIf(TextBuffer::View& view, T&& pred)
+    {
+        if (EndOfInput(view))
+        {
+            return false;
+        }
+
+        if (std::forward<T>(pred)(view.Peek()))
+        {
+            view.Advance();
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    // Consumes any single character and stores it in `c`.
+    bool AnyChar(TextBuffer::View& view, char& c)
+    {
+        return CharIf(view, [&c] (char x) { c = x; return true; });
+    }
+
+    // Consumes the character `c`.
+    bool Char(TextBuffer::View& view, char c)
+    {
+        return CharIf(view, [c] (char x) { return c == x; });
+    }
+
+    // Consumes one character that occurs in `cs`.
+    bool CharSet(TextBuffer::View& view, std::string_view cs)
+    {
+        return CharIf(view, [cs] (char x) { return cs.find(x) != cs.npos; });
+    }
+
+    // Consumes one character in the inclusive range [cr.first, cr.second].
+    bool CharRange(TextBuffer::View& view, std::pair<char, char> cr)
+    {
+        return CharIf(view, [cr] (char x) { return cr.first <= x && x <= cr.second; });
+    }
+
+    // Consumes the exact character sequence `str`.
+    bool String(TextBuffer::View& view, std::string_view str)
+    {
+        auto curView = view;
+
+        for (auto&& i : str)
+        {
+            if (!Char(curView, i))
+            {
+                return false;
+            }
+        }
+
+        view = curView;
+        return true;
+    }
+
+    // Consumes a letter or '_'.
+    bool WordStartChar(TextBuffer::View& view)
+    {
+        return CharIf(
+            view,
+            [] (char x) { return ('A' <= x && x <= 'Z') || ('a' <= x && x <= 'z') || x == '_'; }
+        );
+    }
+
+    // Consumes a letter, a digit or '_'.
+    bool WordContinueChar(TextBuffer::View& view)
+    {
+        return CharIf(
+            view,
+            [] (char x)
+            {
+                return ('A' <= x && x <= 'Z') || ('a' <= x && x <= 'z') || ('0' <= x && x <= '9') || x == '_';
+            }
+        );
+    }
+
+    // Consumes a blank or a tab.
+    bool SpaceChar(TextBuffer::View& view)
+    {
+        return CharSet(view, " \t");
+    }
+
+    // Consumes any character other than blank, tab, '\r' and '\n'.
+    bool NonSpaceChar(TextBuffer::View& view)
+    {
+        return CharIf(view, [] (char x) { return x != ' ' && x != '\t' && x != '\r' && x != '\n'; });
+    }
+
+    // Consumes an identifier-like word and returns it in `word`.
+    bool AnyWord(TextBuffer::View& view, TextBuffer::View& word)
+    {
+        auto initView = view;
+
+        if (!WordStartChar(view))
+        {
+            return false;
+        }
+
+        while (WordContinueChar(view))
+        {
+        }
+
+        word = initView;
+        word.SetLength(initView.GetLength() - view.GetLength());
+        return true;
+    }
+
+    // Consumes `word`, provided it is not directly followed by another word character.
+    bool Word(TextBuffer::View& view, std::string_view word)
+    {
+        auto curView = view;
+
+        if (String(curView, word) && !WordContinueChar(curView))
+        {
+            view = curView;
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    // Consumes a run of one or more non-space characters and returns it in `seq`.
+    bool AnyCharSequence(TextBuffer::View& view, TextBuffer::View& seq)
+    {
+        auto initView = view;
+
+        if (!NonSpaceChar(view))
+        {
+            return false;
+        }
+
+        while (NonSpaceChar(view))
+        {
+        }
+
+        seq = initView;
+        seq.SetLength(initView.GetLength() - view.GetLength());
+        return true;
+    }
+
+    // Consumes a "//" comment up to, but not including, the line terminator.
+    bool LineComment(TextBuffer::View& view)
+    {
+        if (String(view, "//"))
+        {
+            auto curView = view;
+
+            // EndOfLine() consumes the terminator from curView only; `view` is updated solely
+            // after an ordinary character, so the terminator stays in the input.
+            while (!EndOfInput(curView) && !EndOfLine(curView))
+            {
+                char c;
+                AnyChar(curView, c);
+                view = curView;
+            }
+
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    // Consumes a "/* ... */" comment. An unterminated comment runs to the end of the input.
+    bool BlockComment(TextBuffer::View& view)
+    {
+        if (String(view, "/*"))
+        {
+            while (true)
+            {
+                if (String(view, "*/"))
+                {
+                    break;
+                }
+                else if (EndOfInput(view))
+                {
+                    // TODO: warn unterminated block comment
+                    break;
+                }
+                else
+                {
+                    char c;
+                    AnyChar(view, c);
+                }
+            }
+
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    // Skips any mix of comments, blanks and tabs. Returns whether anything was skipped.
+    bool DoSkips(TextBuffer::View& view)
+    {
+        if (!LineComment(view) && !BlockComment(view) && !SpaceChar(view))
+        {
+            return false;
+        }
+
+        while (LineComment(view) || BlockComment(view) || SpaceChar(view))
+        {
+        }
+
+        return true;
+    }
+
+    // Like DoSkips(), but also skips line terminators.
+    bool DoSkipsNL(TextBuffer::View& view)
+    {
+        if (!LineComment(view) && !BlockComment(view) && !SpaceChar(view) && !EndOfLine(view))
+        {
+            return false;
+        }
+
+        while (LineComment(view) || BlockComment(view) || SpaceChar(view) || EndOfLine(view))
+        {
+        }
+
+        return true;
+    }
+
+    // Consumes an optionally negative decimal integer and stores its value in `n`.
+    // The conversion uses std::stoi, so a literal that does not fit in int makes it throw
+    // std::out_of_range.
+    bool Integer(TextBuffer::View& view, std::int32_t& n)
+    {
+        auto curView = view;
+        Char(curView, '-');
+
+        if (CharRange(curView, {'0', '9'}))
+        {
+            while (CharRange(curView, {'0', '9'}))
+            {
+            }
+
+            auto intView = view;
+            intView.SetLength(view.GetLength() - curView.GetLength());
+            n = std::stoi(intView.ToString());
+            view = curView;
+            return true;
+        }
+
+        return false;
+    }
+
+    // Consumes a double-quoted string without escape sequences; `str` receives the text between
+    // the quotes. A literal that is not closed before the end of the line (or input) is still
+    // accepted: `str` then holds the text up to that point and the line terminator is left in
+    // the input.
+    bool DoubleQuoteStringNoEscape(TextBuffer::View& view, TextBuffer::View& str)
+    {
+        auto curView = view;
+
+        if (!Char(curView, '"'))
+        {
+            return false;
+        }
+
+        auto startView = curView;
+        auto endView = startView;
+
+        // The opening quote is consumed from here on, so that an empty unterminated literal
+        // does not report success while leaving the quote in the input.
+        view = curView;
+
+        while (true)
+        {
+            if (EndOfInput(curView) || EndOfLine(curView))
+            {
+                // TODO: warn unterminated string literal
+                break;
+            }
+            else if (Char(curView, '"'))
+            {
+                view = curView;
+                break;
+            }
+            else
+            {
+                char c;
+                AnyChar(curView, c);
+                view = curView;
+                endView = curView;
+            }
+        }
+
+        str = startView;
+        str.SetLength(startView.GetLength() - endView.GetLength());
+        return true;
+    }
+};
+
+// First pass over an SFZ file: resolves '#define $NAME value' and '#include "path"' directives,
+// expands '$NAME' macro references and writes everything else to an output TextBuffer that
+// keeps the original file/line of each part.
+class Preprocessor : private BasicParser
+{
+public:
+    // Loads `url` as the main input. Throws std::runtime_error when it cannot be read.
+    explicit Preprocessor(std::string url)
+        : m_FileNames{url}
+    {
+        m_InBuffers.push_back(std::make_unique<TextBuffer>(ReadEntireFile(url), FileLocationInfo{0, 1}));
+        m_InputStack.push({m_InBuffers[0]->GetView(), false});
+    }
+
+    // Runs the preprocessor until all input (main file, included files, macro bodies) is used up.
+    // Throws ParserException on malformed directives, undefined macros or too deep nesting, and
+    // std::runtime_error when an included file cannot be read.
+    void Preprocess()
+    {
+        while (true)
+        {
+            while (!m_InputStack.empty() && m_InputStack.top().View.IsEmpty())
+            {
+                m_InputStack.pop();
+            }
+
+            if (m_InputStack.empty())
+            {
+                break;
+            }
+
+            // NOTE: curView refers to the top stack entry and must not be used after a push.
+            auto& curView = m_InputStack.top().View;
+            auto initView = curView;
+            DoSkips(curView);
+
+            // Directives are only recognised in file input, not inside macro bodies.
+            if (!m_InputStack.top().StartsAtMiddle)
+            {
+                if (Word(curView, "#define"))
+                {
+                    DoSkips(curView);
+
+                    if (!Char(curView, '$'))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#define': expected '$'"
+                        );
+                    }
+
+                    TextBuffer::View nameView;
+                    if (!AnyWord(curView, nameView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#define': expected macro name"
+                        );
+                    }
+
+                    DoSkips(curView);
+                    auto macroDefView = curView;
+                    auto macroDefEndView = macroDefView;
+
+                    // The macro body runs to the last non-space sequence on the line.
+                    while (true)
+                    {
+                        DoSkips(curView);
+
+                        TextBuffer::View seq;
+                        if (AnyCharSequence(curView, seq))
+                        {
+                            macroDefEndView = curView;
+                        }
+                        else if (EndOfInput(curView) || EndOfLine(curView))
+                        {
+                            break;
+                        }
+                        else
+                        {
+                            assert(false);
+                            break;
+                        }
+                    }
+
+                    macroDefView.SetLength(macroDefView.GetLength() - macroDefEndView.GetLength());
+
+                    // insert_or_assign() reports `false` when an existing entry was overwritten.
+                    if (!m_DefinedMacros.insert_or_assign(nameView.ToString(), macroDefView).second)
+                    {
+                        // TODO: warn macro redefinition
+                    }
+
+                    continue;
+                }
+                else if (Word(curView, "#include"))
+                {
+                    DoSkips(curView);
+
+                    TextBuffer::View pathView;
+                    if (!DoubleQuoteStringNoEscape(curView, pathView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#include': expected file name"
+                        );
+                    }
+
+                    DoSkips(curView);
+
+                    if (!EndOfInput(curView) && !EndOfLine(curView))
+                    {
+                        throw ParserException(
+                            m_FileNames[curView.GetLocationInfo().FileID],
+                            curView.GetLocationInfo().Line,
+                            "'#include': unexpected characters after file name"
+                        );
+                    }
+
+                    const FileLocationInfo includeLoc = pathView.GetLocationInfo();
+
+                    // The included path is relative to the file containing the directive.
+                    std::string path = ConstructPath(m_FileNames[includeLoc.FileID], pathView.ToStringView());
+                    m_FileNames.push_back(path);
+                    m_InBuffers.push_back(
+                        std::make_unique<TextBuffer>(ReadEntireFile(path), FileLocationInfo{m_FileNames.size() - 1, 1})
+                    );
+                    PushInput(m_InBuffers.back()->GetView(), false, includeLoc);
+                    continue;
+                }
+            }
+
+            // Copy whatever DoSkips() consumed (blanks and comments) to the output.
+            auto skipView = initView;
+            skipView.SetLength(initView.GetLength() - curView.GetLength());
+            m_OutBuffer.Append(skipView);
+
+            if (Char(curView, '$'))
+            {
+                TextBuffer::View nameView;
+                if (!AnyWord(curView, nameView))
+                {
+                    throw ParserException(
+                        m_FileNames[curView.GetLocationInfo().FileID],
+                        curView.GetLocationInfo().Line,
+                        "expected macro name after '$'"
+                    );
+                }
+
+                auto it = m_DefinedMacros.find(nameView.ToString());
+                if (it == m_DefinedMacros.end())
+                {
+                    throw ParserException(
+                        m_FileNames[curView.GetLocationInfo().FileID],
+                        curView.GetLocationInfo().Line,
+                        "macro '$"s.append(nameView.ToStringView()).append("' is not defined")
+                    );
+                }
+
+                // The macro body becomes the current input; it is read before the rest of the
+                // text that followed the reference.
+                PushInput(it->second, true, nameView.GetLocationInfo());
+            }
+            else if (TextBuffer::View word; AnyWord(curView, word))
+            {
+                m_OutBuffer.Append(word);
+            }
+            else if (char c; AnyChar(curView, c))
+            {
+                m_OutBuffer.Append(c);
+            }
+        }
+    }
+
+    // Name of the file that was registered under `id` (0 is the main file).
+    std::string_view GetFileNameFromID(std::size_t id) const
+    {
+        return m_FileNames[id];
+    }
+
+    TextBuffer& GetOutBuffer()
+    {
+        return m_OutBuffer;
+    }
+
+    const TextBuffer& GetOutBuffer() const
+    {
+        return m_OutBuffer;
+    }
+
+private:
+    struct InputStackItem
+    {
+        TextBuffer::View View;
+        bool StartsAtMiddle; // true for macro expansion results, false for main and #include'd files
+    };
+
+    // Upper bound for nested inputs. A file that includes itself or a macro that refers to
+    // itself would otherwise keep pushing inputs until memory runs out.
+    static constexpr std::size_t MaxInputDepth = 256;
+
+    // Makes `view` the current input. `from` is the location reported when the nesting limit is hit.
+    void PushInput(const TextBuffer::View& view, bool startsAtMiddle, FileLocationInfo from)
+    {
+        if (m_InputStack.size() >= MaxInputDepth)
+        {
+            throw ParserException(
+                m_FileNames[from.FileID],
+                from.Line,
+                "'#include' or macro expansion is nested too deeply"
+            );
+        }
+
+        m_InputStack.push({view, startsAtMiddle});
+    }
+
+    std::vector<std::string> m_FileNames;
+
+    // Views (input stack, macro table) hold raw pointers to these buffers. Each buffer is
+    // allocated separately so that adding another one never moves the existing buffers.
+    std::vector<std::unique_ptr<TextBuffer>> m_InBuffers;
+    std::stack<InputStackItem, std::vector<InputStackItem>> m_InputStack;
+    TextBuffer m_OutBuffer;
+    std::unordered_map<std::string, TextBuffer::View> m_DefinedMacros;
+};
+
+// Opcodes understood by the parser. The trailing comment on each entry is the opcode name the
+// parser maps to it.
+enum class OpCodeKind
+{
+ HiKey, // "hikey"
+ HiVelocity, // "hivel"
+ LoKey, // "lokey"
+ LoopEnd, // "loop_end"
+ LoopMode, // "loop_mode"
+ LoopStart, // "loop_start"
+ LoVelocity, // "lovel"
+ PitchKeyCenter, // "pitch_keycenter"
+ Sample // "sample"
+};
+
+// Values accepted for the loop_mode opcode.
+enum class LoopModeKind
+{
+ NoLoop, // "no_loop"
+ OneShot, // "one_shot"
+ LoopContinuous, // "loop_continuous"
+ LoopSustain // "loop_sustain"
+};
+
+// One parsed "opcode=value" pair.
+struct OpCodeAndValue
+{
+ // Where the opcode was found in the input.
+ FileLocationInfo Location;
+ OpCodeKind OpCode;
+ // The parser stores std::int32_t for key opcodes, double for velocity and loop point opcodes,
+ // LoopModeKind for loop_mode, and std::string otherwise.
+ std::variant<std::int32_t, double, LoopModeKind, std::string> Value;
+};
+
+// Section headers recognised by the parser, written as <control>, <global>, <group> and <region>.
+enum class HeaderKind
+{
+ Control,
+ Global,
+ Group,
+ Region
+};
+
+// A header together with the opcodes listed under it.
+struct Section
+{
+    // Returns the value of the last occurrence of `opCode` as type T.
+    // Yields std::nullopt when the opcode is absent or when that last occurrence holds a value of
+    // a different type.
+    template<typename T>
+    std::optional<T> GetAs(OpCodeKind opCode) const
+    {
+        // Walk backwards so that the most recently listed occurrence decides.
+        for (auto it = OpCodes.rbegin(); it != OpCodes.rend(); ++it)
+        {
+            if (it->OpCode != opCode)
+            {
+                continue;
+            }
+
+            if (const T* pValue = std::get_if<T>(&it->Value))
+            {
+                return std::make_optional(*pValue);
+            }
+
+            return std::nullopt;
+        }
+
+        return std::nullopt;
+    }
+
+    FileLocationInfo HeaderLocation;
+    HeaderKind Header;
+    std::vector<OpCodeAndValue> OpCodes;
+};
+
+// Second pass: turns the preprocessor output into a list of sections, each holding the
+// "opcode=value" pairs that follow its header.
+class Parser : private BasicParser
+{
+public:
+    // `pp` must already have been run; it is also used to translate file IDs into names.
+    explicit Parser(Preprocessor& pp) : m_Preprocessor(pp)
+    {
+    }
+
+    Preprocessor& GetPreprocessor()
+    {
+        return m_Preprocessor;
+    }
+
+    // Sections in the order in which they appear in the input.
+    const std::vector<Section>& GetSections() const
+    {
+        return m_Sections;
+    }
+
+    // Parses the whole preprocessor output. Throws ParserException on syntax errors, unknown
+    // headers, unknown opcodes and malformed values.
+    void Parse()
+    {
+        auto view = m_Preprocessor.GetOutBuffer().GetView();
+
+        while (!view.IsEmpty())
+        {
+            DoSkipsNL(view);
+            Section sec;
+            sec.HeaderLocation = view.GetLocationInfo();
+
+            if (!ParseHeader(view, sec.Header))
+            {
+                throw ParserException(
+                    m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                    view.GetLocationInfo().Line,
+                    "expected section header"
+                );
+            }
+
+            // Collect opcodes until something that is not an opcode shows up.
+            while (true)
+            {
+                DoSkipsNL(view);
+                OpCodeAndValue opVal;
+                opVal.Location = view.GetLocationInfo();
+
+                if (ParseOpCode(view, opVal.OpCode))
+                {
+                    TextBuffer::View valView;
+                    if (ParseValueString(view, valView))
+                    {
+                        switch (opVal.OpCode)
+                        {
+                        case OpCodeKind::HiKey:
+                        case OpCodeKind::LoKey:
+                        case OpCodeKind::PitchKeyCenter:
+                            if (std::int32_t n; ParseMIDINoteNumber(valView, n))
+                            {
+                                opVal.Value = n;
+                            }
+                            else
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "expected MIDI note number"
+                                );
+                            }
+                            break;
+
+                        case OpCodeKind::HiVelocity:
+                        case OpCodeKind::LoopEnd:
+                        case OpCodeKind::LoopStart:
+                        case OpCodeKind::LoVelocity:
+                            try
+                            {
+                                opVal.Value = std::stod(valView.ToString());
+                            }
+                            catch (const std::invalid_argument&)
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "expected number"
+                                );
+                            }
+                            catch (const std::out_of_range&)
+                            {
+                                throw ParserException(
+                                    m_Preprocessor.GetFileNameFromID(valView.GetLocationInfo().FileID),
+                                    valView.GetLocationInfo().Line,
+                                    "overflow error in float literal"
+                                );
+                            }
+                            break;
+
+                        case OpCodeKind::LoopMode:
+                            opVal.Value = GetLoopModeKind(valView);
+                            break;
+
+                        default:
+                            opVal.Value = valView.ToString();
+                            break;
+                        }
+
+                        sec.OpCodes.push_back(std::move(opVal));
+                    }
+                    else
+                    {
+                        assert(false);
+                    }
+                }
+                else
+                {
+                    m_Sections.push_back(std::move(sec));
+                    break;
+                }
+            }
+
+        }
+    }
+
+private:
+    // Parses "<name>". Returns false without consuming anything when the input does not start
+    // with '<'; throws ParserException when the header is malformed or its name is unknown.
+    bool ParseHeader(TextBuffer::View& view, HeaderKind& kind)
+    {
+        if (!Char(view, '<'))
+        {
+            return false;
+        }
+
+        TextBuffer::View word;
+        if (!AnyWord(view, word))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected header name"
+            );
+        }
+
+        if (!Char(view, '>'))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected '>'"
+            );
+        }
+
+        static const std::unordered_map<std::string_view, HeaderKind> HeaderMap{
+            {"control"sv, HeaderKind::Control},
+            {"global"sv, HeaderKind::Global},
+            {"group"sv, HeaderKind::Group},
+            {"region"sv, HeaderKind::Region}
+        };
+
+        auto it = HeaderMap.find(word.ToStringView());
+
+        if (it == HeaderMap.end())
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(word.GetLocationInfo().FileID),
+                word.GetLocationInfo().Line,
+                "unknown header <"s.append(word.ToStringView()).append(">")
+            );
+        }
+
+        kind = it->second;
+        return true;
+    }
+
+    // Parses "name =" and stores the opcode in `op`. Returns false without consuming anything
+    // when the input is not of that shape; throws ParserException for an unknown opcode name.
+    bool ParseOpCode(TextBuffer::View& view, OpCodeKind& op)
+    {
+        auto curView = view;
+
+        TextBuffer::View word;
+        if (!AnyWord(curView, word))
+        {
+            return false;
+        }
+
+        DoSkips(curView);
+
+        if (!Char(curView, '='))
+        {
+            return false;
+        }
+
+        static const std::unordered_map<std::string_view, OpCodeKind> OpCodeMap{
+            {"hikey"sv, OpCodeKind::HiKey},
+            {"hivel"sv, OpCodeKind::HiVelocity},
+            {"lokey"sv, OpCodeKind::LoKey},
+            {"loop_end"sv, OpCodeKind::LoopEnd},
+            {"loop_mode"sv, OpCodeKind::LoopMode},
+            {"loop_start"sv, OpCodeKind::LoopStart},
+            {"lovel"sv, OpCodeKind::LoVelocity},
+            {"pitch_keycenter"sv, OpCodeKind::PitchKeyCenter},
+            {"sample"sv, OpCodeKind::Sample}
+        };
+
+        auto it = OpCodeMap.find(word.ToStringView());
+
+        if (it == OpCodeMap.end())
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(word.GetLocationInfo().FileID),
+                word.GetLocationInfo().Line,
+                "unknown opcode '"s.append(word.ToStringView()).append("'")
+            );
+        }
+
+        op = it->second;
+        view = curView;
+        return true;
+    }
+
+    // Extracts the value that follows "opcode=". The value may contain blanks; it ends at the
+    // end of the line or input, at a comment, at '<', or where the next "opcode=" starts.
+    // Surrounding blanks are not part of the value. Always returns true.
+    bool ParseValueString(TextBuffer::View& view, TextBuffer::View& value)
+    {
+        auto curView = view;
+
+        while (SpaceChar(curView))
+        {
+        }
+
+        auto startView = curView;
+        auto endView = startView;
+
+        while (true)
+        {
+            while (SpaceChar(curView))
+            {
+            }
+
+            // All lookahead below works on curView; only endView is handed back to the caller.
+            if (EndOfInput(curView) || EndOfLine(curView) || LineComment(curView) || BlockComment(curView))
+            {
+                break;
+            }
+            else if (Char(curView, '<'))
+            {
+                break;
+            }
+            else if (OpCodeKind op; ParseOpCode(curView, op))
+            {
+                break;
+            }
+            else if (TextBuffer::View seq; AnyCharSequence(curView, seq))
+            {
+                endView = curView;
+            }
+            else
+            {
+                // A character that none of the matchers accepts. Stop here: without the break
+                // this loop would never terminate in builds where assert() is compiled out.
+                assert(false);
+                break;
+            }
+        }
+
+        view = endView;
+        value = startView;
+        value.SetLength(startView.GetLength() - endView.GetLength());
+        return true;
+    }
+
+    // Parses a MIDI note given either as a plain number or as a note name: a letter A-G (either
+    // case), an optional '#', and an octave number. Note names follow the SFZ convention in
+    // which C-1 is note 0, C4 is note 60 and G9 is note 127.
+    // Returns false when the input is neither; throws ParserException when a note letter is not
+    // followed by an octave number.
+    bool ParseMIDINoteNumber(TextBuffer::View& view, std::int32_t& n)
+    {
+        if (Integer(view, n))
+        {
+            return true;
+        }
+
+        // Maps the note letter to its semitone offset within the octave.
+        auto pred = [&n] (char x)
+        {
+            switch (x)
+            {
+            case 'C':
+            case 'c':
+                n = 0;
+                return true;
+
+            case 'D':
+            case 'd':
+                n = 2;
+                return true;
+
+            case 'E':
+            case 'e':
+                n = 4;
+                return true;
+
+            case 'F':
+            case 'f':
+                n = 5;
+                return true;
+
+            case 'G':
+            case 'g':
+                n = 7;
+                return true;
+
+            case 'A':
+            case 'a':
+                n = 9;
+                return true;
+
+            case 'B':
+            case 'b':
+                n = 11;
+                return true;
+
+            default:
+                return false;
+            }
+        };
+
+        if (!CharIf(view, pred))
+        {
+            return false;
+        }
+
+        if (Char(view, '#'))
+        {
+            n++;
+        }
+
+        std::int32_t oct;
+        if (!Integer(view, oct))
+        {
+            throw ParserException(
+                m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+                view.GetLocationInfo().Line,
+                "expected octave number"
+            );
+        }
+
+        // Octave -1 starts at note 0, hence the +1.
+        n += (oct + 1) * 12;
+        return true;
+    }
+
+    // Translates a loop_mode value. Throws ParserException for anything but the known names.
+    LoopModeKind GetLoopModeKind(TextBuffer::View view)
+    {
+        auto curView = view;
+        if (TextBuffer::View word; AnyWord(curView, word))
+        {
+            static const std::unordered_map<std::string_view, LoopModeKind> LoopModeKindMap{
+                {"no_loop"sv, LoopModeKind::NoLoop},
+                {"one_shot"sv, LoopModeKind::OneShot},
+                {"loop_continuous"sv, LoopModeKind::LoopContinuous},
+                {"loop_sustain"sv, LoopModeKind::LoopSustain}
+            };
+
+            auto it = LoopModeKindMap.find(word.ToStringView());
+
+            if (it != LoopModeKindMap.end())
+            {
+                return it->second;
+            }
+        }
+
+        throw ParserException(
+            m_Preprocessor.GetFileNameFromID(view.GetLocationInfo().FileID),
+            view.GetLocationInfo().Line,
+            "unknown loop_mode '"s.append(view.ToStringView()).append("'")
+        );
+    }
+
+    Preprocessor& m_Preprocessor;
+    std::vector<Section> m_Sections;
+};
+
+// Builds a TiMidity Instrument from the sections produced by the Parser.
+class InstrumentBuilder
+{
+public:
+    // `parser` must already have been run; `name` becomes the instrument name.
+    InstrumentBuilder(Parser& parser, std::string_view name) : m_Parser(parser), m_Name(name)
+    {
+    }
+
+    // Creates one instrument of type INST_SFZ that contains the samples of all regions.
+    // Throws ParserException when a region names no sample and std::runtime_error when a sample
+    // file cannot be loaded.
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildInstrument()
+    {
+        auto flatSections = FlattenSections(m_Parser.GetSections());
+        std::unique_ptr<Instrument, InstrumentDeleter> pInstrument(reinterpret_cast<Instrument*>(safe_calloc(sizeof(Instrument), 1)));
+        pInstrument->type = INST_SFZ;
+        pInstrument->instname = safe_strdup(m_Name.c_str());
+
+        // Load every region as its own temporary instrument first.
+        std::vector<std::unique_ptr<Instrument, InstrumentDeleter>> sampleInstruments;
+        sampleInstruments.reserve(flatSections.size());
+
+        for (auto&& i : flatSections)
+        {
+            sampleInstruments.push_back(BuildSample(i));
+        }
+
+        pInstrument->samples = std::accumulate(
+            sampleInstruments.begin(),
+            sampleInstruments.end(),
+            0,
+            [] (auto&& a, auto&& b)
+            {
+                return a + b->samples;
+            }
+        );
+
+        pInstrument->sample = reinterpret_cast<Sample*>(safe_calloc(sizeof(Sample), pInstrument->samples));
+        Sample* pCurrentSample = pInstrument->sample;
+
+        for (auto&& i : sampleInstruments)
+        {
+            // The Sample structs are copied as they are, data_alloced included. Clearing the
+            // flag on the temporary afterwards leaves the copy as the only one marked as owning
+            // the sample data.
+            pCurrentSample = std::copy_n(i->sample, i->samples, pCurrentSample);
+            std::for_each(i->sample, i->sample + i->samples, [] (auto&& x) { x.data_alloced = false; });
+        }
+
+        return pInstrument;
+    }
+
+private:
+    // Values below come straight from the SFZ file, so they are brought into range before being
+    // narrowed to the field types.
+
+    // MIDI key number limited to 0..127.
+    static int8 ToKey(std::int32_t key)
+    {
+        return static_cast<int8>(std::clamp<std::int32_t>(key, 0, 127));
+    }
+
+    // MIDI velocity limited to 0..127; NaN and negative values become 0.
+    static uint8 ToVelocity(double velocity)
+    {
+        if (!(velocity > 0.0))
+        {
+            return 0;
+        }
+
+        return static_cast<uint8>(std::min(velocity, 127.0));
+    }
+
+    // Loop point given in sample frames, converted to the fixed-point form used by Sample.
+    // NaN and negative values become 0.
+    static splen_t ToLoopPoint(double frames)
+    {
+        if (!(frames > 0.0))
+        {
+            return 0;
+        }
+
+        return static_cast<splen_t>(frames) << FRACTION_BITS;
+    }
+
+    // Loads the sample file named by the region and applies the region's opcodes, in the order
+    // listed, to every sample it contains.
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildSample(const Section& flatSection)
+    {
+        if (auto sampleName = flatSection.GetAs<std::string>(OpCodeKind::Sample))
+        {
+            auto pSampleInstrument = BuildSingleSampleInstrument(*sampleName);
+
+            for (auto&& i : flatSection.OpCodes)
+            {
+                for (std::size_t j = 0; j < pSampleInstrument->samples; j++)
+                {
+                    auto pSample = &pSampleInstrument->sample[j];
+
+                    switch (i.OpCode)
+                    {
+                    case OpCodeKind::HiKey:
+                        pSample->high_key = ToKey(std::get<std::int32_t>(i.Value));
+                        break;
+
+                    case OpCodeKind::HiVelocity:
+                        pSample->high_vel = ToVelocity(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoKey:
+                        pSample->low_key = ToKey(std::get<std::int32_t>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoopEnd:
+                        pSample->loop_end = ToLoopPoint(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoopMode:
+                        pSample->modes &= ~(MODES_LOOPING | MODES_PINGPONG | MODES_REVERSE | MODES_SUSTAIN);
+
+                        switch (std::get<LoopModeKind>(i.Value))
+                        {
+                        case LoopModeKind::NoLoop:
+                            break;
+
+                        case LoopModeKind::OneShot:
+                            // ???
+                            break;
+
+                        case LoopModeKind::LoopContinuous:
+                            pSample->modes |= MODES_LOOPING | MODES_SUSTAIN;
+                            break;
+
+                        case LoopModeKind::LoopSustain:
+                            pSample->modes |= MODES_LOOPING | MODES_SUSTAIN | MODES_RELEASE;
+                            break;
+                        }
+                        break;
+
+                    case OpCodeKind::LoopStart:
+                        pSample->loop_start = ToLoopPoint(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::LoVelocity:
+                        pSample->low_vel = ToVelocity(std::get<double>(i.Value));
+                        break;
+
+                    case OpCodeKind::PitchKeyCenter:
+                        // root_key is used as an index right away, so it has to be a valid note.
+                        // Assumes freq_table covers notes 0..127 - TODO confirm against tables.h.
+                        pSample->root_key = ToKey(std::get<std::int32_t>(i.Value));
+                        pSample->root_freq = ::freq_table[pSample->root_key];
+                        break;
+
+                    case OpCodeKind::Sample:
+                        break;
+                    }
+                }
+            }
+
+            return pSampleInstrument;
+        }
+        else
+        {
+            throw ParserException(
+                m_Parser.GetPreprocessor().GetFileNameFromID(flatSection.HeaderLocation.FileID),
+                flatSection.HeaderLocation.Line,
+                "no sample specified for region"
+            );
+        }
+    }
+
+    // Loads `sampleUrl` with extract_sample_file(). Throws std::runtime_error on failure.
+    std::unique_ptr<Instrument, InstrumentDeleter> BuildSingleSampleInstrument(std::string sampleUrl)
+    {
+        std::unique_ptr<Instrument, InstrumentDeleter> pInstrument(::extract_sample_file(sampleUrl.data()));
+
+        if (!pInstrument)
+        {
+            throw std::runtime_error("unable to load sample '"s + sampleUrl + "'");
+        }
+
+        return pInstrument;
+    }
+
+    // Produces one section per <region>, holding the opcodes that apply to it in the order
+    // control, global, group, region. Since later entries take precedence when a section is
+    // queried, more specific headers override more general ones.
+    std::vector<Section> FlattenSections(const std::vector<Section>& sections)
+    {
+        std::vector<Section> flatSections;
+        std::vector<OpCodeAndValue> controlOpCodes;
+        std::vector<OpCodeAndValue> globalOpCodes;
+        std::vector<OpCodeAndValue> groupOpCodes;
+
+        for (auto&& i : sections)
+        {
+            switch (i.Header)
+            {
+            case HeaderKind::Control:
+                controlOpCodes.insert(controlOpCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+
+            case HeaderKind::Global:
+                globalOpCodes.insert(globalOpCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+
+            case HeaderKind::Group:
+                // A new group replaces the previous one, whereas control and global accumulate.
+                groupOpCodes = i.OpCodes;
+                break;
+
+            case HeaderKind::Region:
+            {
+                auto& newSection = flatSections.emplace_back();
+                newSection.Header = i.Header;
+                newSection.HeaderLocation = i.HeaderLocation;
+                auto& opCodes = newSection.OpCodes;
+                opCodes.clear();
+                opCodes.reserve(controlOpCodes.size() + globalOpCodes.size() + groupOpCodes.size() + i.OpCodes.size());
+                opCodes.insert(opCodes.end(), controlOpCodes.begin(), controlOpCodes.end());
+                opCodes.insert(opCodes.end(), globalOpCodes.begin(), globalOpCodes.end());
+                opCodes.insert(opCodes.end(), groupOpCodes.begin(), groupOpCodes.end());
+                opCodes.insert(opCodes.end(), i.OpCodes.begin(), i.OpCodes.end());
+                break;
+            }
+            }
+        }
+
+        return flatSections;
+    }
+
+    Parser& m_Parser;
+    std::string m_Name;
+};
+
+// One loaded SFZ file inside InstrumentCache.
+struct InstrumentCacheEntry
+{
+ InstrumentCacheEntry(std::string_view filePath, std::unique_ptr<Instrument, InstrumentDeleter> pInstrument)
+ : FilePath(filePath), pInstrument(std::move(pInstrument))
+ {
+ }
+
+ // Path the instrument was loaded from; used as the lookup key.
+ std::string FilePath;
+ // The instrument built from the file.
+ std::unique_ptr<Instrument, InstrumentDeleter> pInstrument;
+ // Instruments handed out by InstrumentCache::LoadSFZ() for this entry and not yet freed.
+ std::vector<Instrument*> RefInstruments;
+};
+
+// Keeps one built instrument per SFZ file and hands out copies whose samples refer to the data
+// of the cached instrument.
+class InstrumentCache
+{
+public:
+    // Returns a new Instrument for `filePath`, building and caching the file on first use.
+    // Errors while building are reported through ctl->cmsg() and yield nullptr.
+    Instrument* LoadSFZ(std::string filePath)
+    {
+        auto entry = m_Instruments.begin();
+
+        while (entry != m_Instruments.end() && !(entry->FilePath == filePath))
+        {
+            ++entry;
+        }
+
+        if (entry == m_Instruments.end())
+        {
+            try
+            {
+                TimSFZ::Preprocessor pp(filePath);
+                pp.Preprocess();
+                TimSFZ::Parser parser(pp);
+                parser.Parse();
+                TimSFZ::InstrumentBuilder builder(parser, filePath);
+                m_Instruments.emplace_back(filePath, builder.BuildInstrument());
+            }
+            catch (const std::exception& e)
+            {
+                char str[] = "%s";
+                ctl->cmsg(CMSG_ERROR, VERB_NORMAL, str, e.what());
+                return nullptr;
+            }
+
+            entry = std::prev(m_Instruments.end());
+        }
+
+        Instrument* pCached = entry->pInstrument.get();
+        std::unique_ptr<Instrument, InstrumentDeleter> pInstRef(reinterpret_cast<Instrument*>(safe_calloc(sizeof(Instrument), 1)));
+        entry->RefInstruments.push_back(pInstRef.get());
+        pInstRef->type = pCached->type;
+        pInstRef->instname = safe_strdup(pCached->instname);
+        pInstRef->samples = pCached->samples;
+        pInstRef->sample = reinterpret_cast<Sample*>(safe_calloc(sizeof(Sample), pCached->samples));
+        std::copy_n(pCached->sample, pCached->samples, pInstRef->sample);
+
+        // The copies must not be treated as owners of the sample data.
+        for (Sample* pSample = pInstRef->sample; pSample != pInstRef->sample + pInstRef->samples; ++pSample)
+        {
+            pSample->data_alloced = false;
+        }
+
+        return pInstRef.release();
+    }
+
+    // Releases the name of `pInstrument` and removes it from the entry that handed it out.
+    // The entry itself is dropped once no handed-out instrument refers to it any more.
+    void FreeInstrument(Instrument* pInstrument)
+    {
+        safe_free(pInstrument->instname);
+        pInstrument->instname = nullptr;
+
+        for (auto entry = m_Instruments.begin(); entry != m_Instruments.end(); ++entry)
+        {
+            auto ref = std::find(entry->RefInstruments.begin(), entry->RefInstruments.end(), pInstrument);
+
+            if (ref == entry->RefInstruments.end())
+            {
+                continue;
+            }
+
+            entry->RefInstruments.erase(ref);
+
+            if (entry->RefInstruments.empty())
+            {
+                m_Instruments.erase(entry);
+            }
+
+            return;
+        }
+    }
+
+    // Drops every cached entry.
+    void FreeAll()
+    {
+        m_Instruments.clear();
+    }
+
+private:
+    std::vector<InstrumentCacheEntry> m_Instruments;
+};
+
+// The single cache instance used by the C entry points of this file.
+InstrumentCache GlobalInstrumentCache;
+
+} // namespace TimSFZ
+
+extern "C"
+{
+
+// This is a no-op for now, but may be used in the future.
+void init_sfz(void)
+{
+}
+
+// Drops every entry of the global SFZ instrument cache.
+void free_sfz(void)
+{
+ TimSFZ::GlobalInstrumentCache.FreeAll();
+}
+
+// Loads the SFZ file `sample_file` through the global cache.
+// Returns NULL when `sample_file` is NULL or when the file cannot be loaded.
+Instrument *extract_sfz_file(char *sample_file)
+{
+    // Constructing the std::string argument from a null pointer would be undefined behaviour.
+    if (sample_file == nullptr)
+    {
+        return nullptr;
+    }
+
+    return TimSFZ::GlobalInstrumentCache.LoadSFZ(sample_file);
+}
+
+// Detaches `ip` from the global cache and releases its name. Passing NULL does nothing.
+void free_sfz_file(Instrument *ip)
+{
+    if (ip == nullptr)
+    {
+        return;
+    }
+
+    TimSFZ::GlobalInstrumentCache.FreeInstrument(ip);
+}
+
+} // extern "C"
--- /dev/null
+// SFZ Support Routines for TiMidity++
+// Copyright (c) 2018 Starg <https://osdn.net/projects/timidity41>
+
+#pragma once
+
+#ifdef ENABLE_SFZ
+
+#include "instrum.h"
+
+/* C entry points of the SFZ loader. */
+
+/* Currently does nothing. */
+void init_sfz(void);
+/* Drops all cached SFZ instruments. */
+void free_sfz(void);
+/* Loads an SFZ file and returns a new instrument for it; NULL when loading fails. */
+Instrument *extract_sfz_file(char *sample_file);
+/* Counterpart of extract_sfz_file() for an instrument that is no longer needed. */
+void free_sfz_file(Instrument *ip);
+
+#endif /* ENABLE_SFZ */
{
uint8 modes;
int32 sample_rate, root_freq;
- uint32 loopStart = 0, loopEnd = 0;
+ splen_t loopStart = 0, loopEnd = 0;
sample_rate = samplerc.dwSamplePeriod == 0 ? 0 : 1000000000L / samplerc.dwSamplePeriod;
root_freq = freq_table[samplerc.dwMIDIUnityNote];
const uint8 loopModes[] = { MODES_LOOPING, MODES_LOOPING | MODES_PINGPONG, MODES_LOOPING | MODES_REVERSE };
modes = loopModes[samplerc.loopType];
- loopStart = samplerc.loop_dwStart << FRACTION_BITS;
- loopEnd = samplerc.loop_dwEnd << FRACTION_BITS;
+ loopStart = (splen_t)samplerc.loop_dwStart << FRACTION_BITS;
+ loopEnd = (splen_t)samplerc.loop_dwEnd << FRACTION_BITS;
}
else
modes = 0;
sample = &inst->sample[i];
sample->data_alloced = 0;
sample->loop_start = 0;
- sample->loop_end = sample->data_length = frames << FRACTION_BITS;
+ sample->loop_end = sample->data_length = (splen_t)frames << FRACTION_BITS;
sample->sample_rate = sample_rate;
sample->low_key = 0;
sample->high_key = 127;
tf = sp->sfrom ? sfrom_sfrec->tf : rec->tf; ///r
#if defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT))
+#if 1 /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
+ if(sp->lowbit > 0 ){
+ /* 24 bit */
+ splen_t cnt;
+ uint8 *lowbit;
+ uint16 *highbit;
+ float *tmp_data;
+
+ frames = divi_2(sp->len);
+ sample->data = (sample_t*)safe_large_malloc(sizeof(float) * (frames + 128));
+ sample->data_alloced = 1;
+ sample->data_type = SAMPLE_TYPE_FLOAT;
+ highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
+ lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit
+ tf_seek(tf, sp->start, SEEK_SET);
+ tf_read(highbit, sp->len, 1, tf);
+ tf_seek(tf, sp->lowbit, SEEK_SET);
+ tf_read(lowbit, frames, 1, tf);
+ tmp_data = (float *)sample->data;
+ for(j = 0; j < frames; j++) {
+ // 24bit to int32full
+ int32 tmp_i = 0; // byte 1: leaving it as 00 seems to be fine?
+ tmp_i |= (uint32)lowbit[j] << 8; // 2byte
+ tmp_i |= (uint32)highbit[j] << 16; // 3-4byte
+#ifndef LITTLE_ENDIAN
+ XCHG_LONG(tmp_i)
+#endif
+ tmp_data[j] = (float)tmp_i * DIV_31BIT;
+ }
+ safe_free(highbit);
+ safe_free(lowbit);
+ /* set a small blank loop at the tail for avoiding abnormal loop. */
+ memset(&tmp_data[frames], 0, sizeof(float) * 128);
+ if (antialiasing_allowed)
+ antialiasing_float((float *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
+ }else
+#else /* SF2_24BIT_SAMPLE_TYPE_INT32 */
if(sp->lowbit > 0 ){
/* 24 bit */
splen_t cnt;
sample->data = (sample_t*)safe_large_malloc(sizeof(int32) * (frames + 128));
sample->data_alloced = 1;
sample->data_type = SAMPLE_TYPE_INT32;
-
highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
- lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit
-
+ lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit
tf_seek(tf, sp->start, SEEK_SET);
tf_read(highbit, sp->len, 1, tf);
tf_seek(tf, sp->lowbit, SEEK_SET);
tf_read(lowbit, frames, 1, tf);
-
tmp_data = (uint32 *)sample->data;
for(j = 0; j < frames; j++) {
// 24bit to int32full
}
safe_free(highbit);
safe_free(lowbit);
-
/* set a small blank loop at the tail for avoiding abnormal loop. */
// tmp_data[frames] = tmp_data[frames + 1] = tmp_data[frames + 2] = 0;
memset(&tmp_data[frames], 0, sizeof(int32) * 128);
-
if (antialiasing_allowed)
antialiasing_int32((int32 *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
-
}else
-#endif
+#endif /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
+#endif /* defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) */
{
/* 16 bit */
frames = divi_2(sp->len);
vp->v.loop_end = vp->len + 1;
if (vp->v.loop_start > vp->len)
vp->v.loop_start = vp->len;
+ if (vp->v.loop_start < 0)
+ vp->v.loop_start = 0;
if (vp->v.loop_start >= vp->v.loop_end)
{
vp->v.loop_start = vp->len;
NULL
};
PlayMode *play_mode = &dpm;
+int free_instruments_afterwards = 1;
+int compute_buffer_size;
#ifndef CFG_FOR_SF_SUPPORT_FFT
int32 freq_table[1];
FLOAT_T bend_fine[1];
OVERRIDETIMIDITYDATA otd = {0};
#if defined(__W32__)
+
+#include <windows.h>
+
#if defined(WINDRV) || defined(WINDRV_SETUP)
void timdrvOverrideSFSettingLoad(void)
#include <sys/types.h>
#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
#include <stdio.h>
/* Architectures */
#if defined(IX86CPU) && (defined(_MSC_VER) || defined(__POCC__) || \
defined(__BORLANDC__) || defined(__WATCOMC__))
#define CALLINGCONV __fastcall
-#elif defined(IX86CPU) && defined(__GNUC__)
+#elif defined(IX86CPU) && !defined(AMD64CPU) && defined(__GNUC__)
#define CALLINGCONV __attribute__((fastcall))
#else
#define CALLINGCONV /**/
#define restrict /* not C99 */
#endif /* !restrict */
+#ifndef TIMIDITY_FORCEINLINE
+#ifdef __GNUC__
+#define TIMIDITY_FORCEINLINE __attribute__((__always_inline__))
+#elif defined(_MSC_VER)
+#define TIMIDITY_FORCEINLINE __forceinline
+#else
+#define TIMIDITY_FORCEINLINE inline
+#endif
+#endif /* TIMIDITY_FORCEINLINE */
/* The size of the internal buffer is 2^AUDIO_BUFFER_BITS samples.
This determines maximum number of samples ever computed in a row.
#ifdef __MINGW32__
#define aligned_malloc __mingw_aligned_malloc
#define aligned_free __mingw_aligned_free
-#elif __STDC_VERSION__ >= 201112L
-#define aligned_malloc(s,a) aligned_alloc(a,s)
-#define aligned_free free
-//#elif _POSIX_VERSION >= 200112L
-//#define aligned_malloc(s,a) posix_memalign(,a,s)
+/* C11 aligned_alloc is unsafe here because s must be a multiple of a */
+//#elif __STDC_VERSION__ >= 201112L
+//#define aligned_malloc(s,a) aligned_alloc(a,s)
//#define aligned_free free
+#elif defined(__GNUC__) && _POSIX_VERSION >= 200112L
+/* aligned_malloc(s,a): GNU statement-expression wrapper over posix_memalign().
+ * Yields a pointer to s bytes aligned to a, or NULL when s is zero or the
+ * allocation fails. Both arguments are parenthesized and s is evaluated only
+ * once, so expressions such as `n + pad` or `count++` behave as expected.
+ * The locals carry a macro-specific suffix to avoid clashing with names used
+ * in the caller's argument expressions. */
+#define aligned_malloc(s,a) ({void *aligned_malloc_ptr_ = NULL; size_t aligned_malloc_size_ = (s); if(aligned_malloc_size_ && posix_memalign(&aligned_malloc_ptr_,(a),aligned_malloc_size_)) aligned_malloc_ptr_ = NULL; aligned_malloc_ptr_;})
+#define aligned_free free
#elif _MSC_VER
#define aligned_malloc _aligned_malloc
#define aligned_free _aligned_free
switch(*chorus_status_sd.output_select){
case 0: // main
for(i = 0; i < count; i += 8){
- MM256_LS_ADD_PD(&buf[i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
+ MM256_LS_FMA_PD(&buf[i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
MM256_LS_FMA_PD(&reverb_effect_buffer_thread[cdmt_ofs_1][i], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i]), rev_level);
- MM256_LS_ADD_PD(&buf[i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), cho_level);
+ MM256_LS_FMA_PD(&buf[i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), cho_level);
MM256_LS_FMA_PD(&reverb_effect_buffer_thread[cdmt_ofs_1][i + 4], _mm256_load_pd(&chorus_effect_buffer_sub[cdmt_buf_o][i + 4]), rev_level);
}
break;
switch(*chorus_status_sd.output_select){
case 0: // main
for(i = 0; i < count; i += 8){
- MM256_LS_ADD_PS(&buf[i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
+ MM256_LS_FMA_PS(&buf[i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), cho_level);
MM256_LS_FMA_PS(&reverb_effect_buffer_thread[cdmt_ofs_1][i], _mm256_load_ps(&chorus_effect_buffer_sub[cdmt_buf_o][i]), rev_level);
}
break;
if(noise_sharp_type)
ns_shaping(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
if (opt_limiter)
- do_limiter(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
+ do_limiter(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].count);
#ifdef VST_LOADER_ENABLE
#ifndef MASTER_VST_EFFECT2
vsp = _mm_loadu_ps(sp++);
vsp = _mm_shuffle_ps(vsp, vsp, 0x50); // [0,1,2,3] to {0,0,1,1]
vsp = _mm_mul_ps(vsp, vevol);
-#if !(defined(_MSC_VER) || defined(MSC_VER))
- {
- float *out = (float *)vsp;
- *(lp++) = out[0];
- *(lp++) = out[1];
- }
-#else
- *(lp++) = vsp.m128_f32[0];
- *(lp++) = vsp.m128_f32[1];
-#endif // !(defined(_MSC_VER) || defined(MSC_VER))
+ *(lp++) = MM_EXTRACT_F32(vsp,0);
+ *(lp++) = MM_EXTRACT_F32(vsp,1);
}
#else // ! USE_X86_EXT_INTRIN
case INST_SF2:
case INST_MOD:
case INST_PCM:
+#ifdef ENABLE_SFZ
+ case INST_SFZ:
+#endif
if(opt_resample_over_sampling){
int32 c2 = c * opt_resample_over_sampling;
resample_voice(v, sp, c2);
#include "tables.h"
#include "miditrace.h"
#include "effect.h"
+#include "freq.h"
#ifdef SUPPORT_SOUNDSPEC
#include "soundspec.h"
#endif /* SUPPORT_SOUNDSPEC */
#include "sndfontini.h"
#include "thread.h"
#include "miditrace.h"
+#include "flac_a.h"
+#include "sfz.h"
///r
#ifdef __BORLANDC__
#define inline
#ifdef IA_W32GUI
#include "w32g.h"
+#include "w32g_subwin.h"
#include "w32g_utl.h"
#endif
#include "portaudio_a.h"
#endif
+#ifdef __W32G__
+#include "w32g_utl.h"
+#endif
+
uint8 opt_normal_chorus_plus = 5; // chorusEX
}
///r
-static int set_gus_patchconf_opts(char *name,
+static int set_gus_patchconf_opts(const char *name,
int line, char *opts, ToneBankElement *tone)
{
char *cp;
///r
#define SET_GUS_PATCHCONF_COMMENT
-static int set_gus_patchconf(char *name, int line,
+static int set_gus_patchconf(const char *name, int line,
ToneBankElement *tone, char *pat, char **opts)
{
int j;
opts += 2;
}
#endif
+#ifdef ENABLE_SFZ
+ else if(strcmp(pat, "%sfz") == 0) /* sfz extension */
+ {
+ /* %sfz filename */
+ if (opts[0] == NULL)
+ {
+ ctl->cmsg(CMSG_ERROR, VERB_NORMAL,
+ "%s: line %d: Syntax error", name, line);
+ return 1;
+ }
+ tone->name = safe_strdup(opts[0]);
+ tone->instype = 5; // sfz
+ opts++;
+ }
+#endif
else if(strcmp(pat, "%pat") == 0) /* pat extention */
{
tone->instype = 0; // pat
return 0;
}
///r
-static int set_patchconf(char *name, int line, ToneBank *bank, char *w[], int dr, int mapid, int bankmapfrom, int bankno, int add)
+static int set_patchconf(const char *name, int line, ToneBank *bank, char *w[], int dr, int mapid, int bankmapfrom, int bankno, int add)
{
int i;
int elm;
};
void show_ao_device_info(FILE *fp);
FILE *fp;
- char version[32], *help_args[7], per_mark[2];
+ char version[64], *help_args[7], per_mark[2];
int i, j;
char *h;
ControlMode *cmp, **cmpp;
fp = open_pager();
strcpy(version, (!strstr(timidity_version, "current")) ? "version " : "");
strcat(version, timidity_version);
+ strcat(version, " ");
+ strcat(version, arch_string);
per_mark[0] = '%';
per_mark[1] = '\0';
help_args[0] = version;
#ifdef AU_FLAC
-extern void flac_set_option_verify(int);
-extern void flac_set_option_padding(int);
-extern void flac_set_compression_level(int);
static inline int parse_opt_flac_verify(const char *arg)
{
#else
"TiMidity++ ",
(strcmp(timidity_version, "current")) ? "version " : "",
- timidity_version, NLS,
+ timidity_version, " ", arch_string, NLS,
NLS,
#endif
"Copyright (C) 1999-2004 Masanao Izumo <iz@onicos.co.jp>", NLS,
static void interesting_message(void)
{
printf(
-"TiMidity++ %s%s -- MIDI to WAVE converter and player" NLS
+"TiMidity++ %s%s %s -- MIDI to WAVE converter and player" NLS
"Copyright (C) 1999-2004 Masanao Izumo <iz@onicos.co.jp>" NLS
"Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>" NLS
NLS
"along with this program; if not, write to the Free Software" NLS
"Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA" NLS
NLS, (strcmp(timidity_version, "current")) ? "version " : "",
- timidity_version);
+ timidity_version,
+ arch_string
+ );
}
/* -------- functions for getopt_long ends here --------- */
#ifdef INT_SYNTH
init_int_synth();
#endif // INT_SYNTH
+#ifdef ENABLE_SFZ
+ init_sfz();
+#endif
#ifdef SUPPORT_SOUNDSPEC
if(view_soundspec_flag)
_CrtSetDbgFlag(CRTDEBUGFLAGS);
#endif
atexit(w32_exit);
+
+#ifdef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+ {
+ HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
+
+ if (hStdOut != INVALID_HANDLE_VALUE)
+ {
+ DWORD mode;
+
+ if (GetConsoleMode(hStdOut, &mode))
+ {
+ SetConsoleMode(hStdOut, mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
+ }
+ }
+ }
+#endif
+
#endif /* __W32__ */
#if !defined(KBTIM) && !defined(WINDRV)
OverrideSFSettingLoad();
files = expand_file_archives(files, &nfiles);
if (nfiles > 0)
files_nbuf = files[0];
-#if !defined(IA_W32GUI) && !defined(IA_W32G_SYN)
+#if !defined(IA_W32GUI) && !defined(IA_W32G_SYN) && !defined(IA_WINSYN)
if (dumb_error_count)
sleep(1);
#endif
//free_reverb_buffer();
free_effect_buffers();
///r
+#ifdef ENABLE_SFZ
+ free_sfz();
+#endif
#ifdef INT_SYNTH
free_int_synth();
#endif // INT_SYNTH
//free_reverb_buffer();
free_effect_buffers();
///r
+#ifdef ENABLE_SFZ
+ free_sfz();
+#endif
#ifdef INT_SYNTH
free_int_synth();
#endif // INT_SYNTH
for(i = 0; i < count2; i += 8){
MM256_LSU_MUL_PS(&sp[i], vamp);
}
+ }
#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
{
const int32 req_count_mask = ~(0x7);
/*****************************************************************************************************************************/
-#if defined(__CYGWIN32__) || defined(__MINGW32__)
-#ifdef HAVE_NEW_MMSYSTEM
-#include <mmsystem.h>
-#else
-/* On cygnus, there is not mmsystem.h for Multimedia API's.
- * mmsystem.h can not distribute becase of Microsoft Lisence
- * Then declare some of them here. **/
-#define WOM_OPEN 0x3BB
-#define WOM_CLOSE 0x3BC
-#define WOM_DONE 0x3BD
-#define WAVE_FORMAT_QUERY 0x0001
-#define WAVE_ALLOWSYNC 0x0002
-#define WAVE_FORMAT_PCM 1
-#define CALLBACK_FUNCTION 0x00030000l
-#define WAVERR_BASE 32
-#define WAVE_MAPPER (UINT)-1
-
-DECLARE_HANDLE(HWAVEOUT);
-DECLARE_HANDLE(HWAVE);
-typedef HWAVEOUT *LPHWAVEOUT;
-
-/* Define WAVEHDR, WAVEFORMAT structure */
-
-typedef struct wavehdr_tag
-{
- LPSTR lpData;
- DWORD dwBufferLength;
- DWORD dwBytesRecorded;
- DWORD dwUser;
- DWORD dwFlags;
- DWORD dwLoops;
- struct wavehdr_tag *lpNext;
- DWORD reserved;
-} WAVEHDR;
-
-typedef struct
-{
- WORD wFormatTag;
- WORD nChannels;
- DWORD nSamplesPerSec;
- DWORD nAvgBytesPerSec;
- WORD nBlockAlign;
- WORD wBitsPerSample;
- WORD cbSize;
-} WAVEFORMAT, WAVEFORMATEX, *LPWAVEFORMATEX;
-
-
-typedef struct waveoutcaps_tag
-{
- WORD wMid;
- WORD wPid;
- UINT vDriverVersion;
-#define MAXPNAMELEN 32
- char szPname[MAXPNAMELEN];
- DWORD dwFormats;
- WORD wChannels;
- DWORD dwSupport;
-} WAVEOUTCAPS;
-
-typedef WAVEHDR * LPWAVEHDR;
-typedef WAVEFORMAT * LPWAVEFORMAT;
-typedef WAVEOUTCAPS * LPWAVEOUTCAPS;
-typedef UINT MMRESULT;
-
-MMRESULT WINAPI waveOutOpen(LPHWAVEOUT, UINT, LPWAVEFORMAT, DWORD, DWORD, DWORD);
-MMRESULT WINAPI waveOutClose(HWAVEOUT);
-MMRESULT WINAPI waveOutPrepareHeader(HWAVEOUT, LPWAVEHDR, UINT);
-MMRESULT WINAPI waveOutUnprepareHeader(HWAVEOUT, LPWAVEHDR, UINT);
-MMRESULT WINAPI waveOutWrite(HWAVEOUT, LPWAVEHDR, UINT);
-UINT WINAPI waveOutGetNumDevs(void);
-MMRESULT WINAPI waveOutReset(HWAVEOUT);
-MMRESULT WINAPI waveOutGetDevCaps(UINT, LPWAVEOUTCAPS, UINT);
-MMRESULT WINAPI waveOutGetDevCapsA(UINT, LPWAVEOUTCAPS, UINT);
-#define waveOutGetDevCaps waveOutGetDevCapsA
-MMRESULT WINAPI waveOutGetID(HWAVEOUT, UINT*);
-
-#endif
-#endif /* __CYGWIN32__ */
-
///r
typedef struct {
WAVEFORMATEX Format;
#else /* defined(LEGACY_FLAC) */
-
+
+extern int g_load_libFLAC_dll(void);
+extern void g_free_libFLAC_dll(void);
extern const char * const * *g_FLAC__StreamEncoderInitStatusString;
extern const char * const * *g_FLAC__StreamEncoderStateString;
#include "config.h"\r
#endif /* HAVE_CONFIG_H */\r
#include "interface.h"\r
+#include "common.h"\r
\r
#if defined(AU_VORBIS_DLL) || defined(AU_OPUS_DLL)\r
\r
int load_ogg_dll(void)\r
{\r
if(!h_ogg_dll){\r
- h_ogg_dll = LoadLibrary("ogg.dll");\r
+ h_ogg_dll = LoadLibrary("libogg.dll");\r
if(!h_ogg_dll) return -1;\r
}\r
// ogg_dll.oggpack_writeinit = (type_oggpack_writeinit)GetProcAddress(h_ogg_dll,"oggpack_writeinit");\r
}\r
#endif\r
\r
-int ogg_page_eos(ogg_page *og)\r
+int ogg_page_eos(const ogg_page *og)\r
{\r
if(h_ogg_dll){\r
return ogg_dll.ogg_page_eos(og);\r
#include <objbase.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
//#include <Avrt.h>
-#include <Audioclient.h>
+#include <audioclient.h>
#include <audiopolicy.h>
//#define INITGUID
#include <mmdeviceapi.h>
#ifdef AU_WDMKS
-#ifdef __W32__
-#include "interface.h"
-#endif
#include <stdio.h>
#include <stdlib.h>
#ifndef NO_STRING_H
extern int optopt;
#ifndef UTILS_GETOPT_PRIVATE
-extern int getopt(int __argc, char * const *__argv, const char *__shortopts);
+extern int getopt(int argc, char * const *argv, const char *__shortopts);
#endif /* !UTILS_GETOPT_PRIVATE */
#endif /* <unistd.h> */
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
-extern int getopt_long(int __argc, char * const *__argv,
+extern int getopt_long(int argc, char * const *argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
-extern int getopt_long_only(int __argc, char * const *__argv,
+extern int getopt_long_only(int argc, char * const *argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
#endif /* !UTILS_GETOPT_PRIVATE */