[FMGEN] Add alignment and branch-prediction hints so that some variables can be SIMD-vectorized.
cmake_minimum_required (VERSION 2.6)
message("* vm/fmgen")
-SET(THIS_LIB_VERSION 1.13.1)
+SET(THIS_LIB_VERSION 1.14.0)
add_definitions(-D__LIBFMGEN_VERSION=\"libCSPfmgen.${THIS_LIB_VERSION}\")
SET(s_vm_fmgen_srcs
//
inline void StoreSample(Sample& dest, ISample data)
{
- if (sizeof(Sample) == 2)
+ __UNLIKELY_IF(sizeof(Sample) == 2)
dest = (Sample) Limit(dest + data, 0x7fff, -0x8000);
else
dest += data;
#ifndef __FMGEN_MISC_H
#define __FMGEN_MISC_H
+#include <algorithm>
+
+inline int Max(int x, int y) { return std::max(x, y); } // larger of x and y; now delegates to std::max from <algorithm>
+inline int Min(int x, int y) { return std::min(x, y); } // smaller of x and y; now delegates to std::min from <algorithm>
-inline int Max(int x, int y) { return (x > y) ? x : y; }
-inline int Min(int x, int y) { return (x < y) ? x : y; }
inline int Abs(int x) { return x >= 0 ? x : -x; } // absolute value; NOTE(review): -x is signed overflow when x == INT_MIN - confirm callers never pass it
+//#if defined(__cplusplus) && (__cplusplus >= 201703L)
+//#define Limit(foo, max, min) std::clamp((int)foo, (int)min, (int)max)
+//#else
inline int Limit(int v, int max, int min) // clamps v into [min, max]; note the UPPER bound is passed before the lower bound
-{
+{
return v > max ? max : (v < min ? min : v); // upper bound is checked first, then the lower bound
}
+//#endif
+// BSwap: reverse the byte order of a 32-bit value (0x11223344 -> 0x44332211).
+// __has_builtin has to be probed in its own nested #if: on a compiler that
+// does not provide it, "defined(__has_builtin) && __has_builtin(...)" is
+// still parsed as a whole and fails to preprocess.
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_bswap32)
+#define FMGEN_HAVE_BUILTIN_BSWAP32 1
+#endif
+#endif
+#if defined(FMGEN_HAVE_BUILTIN_BSWAP32)
+inline unsigned int BSwap(unsigned int a)
+{
+ return __builtin_bswap32(a);
+}
+#else
inline unsigned int BSwap(unsigned int a)
{
return (a >> 24) | ((a >> 8) & 0xff00) | ((a << 8) & 0xff0000) | (a << 24);
}
+#endif
+#undef FMGEN_HAVE_BUILTIN_BSWAP32
inline unsigned int NtoBCD(unsigned int a)
{
}
+#if defined(__cplusplus) && (__cplusplus >= 201703L)
+#include <numeric>
+using std::gcd;
+#else
template<class T>
inline T gcd(T x, T y)
{
return x;
}
+#endif
template<class T>
T bessel0(T x)
namespace FM
{
-int OPM::amtable[4][OPM_LFOENTS] = { -1, };
-int OPM::pmtable[4][OPM_LFOENTS];
+__DECL_ALIGNED(16) int OPM::amtable[4][OPM_LFOENTS] = { -1, };
+__DECL_ALIGNED(16) int OPM::pmtable[4][OPM_LFOENTS];
// ---------------------------------------------------------------------------
// 構築
db_l = Min(db_l, 20);
db_r = Min(db_r, 20);
- if (db_l > -192)
+ __LIKELY_IF (db_l > -192)
fmvolume_l = int(16384.0 * pow(10.0, db_l / 40.0));
else
fmvolume_l = 0;
- if (db_r > -192)
+ __LIKELY_IF (db_r > -192)
fmvolume_r = int(16384.0 * pow(10.0, db_r / 40.0));
else
fmvolume_r = 0;
//
void OPM::SetReg(uint addr, uint data)
{
- if (addr >= 0x100)
+ __UNLIKELY_IF (addr >= 0x100)
return;
int c = addr & 7;
//
void OPM::SetParameter(uint addr, uint data)
{
- const static uint8 sltable[16] =
+ __DECL_ALIGNED(16) const static uint8 sltable[16] =
{
0, 4, 8, 12, 16, 20, 24, 28,
32, 36, 40, 44, 48, 52, 56, 124,
};
- const static uint8 slottable[4] = { 0, 2, 1, 3 };
+ __DECL_ALIGNED(16) const static uint8 slottable[4] = { 0, 2, 1, 3 };
uint slot = slottable[(addr >> 3) & 3];
Operator* op = &ch[addr & 7].op[slot];
//
void OPM::BuildLFOTable()
{
- if (amtable[0][0] != -1)
+ __UNLIKELY_IF (amtable[0][0] != -1)
return;
for (int type=0; type<4; type++)
}
lfo_count_prev_ = lfo_count_;
lfo_step_++;
- if ((lfo_step_ & 7) == 0)
+ __UNLIKELY_IF ((lfo_step_ & 7) == 0)
{
lfo_count_ += lfo_count_diff_;
}
if (activech & 0x0004) (*idest[6] += ch[6].Calc());
if (activech & 0x0001)
{
- if (noisedelta & 0x80)
+ __UNLIKELY_IF (noisedelta & 0x80)
*idest[7] += ch[7].CalcN(Noise());
else
*idest[7] += ch[7].Calc();
if (activech & 0x0004) (*idest[6] += ch[6].CalcL());
if (activech & 0x0001)
{
- if (noisedelta & 0x80)
+ __UNLIKELY_IF (noisedelta & 0x80)
*idest[7] += ch[7].CalcLN(Noise());
else
*idest[7] += ch[7].CalcL();
//
void OPM::Mix(Sample* buffer, int nsamples)
{
-//#define IStoSampleL(s) ((Limit(s, 0xffff, -0x10000) * fmvolume_l) >> 14)
-//#define IStoSampleR(s) ((Limit(s, 0xffff, -0x10000) * fmvolume_r) >> 14)
-#define IStoSampleL(s) ((s * fmvolume_l) >> 14)
-#define IStoSampleR(s) ((s * fmvolume_r) >> 14)
+#define IStoSampleL(s) ((Limit(s, 0xffff, -0x10000) * fmvolume_l) >> 14)
+#define IStoSampleR(s) ((Limit(s, 0xffff, -0x10000) * fmvolume_r) >> 14)
+//#define IStoSampleL(s) ((s * fmvolume_l) >> 14)
+//#define IStoSampleR(s) ((s * fmvolume_r) >> 14)
//#define IStoSample(s) ((s * fmvolume) >> 14)
// odd bits - active, even bits - lfo
for (Sample* dest = buffer; dest < limit; dest+=2)
{
ibuf[1] = ibuf[2] = ibuf[3] = 0;
- if (activech & 0xaaaa)
+ __LIKELY_IF (activech & 0xaaaa)
LFO(), MixSubL(activech, idest);
else
LFO(), MixSub(activech, idest);
bool interrupt;
uint8 reg01;
- uint8 kc[8];
- uint8 kf[8];
- uint8 pan[8];
+ __DECL_ALIGNED(16) uint8 kc[8];
+ __DECL_ALIGNED(16) uint8 kf[8];
+ __DECL_ALIGNED(16) uint8 pan[8];
- Channel4 ch[8];
+ __DECL_ALIGNED(16) Channel4 ch[8];
Chip chip;
static void BuildLFOTable();
- static int amtable[4][OPM_LFOENTS];
- static int pmtable[4][OPM_LFOENTS];
+ __DECL_ALIGNED(16) static int amtable[4][OPM_LFOENTS];
+ __DECL_ALIGNED(16) static int pmtable[4][OPM_LFOENTS];
public:
int dbgGetOpOut(int c, int s) { return ch[c].op[s].dbgopout_; }
#if defined(BUILD_OPN) || defined(BUILD_OPNA) || defined (BUILD_OPNB) || defined(BUILD_OPN2)
-uint32 OPNBase::lfotable[8]; // OPNA/B 用
+__DECL_ALIGNED(16) uint32 OPNBase::lfotable[8]; // OPNA/B 用
OPNBase::OPNBase()
{
psg.SetChannelMask(mask >> 6);
}
+#define IStoSampleL(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_l) >> 14)
+#define IStoSampleR(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_r) >> 14)
+//#define IStoSampleL(s) ((s * fmvolume_l) >> 14)
+//#define IStoSampleR(s) ((s * fmvolume_r) >> 14)
// 合成(2ch)
void OPN::Mix(Sample* buffer, int nsamples)
{
-//#define IStoSampleL(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_l) >> 14)
-//#define IStoSampleR(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_r) >> 14)
-#define IStoSampleL(s) ((s * fmvolume_l) >> 14)
-#define IStoSampleR(s) ((s * fmvolume_r) >> 14)
psg.Mix(buffer, nsamples);
StoreSample(dest[1], s_r);
}
}
-#undef IStoSampleL
-#undef IStoSampleR
}
// ---------------------------------------------------------------------------
#if defined(BUILD_OPNA) || defined(BUILD_OPNB)
-int OPNABase::amtable[FM_LFOENTS] = { -1, };
-int OPNABase::pmtable[FM_LFOENTS];
+__DECL_ALIGNED(16) int OPNABase::amtable[FM_LFOENTS] = { -1, };
+__DECL_ALIGNED(16) int OPNABase::pmtable[FM_LFOENTS];
-int32 OPNABase::tltable[FM_TLENTS+FM_TLPOS];
+__DECL_ALIGNED(16) int32 OPNABase::tltable[FM_TLENTS+FM_TLPOS];
bool OPNABase::tablehasmade = false;
OPNABase::OPNABase()
void OPNABase::MixSubSL(int activech, ISample** dest) // writes ch[0..5].CalcL() into *dest[0..5] for each channel whose activech bit is set
{
+#if 1 // loop form of the six unrolled statements kept in the #else branch
+ int _mask = 0x001; // bit tested for channel 0; shifted left by 2 per channel (0x001, 0x004, ... 0x400)
+ __LIKELY_IF((activech & _mask) != 0) {
+ *dest[0] = ch[0].CalcL(); // channel 0 assigns (=) instead of accumulating
+ }
+ _mask <<= 2;
+ for(int i = 1; i < 6; i++) {
+ __LIKELY_IF((activech & _mask) != 0) {
+ *dest[i] += ch[i].CalcL(); // channels 1-5 accumulate (+=)
+ }
+ _mask <<= 2;
+ }
+#else
if (activech & 0x001) (*dest[0] = ch[0].CalcL());
if (activech & 0x004) (*dest[1] += ch[1].CalcL());
if (activech & 0x010) (*dest[2] += ch[2].CalcL());
if (activech & 0x040) (*dest[3] += ch[3].CalcL());
if (activech & 0x100) (*dest[4] += ch[4].CalcL());
if (activech & 0x400) (*dest[5] += ch[5].CalcL());
+#endif
}
inline void OPNABase::MixSubS(int activech, ISample** dest) // writes ch[0..5].Calc() into *dest[0..5] for each channel whose activech bit is set
{
+#if 1 // loop form of the six unrolled statements kept in the #else branch
+ int _mask = 0x001; // bit tested for channel 0; shifted left by 2 per channel (0x001, 0x004, ... 0x400)
+ __UNLIKELY_IF((activech & _mask) != 0) { // NOTE(review): MixSubSL marks the same test __LIKELY_IF - confirm the differing hint is intended
+ *dest[0] = ch[0].Calc(); // channel 0 assigns (=) instead of accumulating
+ }
+ _mask <<= 2;
+ for(int i = 1; i < 6; i++) {
+ __UNLIKELY_IF((activech & _mask) != 0) {
+ *dest[i] += ch[i].Calc(); // channels 1-5 accumulate (+=)
+ }
+ _mask <<= 2;
+ }
+#else
if (activech & 0x001) (*dest[0] = ch[0].Calc());
if (activech & 0x004) (*dest[1] += ch[1].Calc());
if (activech & 0x010) (*dest[2] += ch[2].Calc());
if (activech & 0x040) (*dest[3] += ch[3].Calc());
if (activech & 0x100) (*dest[4] += ch[4].Calc());
if (activech & 0x400) (*dest[5] += ch[5].Calc());
+#endif
}
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// 合成
//
-//#define IStoSampleL(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_l) >> 14)
-//#define IStoSampleR(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_r) >> 14)
-#define IStoSampleL(s) ((s * fmvolume_l) >> 14)
-#define IStoSampleR(s) ((s * fmvolume_r) >> 14)
void OPNABase::Mix6(Sample* buffer, int nsamples, int activech)
{
// Mix
- ISample ibuf[4];
- ISample* idest[6];
+ __DECL_ALIGNED(16) ISample ibuf[4];
+ __DECL_ALIGNED(16) ISample* idest[6];
idest[0] = &ibuf[pan[0]];
idest[1] = &ibuf[pan[1]];
idest[2] = &ibuf[pan[2]];
idest[5] = &ibuf[pan[5]];
Sample* limit = buffer + nsamples * 2;
+
for (Sample* dest = buffer; dest < limit; dest+=2)
{
ibuf[1] = ibuf[2] = ibuf[3] = 0;
- if (activech & 0xaaa)
+ __LIKELY_IF(activech & 0xaaa)
LFO(), MixSubSL(activech, idest);
else
MixSubS(activech, idest);
for (int i=0; i<6; i++)
{
Rhythm& r = rhythm[i];
- if ((rhythmkey & (1 << i)) && r.level < 128)
+ if ((rhythmkey & (1 << i)) /*&& r.level < 128*/)
{
int db_l = Limit(rhythmtl+rhythmtvol_l+r.level+r.volume_l, 127, -31);
int db_r = Limit(rhythmtl+rhythmtvol_r+r.level+r.volume_r, 127, -31);
// ---------------------------------------------------------------------------
// YM2610
//
-int OPNB::jedi_table[(48+1)*16];
+__DECL_ALIGNED(16) int OPNB::jedi_table[(48+1)*16];
void OPNB::InitADPCMATable()
{
for (int i=0; i<6; i++)
{
ADPCMA& r = adpcma[i];
- if ((adpcmakey & (1 << i)) && r.level < 128)
+ if ((adpcmakey & (1 << i)) /*&& r.level < 128*/)
{
uint mask_l = r.pan & 2 ? -1 : 0;
uint mask_r = r.pan & 1 ? -1 : 0;
#if defined(BUILD_OPN2)
-int OPN2Base::amtable[FM_LFOENTS] = { -1, };
-int OPN2Base::pmtable[FM_LFOENTS];
+__DECL_ALIGNED(16) int OPN2Base::amtable[FM_LFOENTS] = { -1, };
+__DECL_ALIGNED(16) int OPN2Base::pmtable[FM_LFOENTS];
-int32 OPN2Base::tltable[FM_TLENTS+FM_TLPOS];
+__DECL_ALIGNED(16) int32 OPN2Base::tltable[FM_TLENTS+FM_TLPOS];
bool OPN2Base::tablehasmade = false;
OPN2Base::OPN2Base()
if (activech & 0x040) (*dest[3] += ch[3].CalcL());
if (activech & 0x100) (*dest[4] += ch[4].CalcL());
if (activech & 0x400) {
- ISample tmp = ((ISample) dac_data) << 5;
- if ((dac_enabled)) {
+ __UNLIKELY_IF ((dac_enabled)) {
+ ISample tmp = ((ISample) dac_data) << 5;
(*dest[5] += tmp);
} else {
(*dest[5] += ch[5].CalcL());
if (activech & 0x040) (*dest[3] += ch[3].Calc());
if (activech & 0x100) (*dest[4] += ch[4].Calc());
if (activech & 0x400) {
- ISample tmp = ((ISample) dac_data) << 5;
- if ((dac_enabled)) {
+ __UNLIKELY_IF ((dac_enabled)) {
+ ISample tmp = ((ISample) dac_data) << 5;
(*dest[5] += tmp);
} else {
(*dest[5] += ch[5].Calc());
// ---------------------------------------------------------------------------
// 合成
//
-//#define IStoSampleL(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_l) >> 14)
-//#define IStoSampleR(s) ((Limit(s, 0x7fff, -0x8000) * fmvolume_r) >> 14)
-
void OPN2Base::Mix6(Sample* buffer, int nsamples, int activech)
{
// Mix
- ISample ibuf[4];
- ISample* idest[6];
+ __DECL_ALIGNED(16) ISample ibuf[4];
+ __DECL_ALIGNED(16) ISample* idest[6];
idest[0] = &ibuf[pan[0]];
idest[1] = &ibuf[pan[1]];
idest[2] = &ibuf[pan[2]];
idest[4] = &ibuf[pan[4]];
idest[5] = &ibuf[pan[5]];
Sample* limit = buffer + nsamples * 2;
+
for (Sample* dest = buffer; dest < limit; dest+=2)
{
ibuf[1] = ibuf[2] = ibuf[3] = 0;
- if (activech & 0xaaa)
+ __LIKELY_IF (activech & 0xaaa)
LFO(), MixSubSL(activech, idest);
else
MixSubS(activech, idest);
StoreSample(dest[1], IStoSampleR(ibuf[1] + ibuf[3]));
}
}
+#undef IStoSampleL
+#undef IStoSampleR
+
// ---------------------------------------------------------------------------
// ステートセーブ
//
Channel4* csmch;
- static uint32 lfotable[8];
-
+ __DECL_ALIGNED(16) static uint32 lfotable[8];
private:
void TimerA();
int DecodeADPCMBSample(uint);
// FM 音源関係 (FM sound source)
- uint8 pan[6];
- uint8 fnum2[9];
+ __DECL_ALIGNED(16) uint8 pan[6];
+ __DECL_ALIGNED(16) uint8 fnum2[9];
uint8 reg22;
uint reg29; // OPNA only?
uint32 lfocount;
uint32 lfodcount;
- uint fnum[6];
- uint fnum3[3];
+ __DECL_ALIGNED(16) uint fnum[6];
+ __DECL_ALIGNED(16) uint fnum3[3];
// ADPCM 関係 (ADPCM related)
uint8* adpcmbuf; // ADPCM RAM
uint8 control1; // ADPCM コントロールレジスタ１ (ADPCM control register 1)
uint8 control2; // ADPCM コントロールレジスタ２ (ADPCM control register 2)
- uint8 adpcmreg[8]; // ADPCM レジスタの一部分 (part of the ADPCM registers)
+ __DECL_ALIGNED(16) uint8 adpcmreg[8]; // ADPCM レジスタの一部分 (part of the ADPCM registers)
int rhythmmask_;
- Channel4 ch[6];
+ __DECL_ALIGNED(16) Channel4 ch[6];
static void BuildLFOTable();
- static int amtable[FM_LFOENTS];
- static int pmtable[FM_LFOENTS];
- static int32 tltable[FM_TLENTS+FM_TLPOS];
+ __DECL_ALIGNED(16) static int amtable[FM_LFOENTS];
+ __DECL_ALIGNED(16) static int pmtable[FM_LFOENTS];
+ __DECL_ALIGNED(16) static int32 tltable[FM_TLENTS+FM_TLPOS];
static bool tablehasmade;
};
void LFO();
// FM 音源関係 (FM sound source)
- uint8 pan[6];
- uint8 fnum2[9];
+ __DECL_ALIGNED(16) uint8 pan[6];
+ __DECL_ALIGNED(16) uint8 fnum2[9];
uint8 reg22;
uint reg29; // OPNA only?
uint32 lfocount;
uint32 lfodcount;
- uint fnum[6];
- uint fnum3[3];
+ __DECL_ALIGNED(16) uint fnum[6];
+ __DECL_ALIGNED(16) uint fnum3[3];
- Channel4 ch[6];
+ __DECL_ALIGNED(16) Channel4 ch[6];
int32 dac_data;
bool dac_enabled;
static void BuildLFOTable();
- static int amtable[FM_LFOENTS];
- static int pmtable[FM_LFOENTS];
- static int32 tltable[FM_TLENTS+FM_TLPOS];
+ __DECL_ALIGNED(16) static int amtable[FM_LFOENTS];
+ __DECL_ALIGNED(16) static int pmtable[FM_LFOENTS];
+ __DECL_ALIGNED(16) static int32 tltable[FM_TLENTS+FM_TLPOS];
static bool tablehasmade;
};
// YM2203(OPN) ----------------------------------------------------
void SetStatus(uint bit);
void ResetStatus(uint bit);
- uint fnum[3];
- uint fnum3[3];
- uint8 fnum2[6];
+ __DECL_ALIGNED(16) uint fnum[3];
+ __DECL_ALIGNED(16) uint fnum3[3];
+ __DECL_ALIGNED(16) uint8 fnum2[6];
- Channel4 ch[3];
+ __DECL_ALIGNED(16) Channel4 ch[3];
};
// YM2608(OPNA) ---------------------------------------------------
void RhythmMix(Sample* buffer, uint count);
// リズム音源関係 (rhythm sound source)
- Rhythm rhythm[6];
+ __DECL_ALIGNED(16) Rhythm rhythm[6];
int8 rhythmtl; // リズム全体の音量 (overall rhythm volume)
int rhythmtvol_l;
int rhythmtvol_r;
// ADPCMA 関係 (ADPCMA related)
uint8* adpcmabuf; // ADPCMA ROM
int adpcmasize;
- ADPCMA adpcma[6];
+ __DECL_ALIGNED(16) ADPCMA adpcma[6];
int8 adpcmatl; // ADPCMA 全体の音量 (overall ADPCMA volume)
int adpcmatvol_l;
int adpcmatvol_r;
uint8 adpcmakey; // ADPCMA のキー (ADPCMA key)
int adpcmastep;
- uint8 adpcmareg[32];
+ __DECL_ALIGNED(16) uint8 adpcmareg[32];
- static int jedi_table[(48+1)*16];
+ __DECL_ALIGNED(16) static int jedi_table[(48+1)*16];
- Channel4 ch[6];
+ __DECL_ALIGNED(16) Channel4 ch[6];
};
// YM2612/3438(OPN2) ----------------------------------------------------
//
inline void PSG::StoreSample(Sample& dest, int32 data)
{
- if (sizeof(Sample) == 2)
+ __UNLIKELY_IF(sizeof(Sample) == 2)
dest = (Sample) Limit(dest + data, 0x7fff, -0x8000);
else
dest += data;