OSDN Git Service

[VM][FMTOWNS][VRAM] Optimize for DWORD aligned accessing.
[csp-qt/common_source_project-fm7.git] / source / src / vm / fmtowns / vram.cpp
/*
	Skeleton for retropc emulator

	Author : Kyuma Ohta <whatisthis.sowhat _at_ gmail.com>
	Date   : 2016.12.28 -

	[ FM-Towns VRAM ]
	History: 2017.01.16 Initial.
*/
10
11
12 #include "common.h"
13 #include "./towns_common.h"
14 #include "./crtc.h"
15 #include "./vram.h"
16
17 namespace FMTOWNS {
18
19 void TOWNS_VRAM::initialize()
20 {
21         memset(vram, 0x00, sizeof(vram));
22 }
23
24 void TOWNS_VRAM::reset()
25 {
26         lock();
27
28 __DECL_VECTORIZED_LOOP
29         for(int i = 0; i < 8; i++) {
30                 packed_pixel_mask_reg[i] = 0xff;
31         }
32         vram_access_reg_addr = 0;
33         unlock();
34 }
35
36
37 void TOWNS_VRAM::write_memory_mapped_io8(uint32_t addr, uint32_t data)
38 {
39         const uint32_t naddr = calc_std_address_offset(addr);
40         const uint8_t mask = packed_pixel_mask_reg[naddr & 3];
41
42         lock();
43         uint8_t nd = vram[naddr];
44         uint8_t dd = data;
45         dd &= mask;
46         nd &= ~mask;
47         dd |= nd;
48
49         vram[naddr] = dd;
50         unlock();
51
52 }
53
54 void TOWNS_VRAM::write_memory_mapped_io16(uint32_t addr, uint32_t data)
55 {
56         lock();
57         const uint32_t maddr = addr & 1;
58         const uint32_t paddr0 = calc_std_address_offset(addr);
59         const uint32_t paddr1 = calc_std_address_offset(addr + 1);
60
61         pair16_t dmask;
62         pair16_t xdata;
63         uint16_t ydata;
64
65         dmask.b.l = packed_pixel_mask_reg[paddr0 & 7];
66         dmask.b.h = packed_pixel_mask_reg[paddr1 & 7];
67
68         xdata.b.l = vram[paddr0];
69         xdata.b.h = vram[paddr1];
70
71         ydata = data;
72
73         xdata.w &= ~(dmask.w);
74         ydata   &= dmask.w;
75
76         xdata.w |= ydata;
77
78         vram[paddr0] = xdata.b.l;
79         vram[paddr1] = xdata.b.h;
80
81         unlock();
82         return;
83 }
84
85 void TOWNS_VRAM::write_memory_mapped_io32(uint32_t addr, uint32_t data)
86 {
87         lock();
88         uint32_t maddr = addr & 3;
89         uint32_t paddr0, paddr1, paddr2, paddr3;
90         pair32_t dmask;
91         pair32_t xdata;
92         uint32_t ydata;
93         __LIKELY_IF(maddr == 0) { // Aligned
94                 paddr0 = calc_std_address_offset(addr);
95                 dmask.read_4bytes_le_from(&(packed_pixel_mask_reg[0]));
96                 xdata.read_4bytes_le_from(&(vram[paddr0]));
97         } else {
98                 // Unaligned
99                 paddr0 = calc_std_address_offset(addr);
100                 paddr1 = calc_std_address_offset(addr + 1);
101                 paddr2 = calc_std_address_offset(addr + 2);
102                 paddr3 = calc_std_address_offset(addr + 3);
103
104                 dmask.b.l  = packed_pixel_mask_reg[paddr0 & 7];
105                 dmask.b.h  = packed_pixel_mask_reg[paddr1 & 7];
106                 dmask.b.h2 = packed_pixel_mask_reg[paddr2 & 7];
107                 dmask.b.h3 = packed_pixel_mask_reg[paddr3 & 7];
108
109                 xdata.b.l  = vram[paddr0];
110                 xdata.b.h  = vram[paddr1];
111                 xdata.b.h2 = vram[paddr2];
112                 xdata.b.h3 = vram[paddr3];
113         }
114
115         ydata = data;
116
117         xdata.d &= ~(dmask.d);
118         ydata   &= dmask.d;
119         xdata.d |= ydata;
120
121         __LIKELY_IF(maddr == 0) { // Aligned
122                 xdata.write_4bytes_le_to(&(vram[paddr0]));
123         } else {
124                 // Unaligned
125                 vram[paddr0] = xdata.b.l;
126                 vram[paddr1] = xdata.b.h;
127                 vram[paddr2] = xdata.b.h2;
128                 vram[paddr3] = xdata.b.h3;
129         }
130         unlock();
131         return;
132 }
133
134 uint32_t TOWNS_VRAM::read_memory_mapped_io8(uint32_t addr)
135 {
136         lock();
137         const uint32_t naddr = calc_std_address_offset(addr);
138         uint32_t n = vram[naddr];
139         unlock();
140         return n;
141 }
142
143 uint32_t TOWNS_VRAM::read_memory_mapped_io16(uint32_t addr)
144 {
145         const uint32_t paddr0 = calc_std_address_offset(addr);
146         const uint32_t paddr1 = calc_std_address_offset(addr + 1);
147         //const uint32_t maddr = addr & 1;
148         pair16_t data;
149
150         lock();
151         data.b.l = vram[paddr0];
152         data.b.h = vram[paddr1];
153         unlock();
154         return (uint32_t)(data.w);
155 }
156
157 uint32_t TOWNS_VRAM::read_memory_mapped_io32(uint32_t addr)
158 {
159         pair32_t data;
160         __LIKELY_IF((addr & 3) == 0) { // Aligned
161                 const uint32_t paddr = calc_std_address_offset(addr);
162                 data.read_4bytes_le_from(&(vram[paddr]));
163         } else {
164                 const uint32_t paddr0 = calc_std_address_offset(addr);
165                 const uint32_t paddr1 = calc_std_address_offset(addr + 1);
166                 const uint32_t paddr2 = calc_std_address_offset(addr + 2);
167                 const uint32_t paddr3 = calc_std_address_offset(addr + 3);
168                 data.b.l  = vram[paddr0];
169                 data.b.h  = vram[paddr1];
170                 data.b.h2 = vram[paddr2];
171                 data.b.h3 = vram[paddr3];
172         }
173         return data.d;
174 }
175
176 void TOWNS_VRAM::write_dma_data8w(uint32_t addr, uint32_t data, int* wait)
177 {
178         __LIKELY_IF(wait != NULL) {
179                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
180         }
181         write_memory_mapped_io8(addr, data);
182 }
183
184 void TOWNS_VRAM::write_dma_data16w(uint32_t addr, uint32_t data, int* wait)
185 {
186         __LIKELY_IF(wait != NULL) {
187                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
188         }
189         write_memory_mapped_io16(addr, data);
190 }
191
192 void TOWNS_VRAM::write_dma_data32w(uint32_t addr, uint32_t data, int* wait)
193 {
194         __LIKELY_IF(wait != NULL) {
195                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
196         }
197         write_memory_mapped_io32(addr, data);
198 }
199
200 uint32_t TOWNS_VRAM::read_dma_data8w(uint32_t addr, int* wait)
201 {
202         __LIKELY_IF(wait != NULL) {
203                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
204         }
205         return read_memory_mapped_io8(addr);
206 }
207
208 uint32_t TOWNS_VRAM::read_dma_data16w(uint32_t addr, int* wait)
209 {
210         __LIKELY_IF(wait != NULL) {
211                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
212         }
213         return read_memory_mapped_io16(addr);
214 }
215
216 uint32_t TOWNS_VRAM::read_dma_data32w(uint32_t addr, int* wait)
217 {
218         __LIKELY_IF(wait != NULL) {
219                 *wait = 0; // WAIT SETS by TOWNS_MEMORY:: .
220         }
221         return read_memory_mapped_io32(addr);
222 }
223
224 void TOWNS_VRAM::write_signal(int id, uint32_t data, uint32_t mask)
225 {
226         // ToDo
227 }
228 // Renderers
229
230 void TOWNS_VRAM::write_io8(uint32_t address,  uint32_t data)
231 {
232         switch(address & 0xffff) {
233         case 0x0458:
234                 vram_access_reg_addr = data & 3;
235 //              out_debug_log(_T("VRAM ACCESS(0458h)=%02X"), data);
236                 break;
237         case 0x045a:
238                 packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0] = data;
239                 packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 4] = data;
240 //              out_debug_log(_T("VRAM MASK(045Ah)=%08X"), packed_pixel_mask_reg.d);
241                 break;
242         case 0x045b:
243                 packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1] = data;
244                 packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 5] = data;
245 //              out_debug_log(_T("VRAM MASK(045Bh)=%08X"), packed_pixel_mask_reg.d);
246                 break;
247         case 0x05ee:
248                 // ToDo: Implement around VRAM cache.
249                 // VCMEN (Disabled) : Bit0
250                 break;
251         }
252 }
253
254 void TOWNS_VRAM::write_io16(uint32_t address,  uint32_t data)
255 {
256         pair32_t d;
257         d.d = data;
258         switch(address & 0xffff) {
259         case 0x0458:
260                 vram_access_reg_addr = data & 3;
261                 break;
262         case 0x045a:
263                 {
264                         pair16_t w;
265                         w.w = data;
266                         packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0] = w.b.l;
267                         packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1] = w.b.h;
268                         packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 4] = w.b.l;
269                         packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 5] = w.b.h;
270                 }
271                 break;
272         case 0x5ee:
273                 {
274                         pair16_t n;
275                         n.w = data;
276                         write_io8(0x05ee, n.b.l);
277                 }
278                 break;
279         }
280 }
281
282 uint32_t TOWNS_VRAM::read_io8(uint32_t address)
283 {
284         switch(address & 0xffff) {
285         case 0x0458:
286                 return vram_access_reg_addr;
287                 break;
288         case 0x045a:
289                 {
290                         uint8_t v = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0];
291                         return (uint32_t)v;
292                 }
293                 break;
294         case 0x045b:
295                 {
296                         uint8_t v = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1];
297                         return (uint32_t)v;
298                 }
299                 break;
300         case 0x5ee:
301                 // ToDo: Implement around VRAM cache.
302                 // Bit7 = 0 if ready to turn on/off VRAM cache.
303                 // VCMEN (Disabled) : Bit0
304                 if((cpu_id == 0x02) || (cpu_id >= 0x04)) { // i486 SX/DX and after Pentium.
305                         // Still Disabled VRAM feature and disable VCMEN.
306                         return 0xff;
307                 }
308                 return 0xff;
309                 break;
310         }
311         return 0xff;
312 }
313
314 uint32_t TOWNS_VRAM::read_io16(uint32_t address)
315 {
316         switch(address & 0xffff) {
317         case 0x0458:
318                 return vram_access_reg_addr;
319                 break;
320         case 0x045a:
321                 {
322                         pair16_t w;
323                         w.b.l = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0];
324                         w.b.h = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1];
325                         return (uint32_t)(w.w);
326                 }
327                 break;
328         case 0x05ee:
329                 {
330                         pair16_t n;
331                         n.b.l = read_io8(0x05ee);
332                         n.b.h = 0xff;
333                         return n.w;
334                 }
335                 break;
336         }
337         return 0xffff;
338 }
339
340 void TOWNS_VRAM::get_data_from_vram(bool is_single, uint32_t offset, uint32_t bytes, uint8_t* dst)
341 {
342         __UNLIKELY_IF((bytes == 0) || (bytes > (TOWNS_CRTC_MAX_PIXELS * sizeof(uint16_t))) || (dst == nullptr)) {
343                 return;
344         }
345         uint32_t addr = offset & TOWNS_VRAM_ADDR_MASK;
346         uint8_t* p = ___assume_aligned(dst, 16);
347
348         lock();
349         if(is_single) {
350                 addr |= (1 << (TOWNS_VRAM_ADDR_SHIFT + 1));
351                 __DECL_ALIGNED(32) uint8_t cache[16];
352                 __DECL_ALIGNED(32) uint8_t cache2[16];
353                 for(int i = bytes; i >= 16; i -= 16) {
354                         for(int j = 0; j < 16; j++) {
355                                 cache[j] = vram[calc_std_address_offset(addr + j)];
356                         }
357                         __DECL_VECTORIZED_LOOP
358                         for(int j = 0; j < 16; j++) {
359                                 p[j] = cache[j];
360                         }
361                         p += 16;
362                         addr += 16;
363                 }
364                 bytes = bytes & 0x0f; // MOD
365                 for(int j = 0; j < bytes; j++) {
366                         cache2[j] = vram[calc_std_address_offset(addr + j)];
367                 }
368                 for(int j = 0; j < bytes; j++) {
369                         p[j] = cache2[j];
370                 }
371         } else {
372                 __LIKELY_IF((addr + bytes) <= (TOWNS_VRAM_ADDR_MASK + 1)) {
373                         // Not Wrapped.
374                         memcpy(p, &(vram[addr]), bytes);
375                 } else {
376                         uint32_t nb = (addr + bytes) - (TOWNS_VRAM_ADDR_MASK + 1);
377                         __LIKELY_IF(nb < bytes) {
378                                 memcpy(p, &(vram[addr]), bytes - nb);
379                                 __LIKELY_IF(nb > 0) {
380                                         memcpy(&(p[bytes - nb]), &(vram[0]), nb);
381                                 }
382                         } else {
383                                 // Fallthrough.
384                                 memcpy(p, &(vram[addr]), bytes - nb);
385                         }
386                 }
387         }
388         unlock();
389 }
390
391 bool TOWNS_VRAM::set_buffer_to_vram(uint32_t offset, uint8_t *buf, int words)
392 {
393 //              uint32_t offset2 = calc_std_address_offset(offset);
394         const uint32_t offset2 = offset & TOWNS_VRAM_ADDR_MASK;
395 //              if(words > 16) return false;
396         __UNLIKELY_IF(words <= 0) return false;
397         uint8_t* p = &(vram[offset2]);
398
399         lock();
400         __LIKELY_IF((offset2 + (words << 1)) <= (TOWNS_VRAM_ADDR_MASK + 1)) {
401                 memcpy(p, buf, words << 1);
402         } else {
403                 int nb = (TOWNS_VRAM_ADDR_MASK + 1) - offset2;
404                 memcpy(p, buf, nb);
405                 int nnb = (words << 1) - nb;
406                 __LIKELY_IF(nnb > 0) {
407                         memcpy(vram, &(buf[nb]), nnb);
408                 }
409         }
410         unlock();
411         return true;
412 }
413
414 bool TOWNS_VRAM::get_vram_to_buffer(uint32_t offset, uint8_t *buf, int words)
415 {
416         //uint32_t offset2 = calc_std_address_offset(offset);
417         const uint32_t offset2 = offset & TOWNS_VRAM_ADDR_MASK;
418 //              if(words > 16) return false;
419         __UNLIKELY_IF(words <= 0) return false;
420
421         lock();
422         uint8_t* p = &(vram[offset2]);
423         __LIKELY_IF((offset2 + (words << 1)) <= (TOWNS_VRAM_ADDR_MASK + 1)) {
424                 memcpy(buf, p, words << 1);
425         } else {
426                 uint32_t nb = (TOWNS_VRAM_ADDR_MASK + 1) - offset2;
427                 memcpy(buf, p, nb);
428                 int nnb = (words << 1) - nb;
429                 __LIKELY_IF(nnb > 0) {
430                         memcpy(&(buf[nb]), vram, nnb);
431                 }
432         }
433         unlock();
434         return true;
435 }
436
437 #define STATE_VERSION   3
438
439 bool TOWNS_VRAM::process_state(FILEIO* state_fio, bool loading)
440 {
441         if(!state_fio->StateCheckUint32(STATE_VERSION)) {
442                 return false;
443         }
444
445         if(!state_fio->StateCheckInt32(this_device_id)) {
446                 return false;
447         }
448
449         lock();
450
451         state_fio->StateValue(vram_access_reg_addr);
452         state_fio->StateArray(packed_pixel_mask_reg, sizeof(packed_pixel_mask_reg), 1);
453
454         state_fio->StateArray(vram, sizeof(vram), 1);
455
456         unlock();
457         return true;
458 }
459 }