OSDN Git Service

- added cpu/ppu files
authorastoria-d <astoria-d@mail.goo.ne.jp>
Sun, 1 Sep 2013 02:04:19 +0000 (11:04 +0900)
committerastoria-d <astoria-d@mail.goo.ne.jp>
Sun, 1 Sep 2013 02:04:19 +0000 (11:04 +0900)
- removed debug output

de1_nes/.gitignore
de1_nes/cpu/alu.vhd [new file with mode: 0644]
de1_nes/cpu/decoder.vhd [new file with mode: 0644]
de1_nes/cpu/mos6502.vhd [new file with mode: 0644]
de1_nes/de1_nes.qsf
de1_nes/de1_nes.qws
de1_nes/ppu/ppu.vhd [new file with mode: 0644]
de1_nes/ppu/render.vhd [new file with mode: 0644]

index 1055d93..b4e77e3 100644 (file)
@@ -3,4 +3,5 @@
 *.rpt\r
 *.pof\r
 *.sof\r
+*.bak\r
 db/*
\ No newline at end of file
diff --git a/de1_nes/cpu/alu.vhd b/de1_nes/cpu/alu.vhd
new file mode 100644 (file)
index 0000000..a00cb48
--- /dev/null
@@ -0,0 +1,1008 @@
+----------------------------
+---- 6502 ALU implementation
+----------------------------
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_unsigned.all;
+use ieee.std_logic_arith.conv_std_logic_vector;
+
+-- 6502 ALU wrapper: effective-address generation and arithmetic/logic
+-- operations. Every *_n input is compared against '0' inside the
+-- architecture, i.e. it is treated as active low.
+entity alu is
+    generic (
+        dsize : integer := 8    -- width of the data path in bits
+    );
+    port (
+        clk             : in    std_logic;
+
+        -- selects for the address-calculation part of alu_p
+        pcl_inc_n       : in    std_logic;
+        pch_inc_n       : in    std_logic;
+        sp_oe_n         : in    std_logic;
+        sp_push_n       : in    std_logic;
+        sp_pop_n        : in    std_logic;
+        abs_xy_n        : in    std_logic;
+        pg_next_n       : in    std_logic;
+        zp_n            : in    std_logic;
+        zp_xy_n         : in    std_logic;
+        rel_calc_n      : in    std_logic;
+        indir_n         : in    std_logic;
+        indir_x_n       : in    std_logic;
+        indir_y_n       : in    std_logic;
+
+        -- enables the arithmetic part of alu_p when '0'
+        arith_en_n      : in    std_logic;
+
+        -- opcode and cycle counter used to pick the operation
+        instruction     : in    std_logic_vector (dsize - 1 downto 0);
+        exec_cycle      : in    std_logic_vector (5 downto 0);
+
+        -- data inputs / bidirectional internal data bus
+        int_d_bus       : inout std_logic_vector (dsize - 1 downto 0);
+        acc_out         : in    std_logic_vector (dsize - 1 downto 0);
+        index_bus       : in    std_logic_vector (dsize - 1 downto 0);
+        bal             : in    std_logic_vector (dsize - 1 downto 0);
+        bah             : in    std_logic_vector (dsize - 1 downto 0);
+
+        -- results
+        addr_back       : out   std_logic_vector (dsize - 1 downto 0);
+        acc_in          : out   std_logic_vector (dsize - 1 downto 0);
+        abl             : out   std_logic_vector (dsize - 1 downto 0);
+        abh             : out   std_logic_vector (dsize - 1 downto 0);
+        pcl_inc_carry   : out   std_logic;
+        ea_carry        : out   std_logic;
+
+        -- status flags
+        carry_in        : in    std_logic;
+        negative        : out   std_logic;
+        zero            : out   std_logic;
+        carry_out       : out   std_logic;
+        overflow        : out   std_logic
+    );
+end alu;
+
+architecture rtl of alu is
+
+-- dsize-bit register; declared here, defined outside this file section.
+component d_flip_flop
+    generic (dsize : integer := 8);
+    port (
+        clk   : in  std_logic;
+        res_n : in  std_logic;
+        set_n : in  std_logic;
+        we_n  : in  std_logic;
+        d     : in  std_logic_vector (dsize - 1 downto 0);
+        q     : out std_logic_vector (dsize - 1 downto 0)
+    );
+end component;
+
+-- Single-bit variant of d_flip_flop; defined outside this file section.
+component d_flip_flop_bit
+    port (
+        clk   : in  std_logic;
+        res_n : in  std_logic;
+        set_n : in  std_logic;
+        we_n  : in  std_logic;
+        d     : in  std_logic;
+        q     : out std_logic
+    );
+end component;
+
+-- dsize-bit buffer with an oe_n control; defined outside this file section.
+component tri_state_buffer
+    generic (dsize : integer := 8);
+    port (
+        oe_n : in  std_logic;
+        d    : in  std_logic_vector (dsize - 1 downto 0);
+        q    : out std_logic_vector (dsize - 1 downto 0)
+    );
+end component;
+
+-- Adder/incrementer used for address arithmetic (entity is further below).
+component address_calculator
+    generic (dsize : integer := 8);
+    port (
+        sel       : in  std_logic_vector (1 downto 0);
+        addr1     : in  std_logic_vector (dsize - 1 downto 0);
+        addr2     : in  std_logic_vector (dsize - 1 downto 0);
+        addr_out  : out std_logic_vector (dsize - 1 downto 0);
+        carry_in  : in  std_logic;
+        carry_out : out std_logic
+    );
+end component;
+
+-- Arithmetic/logic core selected by a 4-bit opcode (entity is further below).
+component alu_core
+    generic (dsize : integer := 8);
+    port (
+        sel       : in  std_logic_vector (3 downto 0);
+        d1        : in  std_logic_vector (dsize - 1 downto 0);
+        d2        : in  std_logic_vector (dsize - 1 downto 0);
+        d_out     : out std_logic_vector (dsize - 1 downto 0);
+        carry_in  : in  std_logic;
+        negative  : out std_logic;
+        zero      : out std_logic;
+        carry_out : out std_logic;
+        overflow  : out std_logic
+    );
+end component;
+
+
+--------- signals for address calculation ----------
+-- write enables of the al/ah holding registers
+signal al_buf_we_n, ah_buf_we_n : std_logic;
+
+-- holding registers: *_reg_in is the flip-flop input, *_reg its output
+signal al_reg_in, ah_reg_in : std_logic_vector (dsize - 1 downto 0);
+signal al_reg, ah_reg : std_logic_vector (dsize - 1 downto 0);
+
+-- connections to the address_calculator instance
+signal a_sel : std_logic_vector (1 downto 0);
+signal addr1, addr2, addr_out : std_logic_vector (dsize - 1 downto 0);
+signal addr_c_in, addr_c : std_logic;
+
+-- input of the flip-flop that drives pcl_inc_carry
+signal pcl_carry_reg_in : std_logic;
+
+----------- signals for arithmetic ----------
+-- connections to the alu_core instance
+signal sel : std_logic_vector (3 downto 0);
+signal d1, d2, d_out, alu_out : std_logic_vector (dsize - 1 downto 0);
+
+-- flag outputs of alu_core
+signal n, z, c, v : std_logic;
+
+-- result holding register and its output buffer
+signal arith_buf_we_n, arith_buf_oe_n : std_logic;
+signal arith_reg_in, arith_reg, arith_reg_out : std_logic_vector (dsize - 1 downto 0);
+signal d_oe_n : std_logic;
+
+-- progress markers for the two-step memory-to-memory operations
+signal m2m_stat_1, m2m_stat_2 : std_logic;
+
+begin
+
+    ----------------------------------------
+    ---- address calculator instances ------
+    ----------------------------------------
+    -- holding registers for the low / high address byte
+    al_dff : d_flip_flop
+            generic map (dsize => dsize)
+            port map (
+                clk   => clk,
+                res_n => '1',
+                set_n => '1',
+                we_n  => al_buf_we_n,
+                d     => al_reg_in,
+                q     => al_reg
+            );
+    ah_dff : d_flip_flop
+            generic map (dsize => dsize)
+            port map (
+                clk   => clk,
+                res_n => '1',
+                set_n => '1',
+                we_n  => ah_buf_we_n,
+                d     => ah_reg_in,
+                q     => ah_reg
+            );
+
+    -- the address carry is only forwarded while pcl is being incremented
+    pcl_carry_reg_in <= addr_c when pcl_inc_n = '0' else '0';
+
+    -- we_n is tied to '0', so this flip-flop is always write-enabled
+    pch_carry_dff_bit : d_flip_flop_bit
+            port map (
+                clk   => clk,
+                res_n => '1',
+                set_n => '1',
+                we_n  => '0',
+                d     => pcl_carry_reg_in,
+                q     => pcl_inc_carry
+            );
+
+    addr_calc_inst : address_calculator
+            generic map (dsize => dsize)
+            port map (
+                sel       => a_sel,
+                addr1     => addr1,
+                addr2     => addr2,
+                addr_out  => addr_out,
+                carry_in  => addr_c_in,
+                carry_out => addr_c
+            );
+
+
+    ----------------------------------------
+    ---- arithmetic operation instances ----
+    ----------------------------------------
+    -- register that holds the operand/result, followed by its output buffer
+    arith_dff : d_flip_flop
+            generic map (dsize => dsize)
+            port map (
+                clk   => clk,
+                res_n => '1',
+                set_n => '1',
+                we_n  => arith_buf_we_n,
+                d     => arith_reg_in,
+                q     => arith_reg
+            );
+    arith_buf : tri_state_buffer
+            generic map (dsize => dsize)
+            port map (
+                oe_n => arith_buf_oe_n,
+                d    => arith_reg,
+                q    => arith_reg_out
+            );
+
+    -- arithmetic core; its result reaches d_out through alu_buf
+    alu_inst : alu_core
+            generic map (dsize => dsize)
+            port map (
+                sel       => sel,
+                d1        => d1,
+                d2        => d2,
+                d_out     => alu_out,
+                carry_in  => carry_in,
+                negative  => n,
+                zero      => z,
+                carry_out => c,
+                overflow  => v
+            );
+    alu_buf : tri_state_buffer
+            generic map (dsize => dsize)
+            port map (
+                oe_n => d_oe_n,
+                d    => alu_out,
+                q    => d_out
+            );
+
+    -------------------------------
+    ------ alu main process -------
+    -------------------------------
+    -- Main control process of the ALU wrapper.
+    --
+    -- Part 1 (address calculation): exactly one of the active-low selects
+    -- (pcl_inc_n, pch_inc_n, sp_oe_n, zp_n, abs_xy_n, rel_calc_n, indir_n,
+    -- indir_x_n, indir_y_n) is served, in that priority order. The branch
+    -- programs the address_calculator (a_sel/addr1/addr2/addr_c_in) and
+    -- drives abl/abh, addr_back and ea_carry.
+    --
+    -- Part 2 (arithmetic): while arith_en_n = '0' the opcode in
+    -- `instruction` selects the alu_core operation and operand routing.
+    --
+    -- Signals that a branch does not assign keep their previous value.
+    --
+    -- NOTE(review): al_reg, ah_reg, m2m_stat_1 and m2m_stat_2 are read below
+    -- but are not in the sensitivity list; confirm whether that is intended
+    -- before adding them, because the process relies on evaluation order.
+    alu_p : process (
+                    clk, 
+                    ---for address calculation
+                    pcl_inc_n, pch_inc_n, sp_oe_n, sp_push_n, sp_pop_n,
+                    abs_xy_n, pg_next_n, zp_n, zp_xy_n, rel_calc_n, 
+                    indir_n, indir_x_n, indir_y_n, 
+                    -- fixed: the list named bal twice and omitted bah.
+                    index_bus, bal, bah, addr_c_in, addr_out, addr_c,
+
+                    --for arithmetic operation.
+                    arith_en_n,
+                    instruction, exec_cycle, int_d_bus, acc_out, 
+                    carry_in, n, z, c, v,
+                    arith_reg, arith_reg_out, alu_out, d_out
+                    )
+
+-- operation codes of address_calculator (must match its architecture)
+constant ADDR_ADC    : std_logic_vector (1 downto 0) := "00";
+constant ADDR_INC    : std_logic_vector (1 downto 0) := "01";
+constant ADDR_DEC    : std_logic_vector (1 downto 0) := "10";
+constant ADDR_SIGNED_ADD : std_logic_vector (1 downto 0) := "11";
+
+-- operation codes of alu_core (must match its architecture)
+constant ALU_AND    : std_logic_vector (3 downto 0) := "0000";
+constant ALU_EOR    : std_logic_vector (3 downto 0) := "0001";
+constant ALU_OR     : std_logic_vector (3 downto 0) := "0010";
+constant ALU_BIT    : std_logic_vector (3 downto 0) := "0011";
+constant ALU_ADC    : std_logic_vector (3 downto 0) := "0100";
+constant ALU_SBC    : std_logic_vector (3 downto 0) := "0101";
+constant ALU_CMP    : std_logic_vector (3 downto 0) := "0110";
+constant ALU_ASL    : std_logic_vector (3 downto 0) := "0111";
+constant ALU_LSR    : std_logic_vector (3 downto 0) := "1000";
+constant ALU_ROL    : std_logic_vector (3 downto 0) := "1001";
+constant ALU_ROR    : std_logic_vector (3 downto 0) := "1010";
+constant ALU_INC    : std_logic_vector (3 downto 0) := "1011";
+constant ALU_DEC    : std_logic_vector (3 downto 0) := "1100";
+
+---exec_cycle values, used for indirect addressing.
+constant T1 : std_logic_vector (5 downto 0) := "000001";
+constant T2 : std_logic_vector (5 downto 0) := "000010";
+constant T3 : std_logic_vector (5 downto 0) := "000011";
+constant T4 : std_logic_vector (5 downto 0) := "000100";
+constant T5 : std_logic_vector (5 downto 0) := "000101";
+
+-- Puts the alu_core result on int_d_bus: directly (d_out) while clk = '0',
+-- and from the holding register (arith_reg_out) otherwise.
+procedure output_d_bus is
+begin
+    arith_buf_we_n <= '0';
+    arith_buf_oe_n <= '0';
+    d_oe_n <= '0';
+    arith_reg_in <= d_out;
+    if (clk = '0') then
+        int_d_bus <= d_out;
+    else
+        int_d_bus <= arith_reg_out;
+    end if;
+end  procedure;
+
+-- Debug print hook; the body is commented out, so calls do nothing.
+procedure d_print(msg : string) is
+use std.textio.all;
+--use ieee.std_logic_textio.all;
+--variable out_l : line;
+begin
+--    write(out_l, msg);
+--    writeline(output, out_l);
+end  procedure;
+
+-- Forwards the alu_core negative/zero flags to the entity outputs.
+procedure set_nz is
+begin
+    negative <= n;
+    zero <= z;
+end procedure;
+
+    begin
+
+    -------------------------------
+    ----- address calculation -----
+    -------------------------------
+    if (pcl_inc_n = '0') then
+        --increment the low byte; the result goes back via addr_back.
+        a_sel <= ADDR_INC;
+        addr1 <= bal;
+        addr_back <= addr_out;
+
+        --keep the value in the cycle
+        al_buf_we_n <= '0';
+        al_reg_in <= bal;
+        if (instruction = "01001100") then
+            ---exceptional case: only jmp instruction 
+            abl <= bal;
+        else
+            if (clk = '0') then
+                abl <= bal;
+            else
+                abl <= al_reg;
+            end if;
+        end if;
+        abh <= bah;
+
+    elsif (pch_inc_n = '0') then
+        a_sel <= ADDR_INC;
+        addr1 <= bah;
+        addr_back <= addr_out;
+
+        --inc pch cycle is not fetch cycle.
+        --it is special cycle.
+        abl <= bal;
+        abh <= bah;
+
+    elsif (sp_oe_n = '0') then
+        --stack operation: the high address byte is fixed to 0x01.
+        abh <= "00000001";
+
+        if (sp_push_n /= '0' and sp_pop_n /= '0') then
+            --neither push nor pop: pass the low byte through.
+            abl <= bal;
+        elsif (sp_pop_n = '0') then
+            --case pop
+            a_sel <= ADDR_INC;
+            addr1 <= bal;
+            addr_back <= addr_out;
+
+            al_buf_we_n <= '0';
+            al_reg_in <= bal;
+            if (clk = '0') then
+                abl <= bal;
+            else
+                abl <= al_reg;
+            end if;
+        else
+            ---case push
+            a_sel <= ADDR_DEC;
+            addr1 <= bal;
+            addr_back <= addr_out;
+
+            al_buf_we_n <= '0';
+            al_reg_in <= bal;
+            if (clk = '0') then
+                abl <= bal;
+            else
+                abl <= al_reg;
+            end if;
+        end if;
+    elsif (zp_n = '0') then
+        --fixed: this was "zp_xy_n <= '0'", a less-than-or-equal comparison
+        --that is also true for 'U' and 'X'; every other select uses "=".
+        if (zp_xy_n = '0') then
+            --zero page indexed: low byte + index, high byte is 0x00.
+            a_sel <= ADDR_ADC;
+            addr1 <= bal;
+            addr2 <= index_bus;
+            addr_c_in <= '0';
+
+            abh <= "00000000";
+            abl <= addr_out;
+        else
+            abh <= "00000000";
+            abl <= bal;
+        end if;
+
+    elsif (abs_xy_n = '0') then
+        if (pg_next_n = '0') then
+            --page crossed: increment the high byte, reuse the saved low byte.
+            a_sel <= ADDR_INC;
+            addr1 <= bah;
+            ea_carry <= '0';
+
+            al_buf_we_n <= '1';
+            abh <= addr_out;
+            ---al is in the al_reg.
+            abl <= al_reg;
+        else
+            a_sel <= ADDR_ADC;
+            addr1 <= bal;
+            addr2 <= index_bus;
+            addr_c_in <= '0';
+            ea_carry <= addr_c;
+
+            ---keep al for page crossed case
+            al_buf_we_n <= '0';
+            al_reg_in <= addr_out;
+            abh <= bah;
+            abl <= addr_out;
+        end if;
+
+    elsif (rel_calc_n = '0') then
+        if (pg_next_n = '0') then
+            if (int_d_bus(7) = '1') then
+                ---backward relative branch
+                a_sel <= ADDR_DEC;
+            else
+                ---forward relative branch
+                a_sel <= ADDR_INC;
+            end if;
+            ---addr1 is pch.
+            addr1 <= bah;
+            ---rel val is on the d_bus.
+            addr_back <= addr_out;
+            ea_carry <= '0'; 
+
+            --keep the value in the cycle
+            ah_buf_we_n <= '0';
+            ah_reg_in <= addr_out;
+            abh <= addr_out;
+            --al no change.
+            abl <= bal;
+        else
+            a_sel <= ADDR_SIGNED_ADD;
+            ---addr1 is pcl.
+            addr1 <= bal;
+            ---rel val is on the d_bus.
+            addr2 <= int_d_bus;
+            addr_back <= addr_out;
+            ea_carry <= addr_c;
+
+            --keep the value in the cycle
+            al_buf_we_n <= '0';
+            al_reg_in <= addr_out;
+            if (clk = '0') then
+                abl <= addr_out;
+            else
+                abl <= al_reg;
+            end if;
+            abh <= bah;
+        end if;
+    elsif (indir_n = '0') then
+        abh <= bah;
+        --get next address.
+        addr1 <= bal;
+        a_sel <= ADDR_INC;
+        abl <= addr_out;
+
+        --TODO: must handle page crossing case...
+        ea_carry <= addr_c;
+
+    elsif (indir_x_n = '0') then
+        --TODO: (indirect, x) addressing is not implemented; nothing is driven.
+
+    elsif (indir_y_n = '0') then
+
+        if (clk = '0') then
+        if (exec_cycle = T2) then
+            ---input is IAL.
+            abh <= "00000000";
+            abl <= bal;
+
+            ---save BAL.
+            al_buf_we_n <= '0';
+            al_reg_in <= int_d_bus;
+
+        elsif (exec_cycle = T3) then
+            al_buf_we_n <= '1';
+
+            abh <= "00000000";
+
+            --input is IAL + 1
+            a_sel <= ADDR_INC;
+            addr1 <= bal;
+            abl <= addr_out;
+
+            ---save BAH.
+            ah_buf_we_n <= '0';
+            ah_reg_in <= int_d_bus;
+
+        elsif (exec_cycle = T4) then
+
+            ---add y reg.
+            a_sel <= ADDR_ADC;
+
+            --bal from al_reg.
+            addr1 <= al_reg;
+            addr2 <= index_bus;
+            addr_c_in <= '0';
+            ea_carry <= addr_c;
+
+            --bah from ah_reg
+            abh <= ah_reg;
+            abl <= addr_out;
+
+            ---save the address.
+            al_buf_we_n <= '0';
+            al_reg_in <= addr_out;
+            ah_buf_we_n <= '0';
+            ah_reg_in <= ah_reg;
+        elsif (exec_cycle = T5) then
+
+            if (pg_next_n = '0') then
+                a_sel <= ADDR_INC;
+                addr1 <= ah_reg;
+                ---next page.
+                abh <= addr_out;
+                abl <= al_reg;
+            else
+                abh <= ah_reg;
+                abl <= al_reg;
+            end if;
+        end if; -- if (exec_cycle = T2) then
+        end if; --if (clk = '0') then
+    else
+        --no address operation selected: pass bal/bah straight through.
+        al_buf_we_n <= '1';
+        ah_buf_we_n <= '1';
+
+        abl <= bal;
+        abh <= bah;
+
+        ----addr_back is always bal for jmp/jsr instruction....
+        -----TODO must check later if it's ok.
+        addr_back <= bal;
+    end if; --if (pcl_inc_n = '0') then
+
+    -------------------------------
+    ---- arithmetic operations ----
+    -------------------------------
+    if (arith_en_n = '0') then
+
+        --single-byte register/accumulator opcodes are matched exactly first.
+        if instruction = conv_std_logic_vector(16#ca#, dsize) then
+            --d_print("dex");
+            sel <= ALU_DEC;
+            d1 <= index_bus;
+            set_nz;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#88#, dsize) then
+            --d_print("dey");
+            sel <= ALU_DEC;
+            d1 <= index_bus;
+            set_nz;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#e8#, dsize) then
+            --d_print("inx");
+            sel <= ALU_INC;
+            d1 <= index_bus;
+            set_nz;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#c8#, dsize) then
+            --d_print("iny");
+            sel <= ALU_INC;
+            d1 <= index_bus;
+            set_nz;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#0a#, dsize) then
+            --d_print("asl");
+            sel <= ALU_ASL;
+            d1 <= acc_out;
+            set_nz;
+            carry_out <= c;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#2a#, dsize) then
+            --rol acc.
+            sel <= ALU_ROL;
+            d1 <= acc_out;
+            set_nz;
+            carry_out <= c;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#4a#, dsize) then
+            --lsr acc.
+            sel <= ALU_LSR;
+            d1 <= acc_out;
+            set_nz;
+            carry_out <= c;
+            output_d_bus;
+
+        elsif instruction = conv_std_logic_vector(16#6a#, dsize) then
+            --ror acc.
+            sel <= ALU_ROR;
+            d1 <= acc_out;
+            set_nz;
+            carry_out <= c;
+            output_d_bus;
+
+        --instruction is aaabbbcc format.
+        elsif instruction (1 downto 0) = "01" then
+            --cc = 01: accumulator (d1) combined with memory data (d2).
+            if instruction (7 downto 5) = "000" then
+                --d_print("ora");
+                sel <= ALU_OR;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                d_oe_n <= '0';
+                acc_in <= d_out;
+                set_nz;
+
+            elsif instruction (7 downto 5) = "001" then
+                --d_print("and");
+                sel <= ALU_AND;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                d_oe_n <= '0';
+                acc_in <= d_out;
+                set_nz;
+
+            elsif instruction (7 downto 5) = "010" then
+                --d_print("eor");
+                sel <= ALU_EOR;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                d_oe_n <= '0';
+                acc_in <= d_out;
+                set_nz;
+
+            elsif instruction (7 downto 5) = "011" then
+                --d_print("adc");
+                sel <= ALU_ADC;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                d_oe_n <= '0';
+
+                acc_in <= d_out;
+                set_nz;
+                carry_out <= c;
+                overflow <= v;
+
+            elsif instruction (7 downto 5) = "110" then
+                --d_print("cmp");
+                --compare A - M; only the flags are updated.
+                sel <= ALU_CMP;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "111" then
+                --d_print("sbc");
+                sel <= ALU_SBC;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                d_oe_n <= '0';
+
+                acc_in <= d_out;
+                set_nz;
+                carry_out <= c;
+                overflow <= v;
+
+            end if; --if instruction (7 downto 5) = "000" then
+
+        elsif instruction (1 downto 0) = "10" then
+
+            --this group is all memory to memory instruction (except for stx/ldx).
+            --memory to memory operation takes two cycles.
+            --first is write original data 
+            --second is write modified data
+
+            arith_reg_in <= int_d_bus;
+            d1 <= arith_reg_out;
+            int_d_bus <= d_out;
+
+            if (clk = '0') then
+                if (m2m_stat_1 = '0') then
+                    --first cycle. keep input variable.
+                    --d_print("inc first.");
+                    m2m_stat_1 <= '1';
+                    arith_buf_we_n <= '0';
+                    arith_buf_oe_n <= '1';
+                    d_oe_n <= '1';
+
+                end if;
+            end if;
+
+            --evaluated only on the falling edge of clk.
+            if (clk'event and clk = '0') then
+                if (m2m_stat_2 = '0') then
+                    --second cycle read from register, output modified data.
+                    --d_print("inc second...");
+                    m2m_stat_2 <= '1';
+                    arith_buf_we_n <= '1';
+                    arith_buf_oe_n <= '0';
+                    d_oe_n <= '0';
+                end if;
+            end if;
+
+
+            if instruction (7 downto 5) = "000" then
+                --d_print("asl");
+                sel <= ALU_ASL;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "001" then
+                --d_print("rol");
+                sel <= ALU_ROL;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "010" then
+                --d_print("lsr");
+                sel <= ALU_LSR;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "011" then
+                --d_print("ror");
+                sel <= ALU_ROR;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "110" then
+                --d_print("dec");
+                sel <= ALU_DEC;
+                set_nz;
+
+            elsif instruction (7 downto 5) = "111" then
+                --d_print("alu inc");
+                sel <= ALU_INC;
+                set_nz;
+
+            end if; --if instruction (7 downto 5) = "000" then
+
+        elsif instruction (1 downto 0) = "00" then
+            --cc = 00: bit test and index-register compares; flags only.
+            if instruction (7 downto 5) = "001" then
+                --d_print("bit");
+                sel <= ALU_BIT;
+                d1 <= acc_out;
+                d2 <= int_d_bus;
+                set_nz;
+                overflow <= v;
+            elsif instruction (7 downto 5) = "110" then
+                --d_print("cpy");
+                sel <= ALU_CMP;
+                d1 <= index_bus;
+                d2 <= int_d_bus;
+                set_nz;
+                carry_out <= c;
+
+            elsif instruction (7 downto 5) = "111" then
+               -- d_print("cpx");
+                sel <= ALU_CMP;
+                d1 <= index_bus;
+                d2 <= int_d_bus;
+                set_nz;
+                carry_out <= c;
+
+            end if; --if instruction (7 downto 5) = "001" then
+        end if; --if instruction = conv_std_logic_vector(16#ca#, dsize) 
+    else
+        --arithmetic disabled: reset the two-step markers, close both
+        --buffers and release the internal data bus.
+        --d_print("no arith");
+        m2m_stat_1 <= '0';
+        m2m_stat_2 <= '0';
+        d_oe_n <= '1';
+        arith_buf_we_n <= '1';
+        arith_buf_oe_n <= '1';
+        int_d_bus <= (others => 'Z');
+    end if; -- if (arith_en_n = '0') then
+
+    end process;
+
+end rtl;
+
+-----------------------------------------
+---------- Address calculator------------
+-----------------------------------------
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_unsigned.all;
+
+-- Address arithmetic unit: add, increment, decrement or signed add,
+-- chosen by sel.
+entity address_calculator is
+    generic (
+        dsize : integer := 8    -- operand width in bits
+    );
+    port (
+        sel       : in  std_logic_vector (1 downto 0);
+        addr1     : in  std_logic_vector (dsize - 1 downto 0);
+        addr2     : in  std_logic_vector (dsize - 1 downto 0);
+        addr_out  : out std_logic_vector (dsize - 1 downto 0);
+        carry_in  : in  std_logic;
+        carry_out : out std_logic
+    );
+end address_calculator;
+
+-- Combinational implementation of the address calculator.
+--
+--   ADDR_ADC        : addr_out = addr1 + addr2 + carry_in, carry_out = bit dsize
+--   ADDR_SIGNED_ADD : addr_out = addr1 + addr2 (carry_in is not used);
+--                     carry_out = addr2 sign bit xor unsigned carry
+--   ADDR_INC        : addr_out = addr1 + 1, carry_out = bit dsize
+--   ADDR_DEC        : addr_out = addr1 - 1, carry_out = bit dsize (borrow)
+--
+-- If sel matches none of the codes (e.g. it holds 'U' or 'X') nothing is
+-- assigned and the outputs keep their previous values.
+architecture rtl of address_calculator is
+
+constant ADDR_ADC    : std_logic_vector (1 downto 0) := "00";
+constant ADDR_INC    : std_logic_vector (1 downto 0) := "01";
+constant ADDR_DEC    : std_logic_vector (1 downto 0) := "10";
+constant ADDR_SIGNED_ADD : std_logic_vector (1 downto 0) := "11";
+
+begin
+
+    alu_p : process (sel, addr1, addr2, carry_in)
+    -- one bit wider than the operands so that bit dsize holds the carry/borrow
+    variable res : std_logic_vector (dsize downto 0);
+
+    begin
+    if sel = ADDR_ADC then
+        res := ('0' & addr1) + ('0' & addr2) + carry_in;
+        addr_out <= res(dsize - 1 downto 0);
+        carry_out <= res(dsize);
+
+    elsif sel = ADDR_SIGNED_ADD then
+        res := ('0' & addr1) + ('0' & addr2);
+        addr_out <= res(dsize - 1 downto 0);
+        -- addr2 is treated as a signed offset:
+        --   positive offset: carry_out is set when the unsigned add carried;
+        --   negative offset: carry_out is set when it did NOT carry.
+        -- Both cases reduce to sign(addr2) xor res(dsize).
+        if ((addr2(dsize - 1) xor res(dsize)) = '1') then
+            carry_out <= '1';
+        else
+            carry_out <= '0';
+        end if;
+
+    elsif sel = ADDR_INC then
+        -- integer operand instead of a 9-bit literal, so any dsize works
+        res := ('0' & addr1) + 1;
+        addr_out <= res(dsize - 1 downto 0);
+        carry_out <= res(dsize);
+    elsif sel = ADDR_DEC then
+        res := ('0' & addr1) - 1;
+        addr_out <= res(dsize - 1 downto 0);
+        carry_out <= res(dsize);
+    end if;
+    end process;
+
+end rtl;
+
+
+-----------------------------------------
+------------- ALU Core -----------------
+-----------------------------------------
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_unsigned.all;
+
+----d1 = acc
+----d2 = memory
+-- Arithmetic/logic core. sel chooses the operation; d1 is the accumulator
+-- side operand and d2 the memory side operand.
+entity alu_core is
+    generic (
+        dsize : integer := 8    -- operand width in bits
+    );
+    port (
+        sel       : in  std_logic_vector (3 downto 0);
+        d1        : in  std_logic_vector (dsize - 1 downto 0);
+        d2        : in  std_logic_vector (dsize - 1 downto 0);
+        d_out     : out std_logic_vector (dsize - 1 downto 0);
+        carry_in  : in  std_logic;
+        negative  : out std_logic;
+        zero      : out std_logic;
+        carry_out : out std_logic;
+        overflow  : out std_logic
+    );
+end alu_core;
+
+-- Combinational implementation of the ALU core.
+--
+-- Each branch drives only the outputs that belong to its operation; any
+-- output a branch does not assign keeps its previous value (for example
+-- d_out during ALU_BIT and ALU_CMP, carry_out during the logic operations).
+-- If sel matches no code, nothing is assigned.
+architecture rtl of alu_core is
+
+-- Debug print hook; the body is commented out, so calls do nothing.
+procedure d_print(msg : string) is
+use std.textio.all;
+--use ieee.std_logic_textio.all;
+--variable out_l : line;
+begin
+--    write(out_l, msg);
+--    writeline(output, out_l);
+end  procedure;
+
+constant ALU_AND    : std_logic_vector (3 downto 0) := "0000";
+constant ALU_EOR    : std_logic_vector (3 downto 0) := "0001";
+constant ALU_OR     : std_logic_vector (3 downto 0) := "0010";
+constant ALU_BIT    : std_logic_vector (3 downto 0) := "0011";
+constant ALU_ADC    : std_logic_vector (3 downto 0) := "0100";
+constant ALU_SBC    : std_logic_vector (3 downto 0) := "0101";
+constant ALU_CMP    : std_logic_vector (3 downto 0) := "0110";
+constant ALU_ASL    : std_logic_vector (3 downto 0) := "0111";
+constant ALU_LSR    : std_logic_vector (3 downto 0) := "1000";
+constant ALU_ROL    : std_logic_vector (3 downto 0) := "1001";
+constant ALU_ROR    : std_logic_vector (3 downto 0) := "1010";
+constant ALU_INC    : std_logic_vector (3 downto 0) := "1011";
+constant ALU_DEC    : std_logic_vector (3 downto 0) := "1100";
+
+begin
+
+    alu_p : process (sel, d1, d2, carry_in)
+    -- one bit wider than the operands so that bit dsize holds the carry/borrow
+    variable res : std_logic_vector (dsize downto 0);
+
+-- Sets the negative flag from the most significant bit of data.
+-- Uses dsize - 1 rather than a fixed bit 7 so the generic is honoured.
+procedure set_n (data : in std_logic_vector (dsize - 1 downto 0)) is
+begin
+    if (data(dsize - 1) = '1') then
+        negative <= '1';
+    else
+        negative <= '0';
+    end if;
+end procedure;
+
+-- Sets the zero flag when data is numerically zero.
+procedure set_z (data : in std_logic_vector (dsize - 1 downto 0)) is
+begin
+    if  (data = 0) then
+        zero <= '1';
+    else
+        zero <= '0';
+    end if;
+end procedure;
+
+    begin
+    if sel = ALU_AND then
+        res(dsize - 1 downto 0) := d1 and d2;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        d_out <= res(dsize - 1 downto 0);
+
+    elsif sel = ALU_EOR then
+        res(dsize - 1 downto 0) := d1 xor d2;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        d_out <= res(dsize - 1 downto 0);
+
+    elsif sel = ALU_OR then
+        res(dsize - 1 downto 0) := d1 or d2;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        d_out <= res(dsize - 1 downto 0);
+
+    elsif sel = ALU_BIT then
+        --transfer bit 7 and 6 of memory data to n, v flag.
+        --(fixed bit positions, as in the original code.)
+        negative <= d2(7);
+        overflow <= d2(6);
+        ----zero bit after A and M.
+        res(dsize - 1 downto 0) := d1 and d2;
+        set_z(res(dsize - 1 downto 0));
+
+    elsif sel = ALU_ADC then
+        res := ('0' & d1) + ('0' & d2) + carry_in;
+        d_out <= res(dsize - 1 downto 0);
+        carry_out <= res(dsize);
+        --overflow: operands share a sign and the result sign differs.
+        if ((d1(dsize - 1) = d2(dsize - 1)) 
+            and (d1(dsize - 1) /= res(dsize - 1))) then
+            overflow <= '1';
+        else
+            overflow <= '0';
+        end if;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+
+    elsif sel = ALU_SBC then
+        ---A - M - ~C -> A
+        res := ('0' & d1) - ('0' & d2) - not carry_in;
+        d_out <= res(dsize - 1 downto 0);
+
+        --c Set if unsigned borrow not required; cleared if unsigned borrow.
+        carry_out <= not res(dsize);
+        --v Set if signed borrow required; cleared if no signed borrow.
+        if ((d1(dsize - 1) /= d2(dsize - 1)) 
+            and (d1(dsize - 1) /= res(dsize - 1))) then
+            overflow <= '1';
+        else
+            overflow <= '0';
+        end if;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+
+    elsif sel = ALU_CMP then
+        --d1 - d2 is computed for the flags only; d_out is not driven.
+        res := ('0' & d1) - ('0' & d2);
+        if (d1 >= d2) then
+            carry_out <= '1';
+        else
+            carry_out <= '0';
+        end if;
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+
+    elsif sel = ALU_ASL then
+        res(dsize - 1 downto 1) := d1(dsize - 2 downto 0);
+        res(0) := '0';
+
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        carry_out <= d1(dsize - 1);
+
+    elsif sel = ALU_LSR then
+        res(dsize - 1) := '0';
+        res(dsize - 2 downto 0) := d1(dsize - 1 downto 1);
+
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        carry_out <= d1(0);
+
+    elsif sel = ALU_ROL then
+        res(dsize - 1 downto 1) := d1(dsize - 2 downto 0);
+        res(0) := carry_in;
+
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        --the bit shifted out is the MSB (was a fixed d1(7)).
+        carry_out <= d1(dsize - 1);
+
+    elsif sel = ALU_ROR then
+        res(dsize - 1) := carry_in;
+        res(dsize - 2 downto 0) := d1(dsize - 1 downto 1);
+
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+        carry_out <= d1(0);
+
+    elsif sel = ALU_INC then
+        --integer operand instead of a 9-bit literal, so any dsize works.
+        res := ('0' & d1) + 1;
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+
+    elsif sel = ALU_DEC then
+        res := ('0' & d1) - 1;
+        d_out <= res(dsize - 1 downto 0);
+        set_n(res(dsize - 1 downto 0));
+        set_z(res(dsize - 1 downto 0));
+
+    end if;
+
+    end process;
+
+end rtl;
+
diff --git a/de1_nes/cpu/decoder.vhd b/de1_nes/cpu/decoder.vhd
new file mode 100644 (file)
index 0000000..633d6e3
--- /dev/null
@@ -0,0 +1,2756 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_arith.conv_std_logic_vector;
+use ieee.std_logic_unsigned.conv_integer;
+
+-- Instruction decoder / control sequencer entity.
+-- Inputs are the clocks, the reset and interrupt lines, the current
+-- instruction byte, the current exec_cycle code and ea_carry.
+-- Outputs are the next_cycle code plus the enable/command strobes that the
+-- procedures in architecture rtl drive for the program counter, stack
+-- pointer, registers, address latches, ALU addressing modes and status
+-- register. Strobes whose name ends in _n are parked at '1' by
+-- disable_pins and driven to '0' when used.
+entity decoder is 
+    -- dsize : width of the instruction byte and of status_reg.
+    generic (dsize : integer := 8);
+    port (  set_clk         : in std_logic;
+            trig_clk        : in std_logic;
+            res_n           : in std_logic;
+            irq_n           : in std_logic;
+            nmi_n           : in std_logic;
+            rdy             : in std_logic;
+            instruction     : in std_logic_vector (dsize - 1 downto 0);
+            -- current cycle code in, following cycle code out
+            -- (see the "cycle bit format" comment in the architecture).
+            exec_cycle      : in std_logic_vector (5 downto 0);
+            next_cycle      : out std_logic_vector (5 downto 0);
+            -- inout: released to 'Z' by read_status, driven with a flag
+            -- mask by the set_* procedures.
+            status_reg      : inout std_logic_vector (dsize - 1 downto 0);
+            inst_we_n       : out std_logic;
+            ad_oe_n         : out std_logic;
+            dbuf_int_oe_n   : out std_logic;
+            dl_al_we_n      : out std_logic;
+            dl_ah_we_n      : out std_logic;
+            dl_al_oe_n      : out std_logic;
+            dl_ah_oe_n      : out std_logic;
+            dl_dh_oe_n      : out std_logic;
+            pcl_inc_n       : out std_logic;
+            pch_inc_n       : out std_logic;
+            -- 4-bit gate commands (see the "gate_cmd format" comment:
+            -- bit 3 front oe_n, bit 2 front we_n, bit 1 back oe_n,
+            -- bit 0 back we_n).
+            pcl_cmd         : out std_logic_vector(3 downto 0);
+            pch_cmd         : out std_logic_vector(3 downto 0);
+            sp_cmd          : out std_logic_vector(3 downto 0);
+            sp_oe_n         : out std_logic;
+            sp_push_n       : out std_logic;
+            sp_pop_n        : out std_logic;
+            acc_cmd         : out std_logic_vector(3 downto 0);
+            x_cmd           : out std_logic_vector(3 downto 0);
+            y_cmd           : out std_logic_vector(3 downto 0);
+            abs_xy_n        : out std_logic;
+            -- selects the extra page-boundary cycle in the
+            -- a2_*_next_cycle assignments of the architecture.
+            ea_carry        : in  std_logic;
+            pg_next_n       : out std_logic;
+            zp_n            : out std_logic;
+            zp_xy_n         : out std_logic;
+            rel_calc_n      : out std_logic;
+            indir_n         : out std_logic;
+            indir_x_n       : out std_logic;
+            indir_y_n       : out std_logic;
+            arith_en_n      : out std_logic;
+            stat_dec_oe_n   : out std_logic;
+            stat_bus_oe_n   : out std_logic;
+            stat_set_flg_n  : out std_logic;
+            stat_flg        : out std_logic;
+            stat_bus_all_n  : out std_logic;
+            stat_bus_nz_n   : out std_logic;
+            stat_alu_we_n   : out std_logic;
+            r_vec_oe_n      : out std_logic;
+            n_vec_oe_n      : out std_logic;
+            i_vec_oe_n      : out std_logic;
+            r_nw            : out std_logic
+            ;---for parameter check purpose!!!
+            check_bit     : out std_logic_vector(1 to 5)
+        );
+end decoder;
+
+architecture rtl of decoder is
+
+-- Single-bit flip-flop component, instantiated below as pch_inc_reg.
+-- Port order matters: pch_inc_reg uses a positional port map.
+-- NOTE(review): the implementation is not in this file; res_n, set_n and
+-- we_n are presumably active-low reset, set and write enable - confirm.
+component d_flip_flop_bit
+    port (  
+            clk     : in std_logic;
+            res_n   : in std_logic;
+            set_n   : in std_logic;
+            we_n    : in std_logic;
+            d       : in std_logic;
+            q       : out std_logic
+        );
+end component;
+
+-- Debug message hook. The textio write/writeline calls are commented out,
+-- so calling this procedure currently does nothing; msg and out_l are
+-- unused. Re-enable the two lines in the body to get the messages back.
+procedure d_print(msg : string) is
+use std.textio.all;
+--use ieee.std_logic_textio.all;
+variable out_l : line;
+begin
+--    write(out_l, msg);
+--    writeline(output, out_l);
+end  procedure;
+
+---Formats the low 8 bits of ival as a two character lowercase hex string.
+---Anything above bit 7 is dropped by the "mod 256", so 16#1234# gives "34".
+function conv_hex8(ival : integer) return string is
+constant digits : string (1 to 16) := "0123456789abcdef";
+variable hi_nibble : integer;
+variable lo_nibble : integer;
+begin
+    lo_nibble := ival mod 16;
+    hi_nibble := (ival mod 256) / 16;
+    --digits is indexed from 1, hence the + 1.
+    return digits(hi_nibble + 1) & digits(lo_nibble + 1);
+end;
+
+--cycle bit format 
+-- bit 5    : pcl increment carry flag
+-- bit 4,3  : cycle type: 00 normal, 01 reset , 10 nmi, 11 irq 
+-- bit 2-0  : cycle
+
+--00xxx : exec cycle : T0 > T1 > T2 > T3 > T4 > T5 > T6 > T0
+constant T0 : std_logic_vector (5 downto 0) := "000000";
+constant T1 : std_logic_vector (5 downto 0) := "000001";
+constant T2 : std_logic_vector (5 downto 0) := "000010";
+constant T3 : std_logic_vector (5 downto 0) := "000011";
+constant T4 : std_logic_vector (5 downto 0) := "000100";
+constant T5 : std_logic_vector (5 downto 0) := "000101";
+constant T6 : std_logic_vector (5 downto 0) := "000110";
+
+--01xxx : reset cycle : R0 > R1 > R2 > R3 > R4 > R5 > T0
+constant R0 : std_logic_vector (5 downto 0) := "001000";
+constant R1 : std_logic_vector (5 downto 0) := "001001";
+constant R2 : std_logic_vector (5 downto 0) := "001010";
+constant R3 : std_logic_vector (5 downto 0) := "001011";
+constant R4 : std_logic_vector (5 downto 0) := "001100";
+constant R5 : std_logic_vector (5 downto 0) := "001101";
+
+--10xxx : nmi cycle : T0 > N1 > N2 > N3 > N4 > N5 > T0
+constant N1 : std_logic_vector (5 downto 0) := "010001";
+constant N2 : std_logic_vector (5 downto 0) := "010010";
+constant N3 : std_logic_vector (5 downto 0) := "010011";
+constant N4 : std_logic_vector (5 downto 0) := "010100";
+constant N5 : std_logic_vector (5 downto 0) := "010101";
+
+--11xxx : irq cycle : T0 > I1 > I2 > I3 > I4 > I5 > T0
+constant I1 : std_logic_vector (5 downto 0) := "011001";
+constant I2 : std_logic_vector (5 downto 0) := "011010";
+constant I3 : std_logic_vector (5 downto 0) := "011011";
+constant I4 : std_logic_vector (5 downto 0) := "011100";
+constant I5 : std_logic_vector (5 downto 0) := "011101";
+
+constant ERROR_CYCLE : std_logic_vector (5 downto 0) := "111111";
+
+-- SR Flags (bit 7 to bit 0):
+--  7   N   ....    Negative
+--  6   V   ....    Overflow
+--  5   -   ....    ignored
+--  4   B   ....    Break
+--  3   D   ....    Decimal (use BCD for arithmetics)
+--  2   I   ....    Interrupt (IRQ disable)
+--  1   Z   ....    Zero
+--  0   C   ....    Carry
+constant st_N : integer := 7;
+constant st_V : integer := 6;
+constant st_B : integer := 4;
+constant st_D : integer := 3;
+constant st_I : integer := 2;
+constant st_Z : integer := 1;
+constant st_C : integer := 0;
+
+---for pch_inc_n.
+signal pch_inc_input : std_logic;
+
+---for nmi handling
+signal nmi_handled_n : std_logic;
+
+-- page boundary handling
+signal a2_abs_xy_next_cycle     : std_logic_vector (5 downto 0);
+signal a2_indir_y_next_cycle    : std_logic_vector (5 downto 0);
+signal a58_branch_next_cycle    : std_logic_vector (5 downto 0);
+signal wait_a2_abs_xy_next      : std_logic;
+signal wait_a2_indir_y_next     : std_logic;
+signal wait_a58_branch_next     : std_logic;
+
+begin
+
+    ---pc page next is connected to top bit of exec_cycle
+    -- pch_inc_input is the inverse of exec_cycle(5); per the cycle bit
+    -- format comment, bit 5 is the pcl increment carry flag.
+    pch_inc_input <= not exec_cycle(5);
+    -- Positional map onto d_flip_flop_bit: clk = set_clk, res_n = '1',
+    -- set_n = '1', we_n = '0', d = pch_inc_input, q = pch_inc_n.
+    pch_inc_reg : d_flip_flop_bit 
+            port map(set_clk, '1', '1', '0', pch_inc_input, pch_inc_n);
+
+    -- Candidate next cycles for the page-boundary cases: an extra cycle
+    -- (T4 / T5 / T3) while ea_carry = '1', otherwise straight to T0.
+    -- main_p is sensitive to these three signals.
+    a2_abs_xy_next_cycle <= T4 when ea_carry = '1' else
+                    T0;
+    a2_indir_y_next_cycle <= T5 when ea_carry = '1' else
+                    T0;
+    a58_branch_next_cycle <= T3 when ea_carry = '1' else
+                    T0;
+
+    main_p : process (set_clk, res_n, nmi_n, 
+                     a2_abs_xy_next_cycle, a2_indir_y_next_cycle, a58_branch_next_cycle)
+
+-------------------------------------------------------------
+-------------------------------------------------------------
+----------------------- common routines ---------------------
+-------------------------------------------------------------
+-------------------------------------------------------------
+
+----------gate_cmd format
+------3 : front port oe_n
+------2 : front port we_n
+------1 : back port oe_n
+------0 : back port we_n
+-- Writes val to bit 3 (front port oe_n) of the given gate command signal;
+-- the other three bits are left untouched.
+procedure front_oe (signal cmd : out std_logic_vector(3 downto 0); 
+    val : in std_logic) is
+begin
+    cmd(3) <= val;
+end;
+-- Writes val to bit 2 (front port we_n in the gate_cmd format) of the given
+-- gate command signal; the other three bits are left untouched.
+procedure front_we (signal cmd : out std_logic_vector(3 downto 0); 
+    val : in std_logic) is
+begin
+    cmd(2) <= val;
+end;
+-- Writes val to bit 1 (back port oe_n in the gate_cmd format) of the given
+-- gate command signal; the other three bits are left untouched.
+procedure back_oe (signal cmd : out std_logic_vector(3 downto 0); 
+    val : in std_logic) is
+begin
+    cmd(1) <= val;
+end;
+-- Writes val to bit 0 (back port we_n in the gate_cmd format) of the given
+-- gate command signal; the other three bits are left untouched.
+procedure back_we (signal cmd : out std_logic_vector(3 downto 0); 
+    val : in std_logic) is
+begin
+    cmd(0) <= val;
+end;
+
+-- Set up the program counter for reading the next byte: pcl_inc_n goes to
+-- '0', the back port oe_n of pcl and pch go to '0', the back port we_n of
+-- pcl goes to '0' and the back port we_n of pch goes to '1'.
+procedure fetch_next is
+begin
+    --pcl : back port oe_n and we_n both low.
+    back_we(pcl_cmd, '0');
+    back_oe(pcl_cmd, '0');
+    --pch : back port oe_n low, we_n high.
+    back_we(pch_cmd, '1');
+    back_oe(pch_cmd, '0');
+    pcl_inc_n <= '0';
+end procedure;
+
+-- Counterpart of fetch_next: pcl_inc_n, the back port oe_n of pcl and pch
+-- and the back port we_n of pcl all go back to '1'.
+-- The back port we_n of pch is not touched here.
+procedure fetch_stop is
+begin
+    back_we(pcl_cmd, '1');
+    back_oe(pcl_cmd, '1');
+    back_oe(pch_cmd, '1');
+    pcl_inc_n <= '1';
+end procedure;
+
+-- Put the decoder in "read the status register" mode: stat_dec_oe_n goes
+-- to '0' and the decoder side of the inout status_reg port is released
+-- to high impedance so that it does not drive the bus.
+procedure read_status is
+begin
+    stat_dec_oe_n <= '0';
+    status_reg <= (others => 'Z');
+end procedure;
+
+-- Parks the control outputs used by the previous instruction.
+-- Every strobe listed here goes to '1', the register gate commands go to
+-- "1111", status_reg is released through read_status and the three
+-- page-boundary wait flags are cleared.
+-- pcl_cmd, pch_cmd, pcl_inc_n, dl_al_oe_n, ad_oe_n, inst_we_n and r_nw are
+-- not touched by this procedure.
+procedure disable_pins is
+begin
+    --disable the last operation pins.
+
+    --data bus buffer and address latches.
+    dbuf_int_oe_n <= '1';
+    dl_al_we_n <= '1';
+    dl_ah_we_n <= '1';
+    dl_ah_oe_n <= '1';
+    dl_dh_oe_n <= '1';
+
+    --register gate commands (all four gate_cmd bits high).
+    sp_cmd <= (others => '1');
+    acc_cmd <= (others => '1');
+    x_cmd <= (others => '1');
+    y_cmd <= (others => '1');
+
+    --stack pointer strobes.
+    sp_oe_n <= '1';
+    sp_push_n <= '1';
+    sp_pop_n <= '1';
+
+    --addressing mode / alu strobes.
+    abs_xy_n <= '1';
+    pg_next_n <= '1';
+    zp_n <= '1';
+    zp_xy_n <= '1';
+    rel_calc_n <= '1';
+    indir_n <= '1';
+    indir_x_n <= '1';
+    indir_y_n <= '1';
+    arith_en_n <= '1';
+
+    --status register: back to read mode, all update strobes off.
+    read_status;
+    stat_bus_oe_n <= '1';
+    stat_set_flg_n <= '1';
+    stat_flg <= '1';
+    stat_bus_all_n <= '1';
+    stat_bus_nz_n <= '1';
+    stat_alu_we_n <= '1';
+
+    --vector outputs.
+    r_vec_oe_n <= '1';
+    n_vec_oe_n <= '1';
+    i_vec_oe_n <= '1';
+
+    --no page boundary decision is pending any more.
+    wait_a2_abs_xy_next <= '0';
+    wait_a2_indir_y_next <= '0';
+    wait_a58_branch_next <= '0';
+end procedure;
+
+-- Opcode fetch set-up.
+-- inc_pcl is copied to pcl_inc_n ('0' lets pcl increment, as in
+-- fetch_next; t0_cycle passes '1' when it starts nmi handling).
+-- When the instruction register still holds opcode 4c the address low
+-- latch is output (dl_al_oe_n = '0') and pcl_cmd is "1110"; otherwise
+-- pcl_cmd is "1100" and the latch output stays off.
+procedure fetch_inst (inc_pcl : in std_logic) is
+begin
+    --default: fetch opcode and pcl increment.
+    pcl_cmd <= "1100";
+    dl_al_oe_n <= '1';
+
+    if instruction = conv_std_logic_vector(16#4c#, dsize) then
+        --if prior cycle is jump instruction,
+        --fetch opcode from where the latch is pointing to.
+        --latch > al. (overrides the two defaults above.)
+        pcl_cmd <= "1110";
+        dl_al_oe_n <= '0';
+    end if;
+
+    pch_cmd <= "1101";
+    pcl_inc_n <= inc_pcl;
+    ad_oe_n <= '0';
+    inst_we_n <= '0';
+    r_nw <= '1';
+
+    d_print(string'("fetch 1"));
+end procedure;
+
+---T0 cycle routine
+---(when a page boundary condition does not apply, the last
+---cycle is bypassed and the sequence slides straight to T0.)
+---Parks the previous cycle's outputs, then starts an opcode fetch.
+---A pending nmi (nmi_n = '0' while nmi_handled_n = '1') fetches with
+---inc_pcl = '1' and continues at N1; otherwise the fetch uses
+---inc_pcl = '0' and continues at T1.
+procedure t0_cycle is
+begin
+    disable_pins;
+    if (nmi_n /= '0' or nmi_handled_n /= '1') then
+        --ordinary instruction fetch.
+        fetch_inst('0');
+        next_cycle <= T1;
+    else
+        --start nmi handling...
+        fetch_inst('1');
+        next_cycle <= N1;
+    end if;
+end procedure;
+
+---common routine for single byte instruction:
+---nothing more to fetch, so go back to T0 and stop the pc fetch strobes.
+procedure single_inst is
+begin
+    next_cycle <= T0;
+    fetch_stop;
+end procedure;
+
+-- Immediate operand cycle: reads the next byte through fetch_next and puts
+-- the data bus buffer on the internal bus (dbuf_int_oe_n = '0').
+-- The caller decides which register receives it. Next cycle is T0.
+procedure fetch_imm is
+begin
+    d_print("immediate");
+    next_cycle <= T0;
+    --send data from data bus buffer.
+    --receiver is instruction dependent.
+    dbuf_int_oe_n <= '0';
+    fetch_next;
+end procedure;
+
+-- Drives stat_bus_nz_n to '0' (disable_pins parks it at '1').
+-- NOTE(review): going by the signal name this asks the status register to
+-- take N and Z from the internal bus - confirm against the status
+-- register implementation.
+procedure set_nz_from_bus is
+begin
+    --status register n/z bit update.
+    stat_bus_nz_n <= '0';
+end  procedure;
+
+-- Status update from the alu for Z and C only.
+-- The decoder stops reading the status register (stat_dec_oe_n = '1'),
+-- drives status_reg with "00000011" (bit 1 = Z, bit 0 = C in the SR flag
+-- table) and drives stat_alu_we_n to '0'.
+-- NOTE(review): the pattern is presumably a mask of the flags to latch -
+-- confirm against the status register implementation.
+procedure set_zc_from_alu is
+begin
+    --status register z/c bit update.
+    status_reg <= "00000011";
+    stat_dec_oe_n <= '1';
+    stat_alu_we_n <= '0';
+end procedure;
+
+-- Status update from the alu for N and Z only.
+-- Same strobes as the other set_*_from_alu procedures; the pattern driven
+-- on status_reg is "10000010" (bit 7 = N, bit 1 = Z in the SR flag table).
+procedure set_nz_from_alu is
+begin
+    --status register n/z bit update.
+    status_reg <= "10000010";
+    stat_dec_oe_n <= '1';
+    stat_alu_we_n <= '0';
+end procedure;
+
+-- Status update from the alu for N, Z and C.
+-- Same strobes as the other set_*_from_alu procedures; the pattern driven
+-- on status_reg is "10000011" (bit 7 = N, bit 1 = Z, bit 0 = C).
+procedure set_nzc_from_alu is
+begin
+    --status register n/z/c bit update.
+    status_reg <= "10000011";
+    stat_dec_oe_n <= '1';
+    stat_alu_we_n <= '0';
+end procedure;
+
+-- Status update from the alu for N, V and Z.
+-- Same strobes as the other set_*_from_alu procedures; the pattern driven
+-- on status_reg is "11000010" (bit 7 = N, bit 6 = V, bit 1 = Z).
+procedure set_nvz_from_alu is
+begin
+    --status register n/v/z bit update.
+    status_reg <= "11000010";
+    stat_dec_oe_n <= '1';
+    stat_alu_we_n <= '0';
+end procedure;
+
+-- Status update from the alu for N, V, Z and C.
+-- Same strobes as the other set_*_from_alu procedures; the pattern driven
+-- on status_reg is "11000011" (bit 7 = N, bit 6 = V, bit 1 = Z, bit 0 = C).
+procedure set_nvzc_from_alu is
+begin
+    --status register n/v/z/c bit update.
+    status_reg <= "11000011";
+    stat_dec_oe_n <= '1';
+    stat_alu_we_n <= '0';
+end procedure;
+
+--flag on/off instruction
+-- int_flg : bit position of the flag inside status_reg (st_N .. st_C).
+-- val     : value forwarded on stat_flg.
+-- The decoder stops reading the status register (stat_dec_oe_n = '1'),
+-- asserts stat_set_flg_n ('0') and drives status_reg with a pattern that
+-- is '1' at int_flg and '0' everywhere else.
+-- The pattern is built bit by bit so that no null slice with an out of
+-- range bound is formed when int_flg is the top or the bottom bit; the
+-- procedure therefore works for every flag position.
+procedure set_flag (int_flg : in integer; val : in std_logic) is
+begin
+    stat_dec_oe_n <= '1';
+    stat_set_flg_n <= '0';
+    --specify which to set.
+    for i in status_reg'range loop
+        if i = int_flg then
+            status_reg(i) <= '1';
+        else
+            status_reg(i) <= '0';
+        end if;
+    end loop;
+    stat_flg <= val;
+end  procedure;
+
+--for sec/clc
+-- Same strobes as set_flag, with the status_reg pattern fixed to
+-- "00000001" (bit 0 = C in the SR flag table). val is forwarded on stat_flg.
+procedure set_flag0 (val : in std_logic) is
+begin
+    status_reg <= "00000001";
+    stat_flg <= val;
+    stat_set_flg_n <= '0';
+    stat_dec_oe_n <= '1';
+end procedure;
+
+-- First operand byte (abs low): reads the next byte through fetch_next,
+-- puts the data bus buffer on the internal bus and write-enables the
+-- address low latch (dl_al_we_n = '0'). Next cycle is T2.
+procedure fetch_low is
+begin
+    d_print("fetch low 2");
+    next_cycle <= T2;
+    --latch abs low data.
+    dl_al_we_n <= '0';
+    dbuf_int_oe_n <= '0';
+    --fetch next opcode (abs low).
+    fetch_next;
+end procedure;
+
+-- Second operand byte (abs high): closes the address low latch write
+-- (dl_al_we_n = '1'), reads the next byte through fetch_next and
+-- write-enables the address high latch (dl_ah_we_n = '0').
+-- Next cycle is T3.
+procedure abs_fetch_high is
+begin
+    d_print("abs (xy) 3");
+    next_cycle <= T3;
+
+    --low byte is already latched.
+    dl_al_we_n <= '1';
+
+    --latch abs hi data.
+    dl_ah_we_n <= '0';
+    dbuf_int_oe_n <= '0';
+    fetch_next;
+end procedure;
+
+-- Stops the pc fetch strobes, closes the address high latch write and
+-- outputs both address latches (dl_al_oe_n = dl_ah_oe_n = '0').
+-- next_cycle is left to the caller.
+procedure abs_latch_out is
+begin
+    --d_print("abs 4");
+
+    --latch > al/ah.
+    dl_ah_oe_n <= '0';
+    dl_al_oe_n <= '0';
+
+    dl_ah_we_n <= '1';
+    fetch_stop;
+end procedure;
+
+-- Effective address with index X: asserts abs_xy_n ('0') and drives the
+-- back port oe_n of the x register to '0'.
+procedure ea_x_out is
+begin
+    -----calculate and output effective addr
+    abs_xy_n <= '0';
+    back_oe(x_cmd, '0');
+end procedure;
+
+-- Effective address with index Y: asserts abs_xy_n ('0') and drives the
+-- back port oe_n of the y register to '0'.
+procedure ea_y_out is
+begin
+    -----calculate and output effective addr
+    abs_xy_n <= '0';
+    back_oe(y_cmd, '0');
+end procedure;
+
+--A.2. internal execution on memory data
+
+-- A.2 zp addressing: T1 latches the operand byte, T2 outputs it as the
+-- address with zp_n asserted and puts the data bus buffer on the internal
+-- bus, then returns to T0. Other cycles are ignored.
+procedure a2_zp is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            dbuf_int_oe_n <= '0';
+            dl_al_we_n <= '1';
+
+            --calc zp.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.2 absolute addressing: T1 and T2 latch the low and high address
+-- bytes, T3 outputs both latches and puts the data bus buffer on the
+-- internal bus, then returns to T0. Other cycles are ignored.
+procedure a2_abs is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            abs_latch_out;
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- Called from main_p when a page boundary was crossed: takes back the
+-- write strobes that the instruction may have opened one cycle too early.
+-- Front port we_n of acc, x and y, back port we_n of acc and
+-- stat_alu_we_n all go to '1'.
+procedure a2_page_next is
+begin
+    --close open gate if page boundary crossed.
+    stat_alu_we_n <= '1';
+    front_we(y_cmd, '1');
+    front_we(x_cmd, '1');
+    front_we(acc_cmd, '1');
+    back_we(acc_cmd, '1');
+end procedure;
+
+-- A.2 absolute,X / absolute,Y addressing.
+-- is_x : true indexes with the x register, false with the y register.
+-- T3 outputs latch + index and then follows a2_abs_xy_next_cycle, which
+-- is T4 while ea_carry = '1' and T0 otherwise; T4 repeats the access
+-- with pg_next_n driven from ea_carry. Other cycles are ignored.
+procedure a2_abs_xy (is_x : in boolean) is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            --ea calc & lda
+            pg_next_n <= '1';
+            abs_latch_out;
+            if is_x then
+                ea_x_out;
+            else
+                ea_y_out;
+            end if;
+            dbuf_int_oe_n <= '0';
+
+            --the page boundary decision is taken from ea_carry.
+            wait_a2_abs_xy_next <= '1';
+            next_cycle <= a2_abs_xy_next_cycle;
+            d_print("absx step 1");
+
+        when T4 =>
+            --case page boundary crossed.
+            --redo inst.
+            d_print("absx 5 (page boudary crossed.)");
+            --next page.
+            pg_next_n <= not ea_carry;
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.2 zp,X / zp,Y addressing.
+-- is_x : true indexes with the x register, false with the y register.
+-- T2 outputs the latched byte alone as address, T3 asserts zp_xy_n
+-- together with the index register output, then returns to T0.
+-- Other cycles are ignored.
+procedure a2_zp_xy (is_x : in boolean) is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            --output BAL only
+            dbuf_int_oe_n <= '0';
+            dl_al_we_n <= '1';
+
+            --calc zp.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            --t3 zp, xy
+            zp_xy_n <= '0';
+            if is_x then
+                back_oe(x_cmd, '0');
+            else
+                back_oe(y_cmd, '0');
+            end if;
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.2 (indirect),Y addressing.
+-- T1 latches IAL, T2 and T3 keep indir_y_n asserted with the latch
+-- output while BAL and BAH are read, T4 adds the y index and then follows
+-- a2_indir_y_next_cycle (T5 while ea_carry = '1', T0 otherwise), T5
+-- repeats the access with pg_next_n driven from ea_carry.
+-- Other cycles are ignored.
+procedure a2_indir_y is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+            --get IAL
+            dl_al_we_n <= '0';
+
+        when T2 =>
+            fetch_stop;
+            dl_al_we_n <= '1';
+
+            ---address is 00:IAL
+            --output BAL @IAL
+            indir_y_n <= '0';
+            dl_al_oe_n <= '0';
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            indir_y_n <= '0';
+            dl_al_oe_n <= '0';
+            --output BAH @IAL+1
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T4;
+
+        when T4 =>
+            dl_al_oe_n <= '1';
+            dbuf_int_oe_n <= '1';
+
+            --add index y.
+            pg_next_n <= '1';
+            back_oe(y_cmd, '0');
+            indir_y_n <= '0';
+            --this later assignment wins: the buffer stays on the bus.
+            dbuf_int_oe_n <= '0';
+
+            --the page boundary decision is taken from ea_carry.
+            wait_a2_indir_y_next <= '1';
+            next_cycle <= a2_indir_y_next_cycle;
+
+        when T5 =>
+            --case page boundary crossed.
+            --redo inst.
+            d_print("(indir), y (page boudary crossed.)");
+            --next page.
+            pg_next_n <= not ea_carry;
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+--A.3. store operation.
+
+-- A.3 store, zp addressing: T1 latches the operand byte, T2 outputs it as
+-- the address with zp_n asserted, keeps the data bus buffer off the
+-- internal bus and drives r_nw to '0' (write), then returns to T0.
+-- Other cycles are ignored.
+procedure a3_zp is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            dbuf_int_oe_n <= '1';
+            dl_al_we_n <= '1';
+
+            --calc zp.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.3 store, zp,X / zp,Y addressing.
+-- is_x : true indexes with the x register, false with the y register.
+-- T2 outputs the latched byte alone, T3 adds the index (zp_xy_n) and
+-- drives r_nw to '0' (write), then returns to T0.
+-- Other cycles are ignored.
+procedure a3_zp_xy (is_x : in boolean) is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            dbuf_int_oe_n <= '1';
+            dl_al_we_n <= '1';
+
+            --calc zp.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            --calc zp + index.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            zp_xy_n <= '0';
+            if is_x then
+                back_oe(x_cmd, '0');
+            else
+                back_oe(y_cmd, '0');
+            end if;
+
+            --write data
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.3 store, absolute addressing: T1 and T2 latch the address bytes, T3
+-- outputs both latches, keeps the data bus buffer off the internal bus
+-- and drives r_nw to '0' (write), then returns to T0.
+-- Other cycles are ignored.
+procedure a3_abs is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            abs_latch_out;
+            dbuf_int_oe_n <= '1';
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.3 store, absolute,X / absolute,Y addressing.
+-- is_x : true indexes with the x register, false with the y register.
+-- Unlike a2_abs_xy the T4 cycle is always executed: T3 only calculates
+-- the address, T4 repeats it with pg_next_n driven from ea_carry and
+-- drives r_nw to '0' (write). Other cycles are ignored.
+procedure a3_abs_xy (is_x : in boolean) is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            --ea calc only, nothing is written yet.
+            pg_next_n <= '1';
+            abs_latch_out;
+            dbuf_int_oe_n <= '1';
+            if is_x then
+                ea_x_out;
+            else
+                ea_y_out;
+            end if;
+            next_cycle <= T4;
+
+        when T4 =>
+            pg_next_n <= not ea_carry;
+            abs_latch_out;
+            if is_x then
+                ea_x_out;
+            else
+                ea_y_out;
+            end if;
+            --write data
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+
+-- A.3 store, (indirect),Y addressing.
+-- T1 to T3 match a2_indir_y (IAL, BAL, BAH). T4 adds the y index and
+-- always continues to T5, where pg_next_n is driven from ea_carry and
+-- r_nw goes to '0' (write). Other cycles are ignored.
+procedure a3_indir_y is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+            --get IAL
+            dl_al_we_n <= '0';
+
+        when T2 =>
+            fetch_stop;
+            dl_al_we_n <= '1';
+
+            ---address is 00:IAL
+            --output BAL @IAL
+            indir_y_n <= '0';
+            dl_al_oe_n <= '0';
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            indir_y_n <= '0';
+            dl_al_oe_n <= '0';
+            --output BAH @IAL+1
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T4;
+
+        when T4 =>
+            dl_al_oe_n <= '1';
+            dbuf_int_oe_n <= '1';
+
+            --add index y.
+            pg_next_n <= '1';
+            back_oe(y_cmd, '0');
+            indir_y_n <= '0';
+            next_cycle <= T5;
+
+        when T5 =>
+            --page handling.
+            back_oe(y_cmd, '1');
+            indir_y_n <= '0';
+            pg_next_n <= not ea_carry;
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+
+---A.4. read-modify-write operation
+
+-- A.4 read-modify-write, zp addressing.
+-- T2 only outputs the address, T3 reads the data with arith_en_n
+-- asserted, T4 drives r_nw to '0' with arith_en_n still asserted to write
+-- the result, then returns to T0. Other cycles are ignored.
+procedure a4_zp is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            dbuf_int_oe_n <= '1';
+            dl_al_we_n <= '1';
+
+            --t2 cycle only read
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            --keep data in the alu reg.
+            arith_en_n <= '0';
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T4;
+
+        when T4 =>
+            dbuf_int_oe_n <= '1';
+
+            --t4 cycle writes modified value.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            r_nw <= '0';
+            arith_en_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.4 read-modify-write, zp,X addressing.
+-- T2 outputs the latched byte alone, T3 adds the x index, T4 reads the
+-- data with arith_en_n asserted, T5 drives r_nw to '0' to write the
+-- result, then returns to T0. Other cycles are ignored.
+procedure a4_zp_x is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            fetch_stop;
+            dbuf_int_oe_n <= '1';
+            dl_al_we_n <= '1';
+
+            --t2 cycle read bal only.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            --t3 cycle read bal + x
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            zp_xy_n <= '0';
+            back_oe(x_cmd, '0');
+            next_cycle <= T4;
+
+        when T4 =>
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            zp_xy_n <= '0';
+            back_oe(x_cmd, '0');
+
+            --keep data in the alu reg.
+            arith_en_n <= '0';
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T5;
+
+        when T5 =>
+            dbuf_int_oe_n <= '1';
+
+            --t5 cycle writes modified value.
+            dl_al_oe_n <= '0';
+            zp_n <= '0';
+            zp_xy_n <= '0';
+            back_oe(x_cmd, '0');
+            r_nw <= '0';
+            arith_en_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+
+-- A.4 read-modify-write, absolute addressing.
+-- T3 only outputs the address latches, T4 asserts arith_en_n, T5 drives
+-- r_nw to '0' with arith_en_n still asserted to write the result, then
+-- returns to T0. Other cycles are ignored.
+procedure a4_abs is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            --T3 cycle do nothing.
+            abs_latch_out;
+            next_cycle <= T4;
+
+        when T4 =>
+            abs_latch_out;
+
+            --t4 cycle save data in the alu register only.
+            --hardware manual says write original data,
+            --but this implementation doesn't write because bus shortage....
+            arith_en_n <= '0';
+            next_cycle <= T5;
+
+        when T5 =>
+            dbuf_int_oe_n <= '1';
+
+            --t5 cycle writes modified value.
+            r_nw <= '0';
+            arith_en_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.4 read-modify-write, absolute,X addressing.
+-- T3 outputs latch + x, T4 repeats it with pg_next_n driven from
+-- ea_carry, T5 asserts arith_en_n, T6 drives r_nw to '0' to write the
+-- result, then returns to T0. Other cycles are ignored.
+procedure a4_abs_x is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_low;
+
+        when T2 =>
+            abs_fetch_high;
+
+        when T3 =>
+            --T3 cycle discarded.
+            pg_next_n <= '1';
+            abs_latch_out;
+            ea_x_out;
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T4;
+
+        when T4 =>
+            --t4 cycle fetch only.
+            abs_latch_out;
+            ea_x_out;
+            pg_next_n <= not ea_carry;
+            next_cycle <= T5;
+
+        when T5 =>
+            --t5 cycle redo fetch and save data in the alu register only.
+            arith_en_n <= '0';
+            next_cycle <= T6;
+
+        when T6 =>
+            --t6 cycle writes modified value.
+            r_nw <= '0';
+            arith_en_n <= '0';
+            dbuf_int_oe_n <= '1';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+
+-- A.5.1 push stack
+-- Push sequence: T1 stops the pc fetch strobes, T2 drives the back port
+-- oe_n and we_n of sp to '0', asserts sp_push_n and sp_oe_n and drives
+-- r_nw to '0' (write), then returns to T0. Other cycles are ignored.
+procedure a51_push is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_stop;
+            next_cycle <= T2;
+
+        when T2 =>
+            back_oe(sp_cmd, '0');
+            back_we(sp_cmd, '0');
+            sp_push_n <= '0';
+            sp_oe_n <= '0';
+            r_nw <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-- A.5.2 pull stack
+-- Pull sequence: T1 stops the pc fetch strobes, T2 updates sp with
+-- sp_pop_n asserted, T3 releases sp_pop_n and the sp write, keeps sp on
+-- the address side and puts the data bus buffer on the internal bus, then
+-- returns to T0. Other cycles are ignored.
+procedure a52_pull is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_stop;
+            next_cycle <= T2;
+
+        when T2 =>
+            --stack pointer update first.
+            back_oe(sp_cmd, '0');
+            back_we(sp_cmd, '0');
+            sp_pop_n <= '0';
+            sp_oe_n <= '0';
+            next_cycle <= T3;
+
+        when T3 =>
+            sp_pop_n <= '1';
+            back_we(sp_cmd, '1');
+
+            ---pop data from stack.
+            back_oe(sp_cmd, '0');
+            sp_oe_n <= '0';
+            dbuf_int_oe_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+
+-- A.5.8 branch operations
+
+-- Conditional branch sequence.
+-- int_flg : bit position of the tested flag inside status_reg.
+-- br_cond : value the flag must have for the branch to be taken.
+-- T1 reads the offset byte; when the flag does not match the sequence
+-- returns to T0 at once. T2 calculates the relative address and then
+-- follows a58_branch_next_cycle (T3 while ea_carry = '1', T0 otherwise).
+-- T3 repeats the calculation with pg_next_n asserted and the pch back
+-- port write opened. Other cycles are ignored.
+procedure a58_branch (int_flg : in integer; br_cond : in std_logic) is
+begin
+    case exec_cycle is
+        when T1 =>
+            fetch_next;
+            if status_reg(int_flg) = br_cond then
+                d_print("get rel");
+
+                --latch rel value.
+                dbuf_int_oe_n <= '0';
+                dl_ah_we_n <= '0';
+                next_cycle <= T2;
+            else
+                d_print("no branch");
+                next_cycle <= T0;
+            end if;
+
+        when T2 =>
+            d_print("rel ea");
+            fetch_stop;
+            dbuf_int_oe_n <= '1';
+            dl_ah_we_n <= '1';
+
+            --calc relative addr.
+            rel_calc_n <= '0';
+            pg_next_n <= '1';
+            dl_dh_oe_n <= '0';
+            --these follow fetch_stop on purpose: they re-open the pc back
+            --port strobes that fetch_stop has just closed.
+            back_oe(pcl_cmd, '0');
+            back_oe(pch_cmd, '0');
+            back_we(pcl_cmd, '0');
+
+            --the page boundary decision is taken from ea_carry.
+            wait_a58_branch_next <= '1';
+            next_cycle <= a58_branch_next_cycle;
+
+        when T3 =>
+            d_print("page crossed.");
+            --page crossed. adh calc.
+            back_we(pcl_cmd, '1');
+            back_oe(pcl_cmd, '0');
+            back_oe(pch_cmd, '0');
+            back_we(pch_cmd, '0');
+            dl_dh_oe_n <= '0';
+
+            rel_calc_n <= '0';
+            pg_next_n <= '0';
+            next_cycle <= T0;
+
+        when others =>
+            null;
+    end case;
+end procedure;
+
+-------------------------------------------------------------
+-------------------------------------------------------------
+---------------- main state machine start.... ---------------
+-------------------------------------------------------------
+-------------------------------------------------------------
+    begin
+
+        if (res_n = '0') then
+            --pc l/h is reset vector.
+            pcl_cmd <= "1110";
+            pch_cmd <= "1110";
+            next_cycle <= R0;
+        elsif (res_n'event and res_n = '1') then
+            pcl_cmd <= "1111";
+            pch_cmd <= "1111";
+        end if;
+
+        if (nmi_n'event and nmi_n = '1') then
+            --reset nmi handle status
+            nmi_handled_n <= '1';
+        end if;
+
+
+        if (a2_abs_xy_next_cycle'event) then
+            if (wait_a2_abs_xy_next = '1') then
+                d_print("absx step 2");
+                next_cycle <= a2_abs_xy_next_cycle;
+                if (ea_carry = '1') then
+                    a2_page_next;
+                end if;
+            end if;
+        end if;
+
+        if (a2_indir_y_next_cycle'event) then
+            if (wait_a2_indir_y_next = '1') then
+                d_print("indir step 2");
+                next_cycle <= a2_indir_y_next_cycle;
+                if (ea_carry = '1') then
+                    a2_page_next;
+                end if;
+            end if;
+        end if;
+
+        if (a58_branch_next_cycle'event) then
+            if (wait_a58_branch_next = '1') then
+                d_print("branch step 2");
+                next_cycle <= a58_branch_next_cycle;
+                if (ea_carry = '1') then
+                    a2_page_next;
+                end if;
+            end if;
+        end if;
+
+        if (set_clk'event and set_clk = '1' and res_n = '1') then
+            d_print(string'("-"));
+
+            if exec_cycle = T0 then
+                --cycle #1
+                t0_cycle;
+
+            elsif exec_cycle = T1 or exec_cycle = T2 or exec_cycle = T3 or 
+                exec_cycle = T4 or exec_cycle = T5 or exec_cycle = T6 then
+                --execute inst.
+
+                ---asyncronous page change might happen.
+                back_we(pch_cmd, '1');
+
+                if exec_cycle = T1 then
+                    d_print("decode and execute inst: " 
+                            & conv_hex8(conv_integer(instruction)));
+                    --disable pin for jmp instruction 
+                    dl_al_oe_n <= '1';
+                    back_we(pcl_cmd, '1');
+                    front_we(pch_cmd, '1');
+
+                    --grab instruction register data.
+                    inst_we_n <= '1';
+                end if;
+
+                --implementation is written in the order of the hardware manual,
+                --appendix A.
+
+                ----------------------------------------
+                --A.1. Single byte instruction.
+                ----------------------------------------
+                if instruction = conv_std_logic_vector(16#0a#, dsize) then
+                    --asl acc mode.
+                    d_print("asl");
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    front_we(acc_cmd, '0');
+                    set_nzc_from_alu;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#18#, dsize) then
+                    d_print("clc");
+                    set_flag0 ('0');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#d8#, dsize) then
+                    d_print("cld");
+                    set_flag (st_D, '0');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#58#, dsize) then
+                    d_print("cli");
+                    set_flag (st_I, '0');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#b8#, dsize) then
+                    d_print("clv");
+                    set_flag (st_V, '0');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#ca#, dsize) then
+                    d_print("dex");
+                    arith_en_n <= '0';
+                    back_oe(x_cmd, '0');
+                    front_we(x_cmd, '0');
+                    --set nz bit.
+                    set_nz_from_bus;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#88#, dsize) then
+                    d_print("dey");
+                    arith_en_n <= '0';
+                    back_oe(y_cmd, '0');
+                    front_we(y_cmd, '0');
+                    --set nz bit.
+                    set_nz_from_bus;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#e8#, dsize) then
+                    d_print("inx");
+                    arith_en_n <= '0';
+                    back_oe(x_cmd, '0');
+                    front_we(x_cmd, '0');
+                    --set nz bit.
+                    set_nz_from_bus;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#c8#, dsize) then
+                    d_print("iny");
+                    arith_en_n <= '0';
+                    back_oe(y_cmd, '0');
+                    front_we(y_cmd, '0');
+                    set_nz_from_bus;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#4a#, dsize) then
+                    --lsr acc mode
+                    d_print("lsr");
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    front_we(acc_cmd, '0');
+                    set_zc_from_alu;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#ea#, dsize) then
+                    d_print("nop");
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#2a#, dsize) then
+                    --rol acc
+                    d_print("rol");
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    front_we(acc_cmd, '0');
+                    set_nzc_from_alu;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#6a#, dsize) then
+                    --ror acc
+                    d_print("ror");
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    front_we(acc_cmd, '0');
+                    set_nzc_from_alu;
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#38#, dsize) then
+                    d_print("sec");
+                    set_flag0 ('1');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#f8#, dsize) then
+                    d_print("sed");
+                    set_flag (st_D, '1');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#78#, dsize) then
+                    d_print("sei");
+                    set_flag (st_I, '1');
+                    single_inst;
+
+                elsif instruction = conv_std_logic_vector(16#aa#, dsize) then
+                    d_print("tax");
+                    set_nz_from_bus;
+                    single_inst;
+                    front_oe(acc_cmd, '0');
+                    front_we(x_cmd, '0');
+
+                elsif instruction = conv_std_logic_vector(16#a8#, dsize) then
+                    d_print("tay");
+                    set_nz_from_bus;
+                    single_inst;
+                    front_oe(acc_cmd, '0');
+                    front_we(y_cmd, '0');
+
+                elsif instruction = conv_std_logic_vector(16#ba#, dsize) then
+                    --tsx: copy the stack pointer into x.
+                    --handled like the other register transfers (tax, txa,
+                    --...): source register is output-enabled, destination
+                    --register is write-enabled, n/z are taken from the bus
+                    --and the instruction completes as a single_inst.
+                    d_print("tsx");
+                    set_nz_from_bus;
+                    single_inst;
+                    front_oe(sp_cmd, '0');
+                    front_we(x_cmd, '0');
+
+                elsif instruction = conv_std_logic_vector(16#8a#, dsize) then
+                    d_print("txa");
+                    set_nz_from_bus;
+                    single_inst;
+                    front_oe(x_cmd, '0');
+                    front_we(acc_cmd, '0');
+
+                elsif instruction = conv_std_logic_vector(16#9a#, dsize) then
+                    --txs: copy x into the stack pointer.
+                    --unlike the other register transfers, txs leaves every
+                    --status flag untouched on the 6502, so n/z must not be
+                    --updated here.
+                    d_print("txs");
+                    single_inst;
+                    front_oe(x_cmd, '0');
+                    front_we(sp_cmd, '0');
+
+                elsif instruction = conv_std_logic_vector(16#98#, dsize) then
+                    d_print("tya");
+                    set_nz_from_bus;
+                    single_inst;
+                    front_oe(y_cmd, '0');
+                    front_we(acc_cmd, '0');
+
+
+
+                ----------------------------------------
+                --A.2. internal execution on memory data
+                ----------------------------------------
+                elsif instruction  = conv_std_logic_vector(16#69#, dsize) then
+                    --imm
+                    d_print("adc");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    back_we(acc_cmd, '0');
+                    set_nvzc_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#65#, dsize) then
+                    --zp
+                    d_print("adc");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#75#, dsize) then
+                    --zp, x
+                    d_print("adc");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#6d#, dsize) then
+                    --abs
+                    d_print("adc");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#7d#, dsize) then
+                    --abs, x
+                    d_print("adc");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#79#, dsize) then
+                    --abs, y
+                    d_print("adc");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#61#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("adc");
+
+                elsif instruction  = conv_std_logic_vector(16#71#, dsize) then
+                    --(indir), y
+                    d_print("adc");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#29#, dsize) then
+                    --imm
+                    d_print("and");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    back_we(acc_cmd, '0');
+                    set_nz_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#25#, dsize) then
+                    --zp
+                    d_print("and");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#35#, dsize) then
+                    --zp, x
+                    d_print("and");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#2d#, dsize) then
+                    --abs
+                    d_print("and");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#3d#, dsize) then
+                    --abs, x
+                    d_print("and");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#39#, dsize) then
+                    --abs, y
+                    d_print("and");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#21#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("and");
+
+                elsif instruction  = conv_std_logic_vector(16#31#, dsize) then
+                    --(indir), y
+                    d_print("and");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#24#, dsize) then
+                    --zp
+                    d_print("bit");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nvz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#2c#, dsize) then
+                    --abs
+                    d_print("bit");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nvz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#c9#, dsize) then
+                    --imm
+                    d_print("cmp");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    set_nzc_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#c5#, dsize) then
+                    --zp
+                    d_print("cmp");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#d5#, dsize) then
+                    --zp, x
+                    d_print("cmp");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#cd#, dsize) then
+                    --abs
+                    d_print("cmp");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#dd#, dsize) then
+                    --abs, x
+                    d_print("cmp");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#d9#, dsize) then
+                    --abs, y
+                    d_print("cmp");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#c1#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("cmp");
+
+                elsif instruction  = conv_std_logic_vector(16#d1#, dsize) then
+                    --(indir), y
+                    d_print("cmp");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#e0#, dsize) then
+                    --imm
+                    d_print("cpx");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(x_cmd, '0');
+                    set_nzc_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#e4#, dsize) then
+                    --zp
+                    d_print("cpx");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(x_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ec#, dsize) then
+                    --abs
+                    d_print("cpx");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(x_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#c0#, dsize) then
+                    --imm
+                    d_print("cpy");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(y_cmd, '0');
+                    set_nzc_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#c4#, dsize) then
+                    --zp
+                    d_print("cpy");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(y_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#cc#, dsize) then
+                    --abs
+                    d_print("cpy");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(y_cmd, '0');
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#49#, dsize) then
+                    --imm
+                    d_print("eor");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    back_we(acc_cmd, '0');
+                    set_nz_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#45#, dsize) then
+                    --zp
+                    d_print("eor");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#55#, dsize) then
+                    --zp, x
+                    d_print("eor");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#4d#, dsize) then
+                    --abs
+                    d_print("eor");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#5d#, dsize) then
+                    --abs, x
+                    d_print("eor");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#59#, dsize) then
+                    --abs, y
+                    d_print("eor");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#41#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("eor");
+
+                elsif instruction  = conv_std_logic_vector(16#51#, dsize) then
+                    --(indir), y
+                    d_print("eor");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#a9#, dsize) then
+                    --imm
+                    d_print("lda");
+                    fetch_imm;
+                    front_we(acc_cmd, '0');
+                    set_nz_from_bus;
+
+                elsif instruction  = conv_std_logic_vector(16#a5#, dsize) then
+                    --zp
+                    d_print("lda");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#b5#, dsize) then
+                    --zp, x
+                    d_print("lda");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ad#, dsize) then
+                    --abs
+                    d_print("lda");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        set_nz_from_bus;
+                        front_we(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#bd#, dsize) then
+                    --abs, x
+                    d_print("lda");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        --lda.
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#b9#, dsize) then
+                    --abs, y
+                    d_print("lda");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        --lda.
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#a1#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("lda");
+
+                elsif instruction  = conv_std_logic_vector(16#b1#, dsize) then
+                    --(indir), y
+                    d_print("lda");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        --lda.
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#a2#, dsize) then
+                    --imm
+                    d_print("ldx");
+                    fetch_imm;
+                    set_nz_from_bus;
+                    front_we(x_cmd, '0');
+
+                elsif instruction  = conv_std_logic_vector(16#a6#, dsize) then
+                    --zp
+                    d_print("ldx");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        front_we(x_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#b6#, dsize) then
+                    --zp, y
+                    d_print("ldx");
+                    a2_zp_xy(false);
+                    if exec_cycle = T3 then
+                        front_we(x_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ae#, dsize) then
+                    --abs
+                    d_print("ldx");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        set_nz_from_bus;
+                        front_we(x_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#be#, dsize) then
+                    --abs, y
+                    d_print("ldx");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        front_we(x_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#a0#, dsize) then
+                    --imm
+                    d_print("ldy");
+                    fetch_imm;
+                    set_nz_from_bus;
+                    front_we(y_cmd, '0');
+
+                elsif instruction  = conv_std_logic_vector(16#a4#, dsize) then
+                    --zp
+                    d_print("ldy");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        front_we(y_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#b4#, dsize) then
+                    --zp, x
+                    d_print("ldy");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        front_we(y_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ac#, dsize) then
+                    --abs
+                    d_print("ldy");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        set_nz_from_bus;
+                        front_we(y_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#bc#, dsize) then
+                    --abs, x
+                    d_print("ldy");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        set_nz_from_bus;
+                        front_we(y_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#09#, dsize) then
+                    --imm
+                    d_print("ora");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    back_we(acc_cmd, '0');
+                    set_nz_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#05#, dsize) then
+                    --zp
+                    d_print("ora");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#15#, dsize) then
+                    --zp, x
+                    d_print("ora");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#0d#, dsize) then
+                    --abs
+                    d_print("ora");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#1d#, dsize) then
+                    --abs, x
+                    d_print("ora");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#19#, dsize) then
+                    --abs, y
+                    d_print("ora");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#01#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("ora");
+
+                elsif instruction  = conv_std_logic_vector(16#11#, dsize) then
+                    --(indir), y
+                    d_print("ora");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nz_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#e9#, dsize) then
+                    --imm
+                    d_print("sbc");
+                    fetch_imm;
+                    arith_en_n <= '0';
+                    back_oe(acc_cmd, '0');
+                    back_we(acc_cmd, '0');
+                    set_nvzc_from_alu;
+
+                elsif instruction  = conv_std_logic_vector(16#e5#, dsize) then
+                    --zp
+                    d_print("sbc");
+                    a2_zp;
+                    if exec_cycle = T2 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#f5#, dsize) then
+                    --zp, x
+                    d_print("sbc");
+                    a2_zp_xy(true);
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ed#, dsize) then
+                    --abs
+                    d_print("sbc");
+                    a2_abs;
+                    if exec_cycle = T3 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#fd#, dsize) then
+                    --abs, x
+                    d_print("sbc");
+                    a2_abs_xy(true);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#f9#, dsize) then
+                    --abs, y
+                    d_print("sbc");
+                    a2_abs_xy(false);
+                    if exec_cycle = T3 or exec_cycle = T4 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#e1#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("sbc");
+
+                elsif instruction  = conv_std_logic_vector(16#f1#, dsize) then
+                    --(indir), y
+                    d_print("sbc");
+                    a2_indir_y;
+                    if exec_cycle = T4 or exec_cycle = T5 then
+                        arith_en_n <= '0';
+                        back_oe(acc_cmd, '0');
+                        back_we(acc_cmd, '0');
+                        set_nvzc_from_alu;
+                    end if;
+
+
+
+                ----------------------------------------
+                ---A.3. store operation.
+                ----------------------------------------
+                elsif instruction  = conv_std_logic_vector(16#85#, dsize) then
+                    --zp
+                    d_print("sta");
+                    a3_zp;
+                    if exec_cycle = T2 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#95#, dsize) then
+                    --zp, x  NOTE(review): front_oe is asserted at T2 here (same as plain zp), whereas the a2_zp_xy branches act at T3 -- confirm the cycle against a3_zp_xy.
+                    d_print("sta");
+                    a3_zp_xy(true);
+                    if exec_cycle = T2 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#8d#, dsize) then
+                    --abs
+                    d_print("sta");
+                    a3_abs;
+                    if exec_cycle = T3 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#9d#, dsize) then
+                    --abs, x
+                    d_print("sta");
+                    a3_abs_xy (true);
+                    if exec_cycle = T4 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#99#, dsize) then
+                    --abs, y
+                    d_print("sta");
+                    a3_abs_xy (false);
+                    if exec_cycle = T4 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#81#, dsize) then
+                    --(indir, x) TODO: not implemented; this branch only prints the mnemonic.
+                    d_print("sta");
+
+                elsif instruction  = conv_std_logic_vector(16#91#, dsize) then
+                    --(indir), y
+                    d_print("sta");
+                    a3_indir_y;
+                    if exec_cycle = T5 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#86#, dsize) then
+                    --zp
+                    d_print("stx");
+                    a3_zp;
+                    if exec_cycle = T2 then
+                        front_oe(x_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#96#, dsize) then
+                    --zp, y
+                    d_print("stx");
+                    a3_zp_xy(false);
+                    if exec_cycle = T2 then
+                        front_oe(x_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#8e#, dsize) then
+                    --abs
+                    d_print("stx");
+                    a3_abs;
+                    if exec_cycle = T3 then
+                        front_oe(x_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#84#, dsize) then
+                    --zp
+                    d_print("sty");
+                    a3_zp;
+                    if exec_cycle = T2 then
+                        front_oe(y_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#94#, dsize) then
+                    --zp, x
+                    d_print("sty");
+                    a3_zp_xy(true);
+                    if exec_cycle = T2 then
+                        front_oe(y_cmd, '0');
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#8c#, dsize) then
+                    --abs
+                    d_print("sty");
+                    a3_abs;
+                    if exec_cycle = T3 then
+                        front_oe(y_cmd, '0');
+                    end if;
+
+
+                ----------------------------------------
+                ---A.4. read-modify-write operation
+                ----------------------------------------
+                elsif instruction  = conv_std_logic_vector(16#06#, dsize) then
+                    --zp
+                    d_print("asl");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#16#, dsize) then
+                    --zp, x
+                    d_print("asl");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#0e#, dsize) then
+                    --abs
+                    d_print("asl");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#1e#, dsize) then
+                    --abs, x
+                    d_print("asl");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#c6#, dsize) then
+                    --zp
+                    d_print("dec");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#d6#, dsize) then
+                    --zp, x
+                    d_print("dec");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ce#, dsize) then
+                    --abs
+                    d_print("dec");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#de#, dsize) then
+                    --abs, x
+                    d_print("dec");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#e6#, dsize) then
+                    --zp
+                    d_print("inc");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#f6#, dsize) then
+                    --zp, x
+                    d_print("inc");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#ee#, dsize) then
+                    --abs
+                    d_print("inc");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#fe#, dsize) then
+                    --abs, x
+                    d_print("inc");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_nz_from_bus;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#46#, dsize) then
+                    --zp
+                    d_print("lsr");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_zc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#56#, dsize) then
+                    --zp, x
+                    d_print("lsr");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_zc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#4e#, dsize) then
+                    --abs
+                    d_print("lsr");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_zc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#5e#, dsize) then
+                    --abs, x
+                    d_print("lsr");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_zc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#26#, dsize) then
+                    --zp
+                    d_print("rol");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#36#, dsize) then
+                    --zp, x
+                    d_print("rol");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#2e#, dsize) then
+                    --abs
+                    d_print("rol");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#3e#, dsize) then
+                    --abs, x
+                    d_print("rol");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#66#, dsize) then
+                    --zp
+                    d_print("ror");
+                    a4_zp;
+                    if exec_cycle = T4 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#76#, dsize) then
+                    --zp, x
+                    d_print("ror");
+                    a4_zp_x;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#6e#, dsize) then
+                    --abs
+                    d_print("ror");
+                    a4_abs;
+                    if exec_cycle = T5 then
+                        set_nzc_from_alu;
+                    end if;
+
+                elsif instruction  = conv_std_logic_vector(16#7e#, dsize) then
+                    --abs, x
+                    d_print("ror");
+                    a4_abs_x;
+                    if exec_cycle = T6 then
+                        set_nzc_from_alu;
+                    end if;
+
+
+                ----------------------------------------
+                --A.5. miscellaneous operations.
+                ----------------------------------------
+
+                -- A.5.1 push/pull
+                elsif instruction = conv_std_logic_vector(16#08#, dsize) then
+                    d_print("php");
+                    a51_push;
+                    if exec_cycle = T2 then
+                        stat_bus_oe_n <= '0';
+                    end if;
+
+                elsif instruction = conv_std_logic_vector(16#48#, dsize) then
+                    d_print("pha");
+                    a51_push;
+                    if exec_cycle = T2 then
+                        front_oe(acc_cmd, '0');
+                    end if;
+
+                elsif instruction = conv_std_logic_vector(16#28#, dsize) then
+                    d_print("plp");
+                    a52_pull;
+                    if exec_cycle = T3 then
+                        stat_dec_oe_n <= '1';
+                        stat_bus_all_n <= '0';
+                    end if;
+
+                elsif instruction = conv_std_logic_vector(16#68#, dsize) then
+                    d_print("pla");
+                    a52_pull;
+                    if exec_cycle = T3 then
+                        front_we(acc_cmd, '0');
+                        set_nz_from_bus;
+                    end if;
+
+
+                ----------------------------------------
+                -- A.5.3 jsr
+                ----------------------------------------
+                elsif instruction = conv_std_logic_vector(16#20#, dsize) then
+                    if exec_cycle = T1 then
+                        d_print("jsr abs 2");
+                        --fetch opcode.
+                        fetch_next;
+                        dbuf_int_oe_n <= '0';
+                        --latch adl
+                        dl_al_we_n <= '0';
+                        next_cycle <= T2;
+                    elsif exec_cycle = T2 then
+                        d_print("jsr 3");
+                        fetch_stop;
+                        dbuf_int_oe_n <= '1';
+                        dl_al_we_n <= '1';
+
+                       --push return addr high into stack.
+                        sp_push_n <= '0';
+                        sp_oe_n <= '0';
+                        front_oe(pch_cmd, '0');
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        r_nw <= '0';
+                        next_cycle <= T3;
+                    elsif exec_cycle = T3 then
+                        d_print("jsr 4");
+                        front_oe(pch_cmd, '1');
+
+                       --push return addr low into stack.
+                        sp_push_n <= '0';
+                        sp_oe_n <= '0';
+                        front_oe(pcl_cmd, '0');
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        r_nw <= '0';
+
+                        next_cycle <= T4;
+                    elsif exec_cycle = T4 then
+                        d_print("jsr 5");
+                        sp_push_n <= '1';
+                        sp_oe_n <= '1';
+                        front_oe(pcl_cmd, '1');
+                        back_oe(sp_cmd, '1');
+                        back_we(sp_cmd, '1');
+                        r_nw <= '1';
+
+                        --fetch last op.
+                        back_oe(pch_cmd, '0');
+                        back_oe(pcl_cmd, '0');
+                        dbuf_int_oe_n <= '0';
+                        dl_ah_we_n <= '0';
+
+                        next_cycle <= T5;
+                    elsif exec_cycle = T5 then
+                        d_print("jsr 6");
+
+                        back_oe(pch_cmd, '1');
+                        back_oe(pcl_cmd, '1');
+                        dbuf_int_oe_n <= '1';
+                        dl_ah_we_n <= '1';
+
+                        --load/output  pch
+                        ad_oe_n <= '1';
+                        dl_dh_oe_n <= '0';
+                        front_we(pch_cmd, '0');
+
+                        --load pcl.
+                        dl_al_oe_n <= '0';
+                        back_we(pcl_cmd, '0');
+
+                        next_cycle <= T0;
+                    end if; --if exec_cycle = T1 then
+
+                -- A.5.4 break
+                elsif instruction = conv_std_logic_vector(16#00#, dsize) then
+
+                ----------------------------------------
+                -- A.5.5 return from interrupt
+                ----------------------------------------
+                elsif instruction = conv_std_logic_vector(16#40#, dsize) then
+                    if exec_cycle = T1 then
+                        d_print("rti 2");
+                        fetch_stop;
+
+                        --pop stack (decrement only)
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        sp_pop_n <= '0';
+                        sp_oe_n <= '0';
+
+                        next_cycle <= T2;
+                    elsif exec_cycle = T2 then
+                        d_print("rti 3");
+
+                        --pop p (status)
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        sp_pop_n <= '0';
+                        sp_oe_n <= '0';
+
+                        --load status reg
+                        stat_dec_oe_n <= '1';
+                        dbuf_int_oe_n <= '0';
+                        stat_bus_all_n <= '0';
+
+                        next_cycle <= T3;
+                    elsif exec_cycle = T3 then
+                        d_print("rti 4");
+                        stat_bus_all_n <= '1';
+
+                        --pop pcl
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        sp_pop_n <= '0';
+                        sp_oe_n <= '0';
+
+                        --load lo addr.
+                        dbuf_int_oe_n <= '0';
+                        front_we(pcl_cmd, '0');
+
+                        next_cycle <= T4;
+                    elsif exec_cycle = T4 then
+                        d_print("rti 5");
+                        --stack decrement stop.
+                        back_we(sp_cmd, '1');
+                        sp_pop_n <= '1';
+                        front_we(pcl_cmd, '1');
+
+                        --pop pch
+                        back_oe(sp_cmd, '0');
+                        sp_oe_n <= '0';
+                        --load hi addr.
+                        dbuf_int_oe_n <= '0';
+                        front_we(pch_cmd, '0');
+
+                        next_cycle <= T5;
+                    elsif exec_cycle = T5 then
+                        d_print("rti 6");
+                        back_oe(sp_cmd, '1');
+                        sp_oe_n <= '1';
+                        --load hi addr.
+                        dbuf_int_oe_n <= '1';
+                        front_we(pch_cmd, '1');
+
+                        --increment pc.
+                        next_cycle <= T0;
+                    end if; --if exec_cycle = T1 then
+
+                ----------------------------------------
+                -- A.5.6 jmp
+                ----------------------------------------
+                elsif instruction = conv_std_logic_vector(16#4c#, dsize) then
+                    --abs
+                    if exec_cycle = T1 then
+                        d_print("jmp 2");
+                        --fetch next opcode (abs low).
+                        fetch_next;
+
+                        --latch abs low data.
+                        dbuf_int_oe_n <= '0';
+                        dl_al_we_n <= '0';
+                        next_cycle <= T2;
+                    elsif exec_cycle = T2 then
+                        d_print("jmp 3");
+                        dl_al_we_n <= '1';
+
+                        --fetch abs hi
+                        fetch_next;
+
+                        --latch  in dlh
+                        dbuf_int_oe_n <= '0';
+                        dl_ah_we_n <= '0';
+                        ---load pch.
+                        front_we(pch_cmd, '0');
+
+                        next_cycle <= T0;
+                    end if;
+
+                elsif instruction = conv_std_logic_vector(16#6c#, dsize) then
+                    --jmp (indir)
+                    if exec_cycle = T1 then
+                        d_print("jmp 2");
+                        --fetch next opcode (abs low).
+                        fetch_next;
+
+                        --latch abs low data.
+                        dbuf_int_oe_n <= '0';
+                        dl_al_we_n <= '0';
+                        next_cycle <= T2;
+                    elsif exec_cycle = T2 then
+                        d_print("jmp 3");
+                        dl_al_we_n <= '1';
+
+                        --fetch abs hi
+                        fetch_next;
+
+                        --latch  in dlh
+                        dbuf_int_oe_n <= '0';
+                        dl_ah_we_n <= '0';
+                        next_cycle <= T3;
+
+                    elsif exec_cycle = T3 then
+                        fetch_stop;
+                        dl_ah_we_n <= '1';
+
+                        --IAH/IAL > ADL
+                        dl_ah_oe_n <= '0';
+                        dl_al_oe_n <= '0';
+                        front_we(pcl_cmd, '0');
+                        next_cycle <= T4;
+
+                    elsif exec_cycle = T4 then
+                        dl_ah_oe_n <= '0';
+                        dl_al_oe_n <= '0';
+                        front_we(pcl_cmd, '1');
+
+                        --IAH/IAL+1 > ADH
+                        front_we(pch_cmd, '0');
+                        indir_n <= '0';
+
+                        next_cycle <= T0;
+
+                    end if;
+
+
+                ----------------------------------------
+                -- A.5.7 return from subroutine
+                ----------------------------------------
+                elsif instruction = conv_std_logic_vector(16#60#, dsize) then
+                    if exec_cycle = T1 then
+                        d_print("rts 2");
+                        fetch_stop;
+
+                        --pop stack (decrement only)
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        sp_pop_n <= '0';
+                        sp_oe_n <= '0';
+
+                        next_cycle <= T2;
+                    elsif exec_cycle = T2 then
+                        d_print("rts 3");
+
+                        --pop pcl
+                        back_oe(sp_cmd, '0');
+                        back_we(sp_cmd, '0');
+                        sp_pop_n <= '0';
+                        sp_oe_n <= '0';
+
+                        --load lo addr.
+                        dbuf_int_oe_n <= '0';
+                        front_we(pcl_cmd, '0');
+
+                        next_cycle <= T3;
+                    elsif exec_cycle = T3 then
+                        d_print("rts 4");
+                        --stack decrement stop.
+                        back_we(sp_cmd, '1');
+                        sp_pop_n <= '1';
+                        front_we(pcl_cmd, '1');
+
+                        --pop pch
+                        back_oe(sp_cmd, '0');
+                        sp_oe_n <= '0';
+                        --load hi addr.
+                        dbuf_int_oe_n <= '0';
+                        front_we(pch_cmd, '0');
+
+                        next_cycle <= T4;
+                    elsif exec_cycle = T4 then
+                        d_print("rts 5");
+                        back_oe(sp_cmd, '1');
+                        sp_oe_n <= '1';
+                        --load hi addr.
+                        dbuf_int_oe_n <= '1';
+                        front_we(pch_cmd, '1');
+                        --empty cycle.
+                        --complying h/w manual...
+                        next_cycle <= T5;
+                    elsif exec_cycle = T5 then
+                        d_print("rts 6");
+
+                        --increment pc.
+                        fetch_next;
+                        next_cycle <= T0;
+                    end if; --if exec_cycle = T1 then
+
+                ----------------------------------------
+                -- A.5.8 branch operations
+                ----------------------------------------
+                elsif instruction = conv_std_logic_vector(16#90#, dsize) then
+                    d_print("bcc");
+                    a58_branch (st_C, '0');
+
+                elsif instruction = conv_std_logic_vector(16#b0#, dsize) then
+                    d_print("bcs");
+                    a58_branch (st_C, '1');
+
+                elsif instruction = conv_std_logic_vector(16#f0#, dsize) then
+                    d_print("beq");
+                    a58_branch (st_Z, '1');
+
+                elsif instruction = conv_std_logic_vector(16#30#, dsize) then
+                    d_print("bmi");
+                    a58_branch (st_N, '1');
+
+                elsif instruction = conv_std_logic_vector(16#d0#, dsize) then
+                    d_print("bne");
+                    a58_branch (st_Z, '0');
+
+                elsif instruction = conv_std_logic_vector(16#10#, dsize) then
+                    d_print("bpl");
+                    a58_branch (st_N, '0');
+
+                elsif instruction = conv_std_logic_vector(16#50#, dsize) then
+                    d_print("bvc");
+                    a58_branch (st_V, '0');
+
+                elsif instruction = conv_std_logic_vector(16#70#, dsize) then
+                    d_print("bvs");
+                    a58_branch (st_V, '1');
+
+                else
+                    ---unknown instruction!!!!
+                    assert false 
+                        report "======== unknow instruction " 
+                            & conv_hex8(conv_integer(instruction)) 
+                        severity failure;
+                end if; --if instruction = conv_std_logic_vector(16#0a#, dsize) 
+
+            elsif exec_cycle = R0 then
+                d_print(string'("reset"));
+
+                front_we(pch_cmd, '1');
+                back_we(pcl_cmd, '1');
+
+                --initialize port...
+                inst_we_n <= '1';
+                ad_oe_n <= '1';
+                dbuf_int_oe_n <= '1';
+                dl_al_we_n <= '1';
+                dl_ah_we_n <= '1';
+                dl_al_oe_n <= '1';
+                dl_ah_oe_n <= '1';
+                dl_dh_oe_n <= '1';
+                pcl_inc_n <= '1';
+                pcl_cmd <= "1111";
+                pch_cmd <= "1111";
+                sp_cmd <= "1111";
+                sp_oe_n <= '1';
+                sp_push_n <= '1';
+                sp_pop_n <= '1';
+                acc_cmd <= "1111";
+                x_cmd <= "1111";
+                y_cmd <= "1111";
+
+                abs_xy_n <= '1';
+                pg_next_n <= '1';
+                zp_n <= '1';
+                zp_xy_n <= '1';
+                rel_calc_n <= '1';
+                indir_n <= '1';
+                indir_x_n <= '1';
+                indir_y_n <= '1';
+                arith_en_n <= '1';
+
+                stat_dec_oe_n <= '1';
+                stat_bus_oe_n <= '1';
+                stat_set_flg_n <= '1';
+                stat_flg <= '1';
+                stat_bus_all_n <= '1';
+                stat_bus_nz_n <= '1';
+                stat_alu_we_n <= '1';
+
+                r_vec_oe_n <= '1';
+                n_vec_oe_n <= '1';
+                i_vec_oe_n <= '1';
+                nmi_handled_n <= '1';
+                r_nw <= '1';
+
+                next_cycle <= R1;
+            elsif exec_cycle = R1 or exec_cycle = N1 then
+                pcl_cmd <= "1111";
+                pcl_inc_n <= '1';
+                inst_we_n <= '1';
+                dl_al_oe_n <= '1';
+
+                --push pch.
+                d_print("R1");
+                ad_oe_n <= '0';
+                sp_push_n <= '0';
+                sp_oe_n <= '0';
+                pch_cmd <= "0111";
+                --front_oe(pch_cmd, '0');
+                back_oe(sp_cmd, '0');
+                back_we(sp_cmd, '0');
+                r_nw <= '0';
+
+                if exec_cycle = R1 then
+                    next_cycle <= R2;
+                elsif exec_cycle = N1 then
+                    next_cycle <= N2;
+                end if;
+
+            elsif exec_cycle = R2 or exec_cycle = N2 then
+                front_oe(pch_cmd, '1');
+
+               --push pcl.
+                sp_push_n <= '0';
+                sp_oe_n <= '0';
+                front_oe(pcl_cmd, '0');
+                back_oe(sp_cmd, '0');
+                back_we(sp_cmd, '0');
+                r_nw <= '0';
+
+                if exec_cycle = R2 then
+                    next_cycle <= R3;
+                elsif exec_cycle = N2 then
+                    next_cycle <= N3;
+                end if;
+
+            elsif exec_cycle = R3 or exec_cycle = N3 then
+                front_oe(pcl_cmd, '1');
+
+               --push status.
+                sp_push_n <= '0';
+                sp_oe_n <= '0';
+                stat_bus_oe_n <= '0';
+                back_oe(sp_cmd, '0');
+                back_we(sp_cmd, '0');
+                r_nw <= '0';
+
+                if exec_cycle = R3 then
+                    next_cycle <= R4;
+                elsif exec_cycle = N3 then
+                    next_cycle <= N4;
+                end if;
+
+            elsif exec_cycle = R4 or exec_cycle = N4 then
+                stat_bus_oe_n <= '1';
+                sp_push_n <= '1';
+                sp_oe_n <= '1';
+                front_oe(pcl_cmd, '1');
+                back_oe(sp_cmd, '1');
+                back_we(sp_cmd, '1');
+
+                --fetch reset vector low
+                r_nw <= '1';
+                dbuf_int_oe_n <= '0';
+                front_we(pcl_cmd, '0');
+
+                if exec_cycle = R4 then
+                    r_vec_oe_n <= '0';
+                    next_cycle <= R5;
+                elsif exec_cycle = N4 then
+                    n_vec_oe_n <= '0';
+                    next_cycle <= N5;
+                end if;
+                
+            elsif exec_cycle = R5 or exec_cycle = N5 then
+                front_we(pcl_cmd, '1');
+
+                --fetch reset vector hi
+                front_we(pch_cmd, '0');
+                indir_n <= '0';
+
+                if exec_cycle = N5 then
+                    nmi_handled_n <= '0';
+                end if;
+                --start execute cycle.
+                next_cycle <= T0;
+
+            elsif exec_cycle(5) = '1' then
+                ---pc increment and next page.
+                d_print(string'("pch next page..."));
+                --pcl stop increment
+                pcl_inc_n <= '1';
+                back_we(pcl_cmd, '1');
+                --pch increment
+                back_we(pch_cmd, '0');
+                back_oe(pch_cmd, '0');
+
+                if ('0' & exec_cycle(4 downto 0) = T0) then
+                    --do the t0 identical routine.
+                    disable_pins;
+                    inst_we_n <= '1';
+                    r_nw <= '1';
+
+                elsif ('0' & exec_cycle(4 downto 0) = T1) then
+                    --if fetch cycle, preserve instruction register
+                    inst_we_n <= '1';
+
+                    --TODO: must handle for jmp case???
+                elsif ('0' & exec_cycle(4 downto 0) = T2) then
+                    --disable previous we_n gate.
+                    --t1 cycle is fetch low operand.
+                    dl_al_we_n <= '1';
+                elsif ('0' & exec_cycle(4 downto 0) = T3) then
+                    --t2 cycle is fetch high operand.
+                    dl_ah_we_n <= '1';
+                end if;
+
+            end if; --if exec_cycle = T0 then
+
+        end if; --if (set_clk'event and set_clk = '1') 
+
+    end process;
+
+end rtl;
+
diff --git a/de1_nes/cpu/mos6502.vhd b/de1_nes/cpu/mos6502.vhd
new file mode 100644 (file)
index 0000000..a8d7326
--- /dev/null
@@ -0,0 +1,550 @@
+library ieee;
+use ieee.std_logic_1164.all;
+
+entity mos6502 is -- external pin interface of the 6502 CPU core; all wiring lives in architecture rtl.
+    generic (   dsize : integer := 8; -- data bus width in bits (d_io and the internal buses)
+                asize : integer :=16 -- address bus width in bits (addr)
+            );
+    port (  input_clk   : in std_logic; --phi0 input pin.
+            rdy         : in std_logic; -- passed unchanged to the decoder's rdy port
+            rst_n       : in std_logic; -- reset, asserted when '0'; feeds the decoder and the register instances
+            irq_n       : in std_logic; -- passed unchanged to the decoder's irq_n port
+            nmi_n       : in std_logic; -- passed unchanged to the decoder's nmi_n port
+            dbe         : in std_logic; -- NOTE(review): not referenced anywhere in architecture rtl
+            r_nw        : out std_logic; -- copy of the decoder's r_nw output (dbuf_r_nw)
+            phi1        : out std_logic; -- driven with input_clk
+            phi2        : out std_logic; -- driven with the inverse of input_clk
+            addr        : out std_logic_vector ( asize - 1 downto 0); -- abh (upper byte) and abl (lower byte) through tri-state buffers gated by ad_oe_n
+            d_io        : inout std_logic_vector ( dsize - 1 downto 0) -- external data bus; connected to the data bus buffer and to the instruction register input
+    );
+end mos6502;
+
+architecture rtl of mos6502 is
+
+    ----------------------------------------------
+    ------------ decoder declaration -------------
+    ----------------------------------------------
+component decoder -- control-line generator of the CPU; this declaration must stay in sync with the decoder entity's port list.
+    generic (dsize : integer := 8);
+    port (  set_clk         : in std_logic; -- wired to set_clk (= input_clk) in this architecture
+            trig_clk        : in std_logic; -- wired to trigger_clk (= not input_clk) in this architecture
+            res_n           : in std_logic; -- wired to rst_n
+            irq_n           : in std_logic;
+            nmi_n           : in std_logic;
+            rdy             : in std_logic;
+            instruction     : in std_logic_vector (dsize - 1 downto 0); -- output of the instruction register (ir)
+            exec_cycle      : in std_logic_vector (5 downto 0); -- bits 4..0 come from the cycle register, bit 5 is pcl_inc_carry
+            next_cycle      : out std_logic_vector (5 downto 0); -- only bits 4..0 are latched by exec_cycle_inst
+            status_reg      : inout std_logic_vector (dsize - 1 downto 0); -- shared with processor_status.dec_val
+            inst_we_n       : out std_logic; -- write enable of the instruction register
+            ad_oe_n         : out std_logic; -- output enable of the external address buffers
+            dbuf_int_oe_n   : out std_logic; -- int_oe_n of the data bus buffer
+            dl_al_we_n      : out std_logic; -- we_n of input data latch idl_l
+            dl_ah_we_n      : out std_logic; -- we_n of input data latch idl_h
+            dl_al_oe_n      : out std_logic; -- oe_n of idl_l (drives bal)
+            dl_ah_oe_n      : out std_logic; -- gates idl_h output onto bah
+            dl_dh_oe_n      : out std_logic; -- gates idl_h output onto int_d_bus
+            pcl_inc_n       : out std_logic;
+            pch_inc_n       : out std_logic;
+            pcl_cmd         : out std_logic_vector(3 downto 0); -- gate_cmd of the pcl register
+            pch_cmd         : out std_logic_vector(3 downto 0); -- gate_cmd of the pch register
+            sp_cmd          : out std_logic_vector(3 downto 0); -- gate_cmd of the sp register
+            sp_oe_n         : out std_logic;
+            sp_push_n       : out std_logic;
+            sp_pop_n        : out std_logic;
+            acc_cmd         : out std_logic_vector(3 downto 0); -- gate_cmd of the acc register
+            x_cmd           : out std_logic_vector(3 downto 0); -- gate_cmd of the x register
+            y_cmd           : out std_logic_vector(3 downto 0); -- gate_cmd of the y register
+            abs_xy_n        : out std_logic;
+            ea_carry        : in  std_logic; -- produced by the alu
+            pg_next_n       : out std_logic;
+            zp_n            : out std_logic;
+            zp_xy_n         : out std_logic;
+            rel_calc_n      : out std_logic;
+            indir_n         : out std_logic;
+            indir_x_n       : out std_logic;
+            indir_y_n       : out std_logic;
+            arith_en_n      : out std_logic;
+            stat_dec_oe_n   : out std_logic; -- the stat_* lines below go to the processor_status instance
+            stat_bus_oe_n   : out std_logic;
+            stat_set_flg_n  : out std_logic;
+            stat_flg        : out std_logic;
+            stat_bus_all_n  : out std_logic;
+            stat_bus_nz_n   : out std_logic;
+            stat_alu_we_n   : out std_logic;
+            r_vec_oe_n      : out std_logic; -- enables the reset vector buffers onto bal/bah
+            n_vec_oe_n      : out std_logic; -- enables the nmi vector buffers onto bal/bah
+            i_vec_oe_n      : out std_logic; -- enables the irq vector buffers onto bal/bah
+            r_nw            : out std_logic
+            ;---for parameter check purpose!!!
+            check_bit     : out std_logic_vector(1 to 5)
+        );
+end component;
+
+component alu -- ALU / address calculation unit; this declaration must stay in sync with the alu entity's port list.
+    generic (   dsize : integer := 8
+            );
+    port (  clk             : in std_logic; -- wired to trigger_clk (= not input_clk) in this architecture
+            pcl_inc_n       : in std_logic; -- this and the following control inputs are all decoder outputs
+            pch_inc_n       : in std_logic;
+            sp_oe_n         : in std_logic;
+            sp_push_n       : in std_logic;
+            sp_pop_n        : in std_logic;
+            abs_xy_n        : in std_logic;
+            pg_next_n       : in std_logic;
+            zp_n            : in std_logic;
+            zp_xy_n         : in std_logic;
+            rel_calc_n      : in std_logic;
+            indir_n         : in std_logic;
+            indir_x_n       : in std_logic;
+            indir_y_n       : in std_logic;
+            arith_en_n      : in std_logic;
+            instruction     : in std_logic_vector (dsize - 1 downto 0);
+            exec_cycle      : in std_logic_vector (5 downto 0);
+            int_d_bus       : inout std_logic_vector (dsize - 1 downto 0); -- internal data bus
+            acc_out         : in std_logic_vector (dsize - 1 downto 0); -- back_out_port of the acc register
+            index_bus       : in std_logic_vector (dsize - 1 downto 0); -- shared back_out_port of the x and y registers
+            bal             : in std_logic_vector (dsize - 1 downto 0); -- driven by pcl, sp, idl_l and the vector low-byte buffers
+            bah             : in std_logic_vector (dsize - 1 downto 0); -- driven by pch, idl_h and the vector high-byte buffers
+            addr_back       : out std_logic_vector (dsize - 1 downto 0); -- back_in_port of pcl, pch and sp
+            acc_in          : out std_logic_vector (dsize - 1 downto 0); -- back_in_port of the acc register
+            abl             : out std_logic_vector (dsize - 1 downto 0); -- low byte of the external address
+            abh             : out std_logic_vector (dsize - 1 downto 0); -- high byte of the external address
+            pcl_inc_carry   : out std_logic; -- becomes exec_cycle(5)
+            ea_carry        : out std_logic; -- fed back to the decoder
+            carry_in        : in std_logic; -- wired to stat_c from processor_status
+            negative        : out std_logic; -- the four flag outputs go to processor_status (alu_n/alu_z/alu_c/alu_v)
+            zero            : out std_logic;
+            carry_out       : out std_logic;
+            overflow        : out std_logic
+    );
+end component;
+
+    ----------------------------------------------
+    ------------ register declaration ------------
+    ----------------------------------------------
+component d_flip_flop -- dsize-bit register component; entity is defined outside this file.
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk     : in std_logic;
+            res_n   : in std_logic; -- presumably active-low reset (tied to '1' on exec_cycle_inst) - confirm in the entity
+            set_n   : in std_logic; -- tied to '1' by every instance in this architecture
+            we_n    : in std_logic; -- presumably active-low write enable - confirm in the entity
+            d       : in std_logic_vector (dsize - 1 downto 0);
+            q       : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+component dual_dff -- register with a bidirectional front port and separate back in/out ports; used for pcl, pch, sp, x, y and acc.
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk             : in std_logic;
+            res_n           : in std_logic;
+            set_n           : in std_logic; -- tied to '1' by every instance in this architecture
+            gate_cmd        : in std_logic_vector (3 downto 0); -- 4-bit command from the decoder (*_cmd lines); bit meaning is defined in the dual_dff entity
+            front_port      : inout std_logic_vector (dsize - 1 downto 0); -- always connected to int_d_bus here
+            back_in_port    : in std_logic_vector (dsize - 1 downto 0);
+            back_out_port   : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+component data_bus_buffer -- buffer between the internal data bus and the external d_io pins.
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk         : in std_logic; -- wired to set_clk
+            r_nw        : in std_logic; -- wired to dbuf_r_nw (decoder output)
+            int_oe_n    : in std_logic; -- wired to dbuf_int_oe_n (decoder output)
+            int_dbus : inout std_logic_vector (dsize - 1 downto 0); -- internal side (int_d_bus)
+            ext_dbus : inout std_logic_vector (dsize - 1 downto 0) -- external side (d_io)
+        );
+end component;
+
+component input_data_latch -- latch that captures int_d_bus for use as an address operand (instances idl_l and idl_h).
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk         : in std_logic; -- wired to set_clk
+            oe_n        : in std_logic; -- tied to '0' on idl_h, whose output is gated by separate tri-state buffers
+            we_n        : in std_logic;
+            int_dbus    : in std_logic_vector (dsize - 1 downto 0);
+            alu_bus     : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+component tri_state_buffer -- buffer used to gate a source onto a shared bus (address pins, bal/bah, int_d_bus).
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            oe_n    : in std_logic; -- presumably active-low output enable - confirm in the entity
+            d       : in std_logic_vector (dsize - 1 downto 0);
+            q       : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+component processor_status -- processor status (flag) register; control inputs come from the decoder, flag inputs from the alu.
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk         : in std_logic; -- wired to trigger_clk
+            res_n       : in std_logic; -- wired to rst_n
+            dec_oe_n    : in std_logic; -- stat_dec_oe_n
+            bus_oe_n    : in std_logic; -- stat_bus_oe_n
+            set_flg_n   : in std_logic; -- stat_set_flg_n
+            flg_val     : in std_logic; -- stat_flg
+            load_bus_all_n      : in std_logic; -- stat_bus_all_n
+            load_bus_nz_n       : in std_logic; -- stat_bus_nz_n
+            set_from_alu_n      : in std_logic; -- stat_alu_we_n
+            alu_n       : in std_logic; -- alu "negative" output
+            alu_v       : in std_logic; -- alu "overflow" output
+            alu_z       : in std_logic; -- alu "zero" output
+            alu_c       : in std_logic; -- alu "carry_out" output
+            stat_c      : out std_logic; -- fed to the alu's carry_in
+            dec_val     : inout std_logic_vector (dsize - 1 downto 0); -- shared with the decoder's status_reg port
+            int_dbus    : inout std_logic_vector (dsize - 1 downto 0) -- internal data bus
+        );
+end component;
+
+    ----------------------------------------------
+    ------------ signal declaration --------------
+    ----------------------------------------------
+    signal set_clk : std_logic; -- same as input_clk
+    signal trigger_clk : std_logic; -- inverse of input_clk
+
+    signal exec_cycle : std_logic_vector(5 downto 0); -- bits 4..0: output of exec_cycle_inst; bit 5: pcl_inc_carry
+    signal next_cycle : std_logic_vector(5 downto 0); -- decoder output; only bits 4..0 are latched
+    signal status_reg : std_logic_vector (dsize - 1 downto 0); -- shared between decoder.status_reg and processor_status.dec_val
+
+    -------------------------------
+    -------- control lines --------
+    -------------------------------
+    signal inst_we_n : std_logic;
+    signal inst_rst_n : std_logic; -- reset of the instruction register; see its concurrent assignment
+    signal ad_oe_n : std_logic;
+
+    signal dbuf_r_nw : std_logic; -- decoder r_nw output; drives the r_nw pin and the data bus buffer
+    signal dbuf_int_oe_n : std_logic;
+
+    signal dl_al_we_n : std_logic;
+    signal dl_ah_we_n : std_logic;
+    signal dl_al_oe_n : std_logic;
+    signal dl_ah_oe_n : std_logic;
+    signal dl_dh_oe_n : std_logic;
+
+    signal pcl_inc_n : std_logic;
+    signal pch_inc_n : std_logic;
+    signal pcl_inc_carry : std_logic; -- alu output, copied into exec_cycle(5)
+    signal abs_xy_n        : std_logic;
+    signal ea_carry        : std_logic; -- alu output, decoder input
+    signal pg_next_n       : std_logic;
+    signal zp_n            : std_logic;
+    signal zp_xy_n         : std_logic;
+    signal rel_calc_n      : std_logic;
+    signal indir_n         : std_logic;
+    signal indir_x_n       : std_logic;
+    signal indir_y_n       : std_logic;
+    signal arith_en_n      : std_logic;
+                    
+    signal alu_n : std_logic; -- alu flag outputs consumed by processor_status
+    signal alu_z : std_logic;
+    signal alu_c : std_logic;
+    signal alu_v : std_logic;
+    signal stat_c : std_logic; -- processor_status output, alu carry_in
+
+    ----control line for dual port registers.
+    signal pcl_cmd : std_logic_vector(3 downto 0);
+    signal pch_cmd : std_logic_vector(3 downto 0);
+    signal sp_cmd : std_logic_vector(3 downto 0);
+    signal acc_cmd : std_logic_vector(3 downto 0);
+    signal x_cmd : std_logic_vector(3 downto 0);
+    signal y_cmd : std_logic_vector(3 downto 0);
+    signal sp_oe_n : std_logic;
+    signal sp_push_n : std_logic;
+    signal sp_pop_n  : std_logic;
+
+    ---status register
+    signal stat_dec_oe_n : std_logic;
+    signal stat_bus_oe_n : std_logic;
+    signal stat_set_flg_n : std_logic;
+    signal stat_flg : std_logic;
+    signal stat_bus_all_n : std_logic;
+    signal stat_bus_nz_n : std_logic;
+    signal stat_alu_we_n : std_logic;
+
+    -------------------------------
+    ------------ buses ------------
+    -------------------------------
+    signal instruction : std_logic_vector(dsize - 1 downto 0); -- output of the instruction register
+    
+    signal bah : std_logic_vector(dsize - 1 downto 0); -- shared bus: pch, idl_h buffer and the vector high bytes
+    signal bal : std_logic_vector(dsize - 1 downto 0); -- shared bus: pcl, sp, idl_l and the vector low bytes
+    signal index_bus : std_logic_vector(dsize - 1 downto 0); -- shared back output of the x and y registers
+    signal idl_h_out : std_logic_vector(dsize - 1 downto 0); -- idl_h output before the bah / int_d_bus buffers
+
+    signal acc_out : std_logic_vector(dsize - 1 downto 0);
+    signal acc_in : std_logic_vector(dsize - 1 downto 0);
+    signal addr_back : std_logic_vector(dsize - 1 downto 0); -- alu output written back into pcl, pch and sp
+
+    --not used bus.
+    signal null_bus : std_logic_vector(dsize - 1 downto 0); -- driven to all 'Z'; back_in_port of x and y
+
+    --address bus
+    signal abh : std_logic_vector(dsize - 1 downto 0);
+    signal abl : std_logic_vector(dsize - 1 downto 0);
+
+    ---internal data bus
+    signal int_d_bus : std_logic_vector(dsize - 1 downto 0);
+
+    ---reset vectors---
+    signal r_vec_oe_n : std_logic;
+    signal n_vec_oe_n : std_logic;
+    signal i_vec_oe_n : std_logic;
+    signal reset_l : std_logic_vector(dsize - 1 downto 0); -- the six *_l/*_h signals hold fixed vector address bytes
+    signal reset_h : std_logic_vector(dsize - 1 downto 0);
+    signal nmi_l : std_logic_vector(dsize - 1 downto 0);
+    signal nmi_h : std_logic_vector(dsize - 1 downto 0);
+    signal irq_l : std_logic_vector(dsize - 1 downto 0);
+    signal irq_h : std_logic_vector(dsize - 1 downto 0);
+
+    signal check_bit     : std_logic_vector(1 to 5); -- decoder check_bit output; not read anywhere else in this architecture
+
+begin
+
+
+    -- clock generation: set_clk/phi1 follow input_clk, trigger_clk/phi2 are its inverse.
+    set_clk     <= input_clk;
+    trigger_clk <= not input_clk;
+    phi1        <= input_clk;
+    phi2        <= not input_clk;
+
+    -- the external read/write pin mirrors the decoder's r_nw output.
+    r_nw <= dbuf_r_nw;
+
+    -- fixed vector address bytes (high & low): nmi = FFFA, reset = FFFC, irq = FFFE.
+    nmi_h   <= "11111111";
+    nmi_l   <= "11111010";
+    reset_h <= "11111111";
+    reset_l <= "11111100";
+    irq_h   <= "11111111";
+    irq_l   <= "11111110";
+
+    --instruction register is reset when handling exceptions(nmi/irq cycle).
+    --it is held in reset while rst_n is low or while exec_cycle bit 3 or bit 4 is set.
+    inst_rst_n <= '0' when (rst_n = '0') or ((exec_cycle(3) or exec_cycle(4)) = '1')
+                  else '1';
+
+    --------------------------------------------------
+    ------------------- instances --------------------
+    --------------------------------------------------
+
+    -- instruction decoder: produces every control line used by the blocks below.
+    -- ports are bound by name so the mapping no longer depends on declaration order.
+    dec_inst : decoder
+        generic map (dsize => dsize)
+        port map (
+            set_clk         => set_clk,
+            trig_clk        => trigger_clk,
+            res_n           => rst_n,
+            irq_n           => irq_n,
+            nmi_n           => nmi_n,
+            rdy             => rdy,
+            instruction     => instruction,
+            exec_cycle      => exec_cycle,
+            next_cycle      => next_cycle,
+            status_reg      => status_reg,
+            inst_we_n       => inst_we_n,
+            ad_oe_n         => ad_oe_n,
+            dbuf_int_oe_n   => dbuf_int_oe_n,
+            dl_al_we_n      => dl_al_we_n,
+            dl_ah_we_n      => dl_ah_we_n,
+            dl_al_oe_n      => dl_al_oe_n,
+            dl_ah_oe_n      => dl_ah_oe_n,
+            dl_dh_oe_n      => dl_dh_oe_n,
+            pcl_inc_n       => pcl_inc_n,
+            pch_inc_n       => pch_inc_n,
+            pcl_cmd         => pcl_cmd,
+            pch_cmd         => pch_cmd,
+            sp_cmd          => sp_cmd,
+            sp_oe_n         => sp_oe_n,
+            sp_push_n       => sp_push_n,
+            sp_pop_n        => sp_pop_n,
+            acc_cmd         => acc_cmd,
+            x_cmd           => x_cmd,
+            y_cmd           => y_cmd,
+            abs_xy_n        => abs_xy_n,
+            ea_carry        => ea_carry,
+            pg_next_n       => pg_next_n,
+            zp_n            => zp_n,
+            zp_xy_n         => zp_xy_n,
+            rel_calc_n      => rel_calc_n,
+            indir_n         => indir_n,
+            indir_x_n       => indir_x_n,
+            indir_y_n       => indir_y_n,
+            arith_en_n      => arith_en_n,
+            stat_dec_oe_n   => stat_dec_oe_n,
+            stat_bus_oe_n   => stat_bus_oe_n,
+            stat_set_flg_n  => stat_set_flg_n,
+            stat_flg        => stat_flg,
+            stat_bus_all_n  => stat_bus_all_n,
+            stat_bus_nz_n   => stat_bus_nz_n,
+            stat_alu_we_n   => stat_alu_we_n,
+            r_vec_oe_n      => r_vec_oe_n,
+            n_vec_oe_n      => n_vec_oe_n,
+            i_vec_oe_n      => i_vec_oe_n,
+            r_nw            => dbuf_r_nw,
+            check_bit       => check_bit    --check bit.
+        );
+
+    -- alu / address calculation unit, clocked on trigger_clk.
+    -- carry_in comes from the status register; the four flag outputs go back to it.
+    alu_inst : alu
+        generic map (dsize => dsize)
+        port map (
+            clk             => trigger_clk,
+            pcl_inc_n       => pcl_inc_n,
+            pch_inc_n       => pch_inc_n,
+            sp_oe_n         => sp_oe_n,
+            sp_push_n       => sp_push_n,
+            sp_pop_n        => sp_pop_n,
+            abs_xy_n        => abs_xy_n,
+            pg_next_n       => pg_next_n,
+            zp_n            => zp_n,
+            zp_xy_n         => zp_xy_n,
+            rel_calc_n      => rel_calc_n,
+            indir_n         => indir_n,
+            indir_x_n       => indir_x_n,
+            indir_y_n       => indir_y_n,
+            arith_en_n      => arith_en_n,
+            instruction     => instruction,
+            exec_cycle      => exec_cycle,
+            int_d_bus       => int_d_bus,
+            acc_out         => acc_out,
+            index_bus       => index_bus,
+            bal             => bal,
+            bah             => bah,
+            addr_back       => addr_back,
+            acc_in          => acc_in,
+            abl             => abl,
+            abh             => abh,
+            pcl_inc_carry   => pcl_inc_carry,
+            ea_carry        => ea_carry,
+            carry_in        => stat_c,
+            negative        => alu_n,
+            zero            => alu_z,
+            carry_out       => alu_c,
+            overflow        => alu_v
+        );
+
+    --cpu execution cycle number: 5-bit register that latches next_cycle(4 downto 0)
+    --on trigger_clk. reset and set are tied inactive and write is always enabled.
+    exec_cycle_inst : d_flip_flop
+        generic map (dsize => 5)
+        port map (
+            clk   => trigger_clk,
+            res_n => '1',
+            set_n => '1',
+            we_n  => '0',
+            d     => next_cycle(4 downto 0),
+            q     => exec_cycle(4 downto 0)
+        );
+
+    --exec_cycle top bit is pch carry flag (carry out of the pcl increment).
+    exec_cycle(5) <= pcl_inc_carry;
+
+    --io data buffer between the internal data bus and the d_io pins.
+    dbus_buf : data_bus_buffer
+        generic map (dsize => dsize)
+        port map (
+            clk      => set_clk,
+            r_nw     => dbuf_r_nw,
+            int_oe_n => dbuf_int_oe_n,
+            int_dbus => int_d_bus,
+            ext_dbus => d_io
+        );
+
+    --address operand data buffer (low byte), output goes straight onto bal.
+    idl_l : input_data_latch
+        generic map (dsize => dsize)
+        port map (
+            clk      => set_clk,
+            oe_n     => dl_al_oe_n,
+            we_n     => dl_al_we_n,
+            int_dbus => int_d_bus,
+            alu_bus  => bal
+        );
+
+    --address operand data buffer (high byte), output always enabled;
+    --the two tri-state buffers below decide where it goes.
+    idl_h : input_data_latch
+        generic map (dsize => dsize)
+        port map (
+            clk      => set_clk,
+            oe_n     => '0',
+            we_n     => dl_ah_we_n,
+            int_dbus => int_d_bus,
+            alu_bus  => idl_h_out
+        );
+
+    ---only DLH has b-bus side output.
+    idl_h_a_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => dl_ah_oe_n, d => idl_h_out, q => bah);
+
+    idl_h_d_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => dl_dh_oe_n, d => idl_h_out, q => int_d_bus);
+
+    -------- registers --------
+    --instruction register: loads directly from the external data pins and
+    --uses inst_rst_n instead of rst_n as its reset.
+    ir : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => trigger_clk,
+            res_n => inst_rst_n,
+            set_n => '1',
+            we_n  => inst_we_n,
+            d     => d_io,
+            q     => instruction
+        );
+
+    --program counter low byte: back side reads addr_back and drives bal.
+    pcl_inst : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => pcl_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => addr_back,
+            back_out_port => bal
+        );
+
+    --program counter high byte: back side reads addr_back and drives bah.
+    pch_inst : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => pch_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => addr_back,
+            back_out_port => bah
+        );
+
+    --status register: controlled by the decoder's stat_* lines, takes the
+    --n/v/z/c flags from the alu and returns the carry flag as stat_c.
+    status_register : processor_status
+        generic map (dsize => dsize)
+        port map (
+            clk            => trigger_clk,
+            res_n          => rst_n,
+            dec_oe_n       => stat_dec_oe_n,
+            bus_oe_n       => stat_bus_oe_n,
+            set_flg_n      => stat_set_flg_n,
+            flg_val        => stat_flg,
+            load_bus_all_n => stat_bus_all_n,
+            load_bus_nz_n  => stat_bus_nz_n,
+            set_from_alu_n => stat_alu_we_n,
+            alu_n          => alu_n,
+            alu_v          => alu_v,
+            alu_z          => alu_z,
+            alu_c          => alu_c,
+            stat_c         => stat_c,
+            dec_val        => status_reg,
+            int_dbus       => int_d_bus
+        );
+
+
+    --stack pointer: back side reads addr_back and drives bal.
+    sp : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => sp_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => addr_back,
+            back_out_port => bal
+        );
+
+    --index registers: back input is the unused null_bus, both drive index_bus.
+    x : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => x_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => null_bus,
+            back_out_port => index_bus
+        );
+    y : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => y_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => null_bus,
+            back_out_port => index_bus
+        );
+
+    --accumulator: back side is a dedicated path to and from the alu.
+    acc : dual_dff
+        generic map (dsize => dsize)
+        port map (
+            clk           => trigger_clk,
+            res_n         => rst_n,
+            set_n         => '1',
+            gate_cmd      => acc_cmd,
+            front_port    => int_d_bus,
+            back_in_port  => acc_in,
+            back_out_port => acc_out
+        );
+
+    --adh/adl output is controlled by decoder (ad_oe_n gates both halves).
+    adh_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (
+            oe_n => ad_oe_n,
+            d    => abh,
+            q    => addr(asize - 1 downto dsize)
+        );
+    adl_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (
+            oe_n => ad_oe_n,
+            d    => abl,
+            q    => addr(dsize - 1 downto 0)
+        );
+
+    --the unused bus is left floating.
+    null_bus <= (others => 'Z');
+
+    ----gating reset / nmi / irq vector bytes onto bal (low) and bah (high).
+    ----each pair shares one enable line from the decoder.
+    res_l_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => r_vec_oe_n, d => reset_l, q => bal);
+    res_h_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => r_vec_oe_n, d => reset_h, q => bah);
+
+    nmi_l_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => n_vec_oe_n, d => nmi_l, q => bal);
+    nmi_h_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => n_vec_oe_n, d => nmi_h, q => bah);
+
+    irq_l_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => i_vec_oe_n, d => irq_l, q => bal);
+    irq_h_buf : tri_state_buffer
+        generic map (dsize => dsize)
+        port map (oe_n => i_vec_oe_n, d => irq_h, q => bah);
+
+    -- placeholder process: wakes on rst_n but performs no action.
+    reset_p : process (rst_n)
+    begin
+        if rst_n = '0' then
+            null;
+        end if;
+    end process reset_p;
+
+
+------------------------------------------------------------
+------------------------ for debug... ----------------------
+------------------------------------------------------------
+
+    -- debug-only process: formats the address bus as hex and hands it to d_print.
+    dbg_p : process (set_clk)
+        use std.textio.all;
+        --use ieee.std_logic_textio.all;
+        use ieee.std_logic_unsigned.conv_integer;
+
+        -- print hook; the textio calls are commented out, so it currently does nothing.
+        procedure d_print(msg : string) is
+            variable out_l : line;
+        begin
+        --    write(out_l, msg);
+        --    writeline(output, out_l);
+        end procedure;
+
+        -- returns (ival mod 256) as two lowercase hex digits.
+        function conv_hex8(ival : integer) return string is
+            variable hex_digits : string (1 to 16) := "0123456789abcdef";
+            variable hi_nibble  : integer;
+            variable lo_nibble  : integer;
+        begin
+            lo_nibble := ival mod 16;
+            hi_nibble := (ival mod 256) / 16;
+            return hex_digits(hi_nibble + 1) & hex_digits(lo_nibble + 1);
+        end function;
+    begin
+        --show pc on the T0 (fetch) cycle.
+        if set_clk = '0' and exec_cycle = "000000" then
+            d_print("pc : " & conv_hex8(conv_integer(abh)) & conv_hex8(conv_integer(abl)));
+        end if;
+    end process dbg_p;
+
+end rtl;
+
index 0908aff..f9e2980 100644 (file)
@@ -40,9 +40,14 @@ set_global_assignment -name LL_ROOT_REGION ON -section_id "Root Region"
 set_global_assignment -name LL_MEMBER_STATE LOCKED -section_id "Root Region"\r
 set_location_assignment PIN_D12 -to base_clk\r
 set_location_assignment PIN_R22 -to rst_n\r
+set_global_assignment -name INCREMENTAL_VECTOR_INPUT_SOURCE de1_nes.vwf\r
+set_global_assignment -name VHDL_FILE ppu/ppu.vhd\r
+set_global_assignment -name VHDL_FILE ppu/render.vhd\r
+set_global_assignment -name VHDL_FILE cpu/mos6502.vhd\r
+set_global_assignment -name VHDL_FILE cpu/decoder.vhd\r
+set_global_assignment -name VHDL_FILE cpu/alu.vhd\r
 set_global_assignment -name VHDL_FILE ppu/ppu_registers.vhd\r
 set_global_assignment -name VHDL_FILE cpu/cpu_registers.vhd\r
 set_global_assignment -name VHDL_FILE clock/clock_divider.vhd\r
 set_global_assignment -name VHDL_FILE de1_nes.vhd\r
-set_global_assignment -name VECTOR_WAVEFORM_FILE de1_nes.vwf\r
-set_global_assignment -name INCREMENTAL_VECTOR_INPUT_SOURCE de1_nes.vwf
\ No newline at end of file
+set_global_assignment -name VECTOR_WAVEFORM_FILE de1_nes.vwf
\ No newline at end of file
index ea3644e..35822df 100644 (file)
@@ -8,15 +8,66 @@ ptn_Child2=Document-1
 ptn_Child3=Document-2\r
 ptn_Child4=Document-3\r
 ptn_Child5=Document-4\r
+ptn_Child6=Document-5\r
+ptn_Child7=Document-6\r
+ptn_Child8=Document-7\r
 [ProjectWorkspace.Frames.ChildFrames.Document-0]\r
 ptn_Child1=ViewFrame-0\r
 [ProjectWorkspace.Frames.ChildFrames.Document-0.ViewFrame-0]\r
-DocPathName=de1_nes.vwf\r
-DocumentCLSID={4a9ed22a-e60b-11d1-a0bd-0020affa43f2}\r
+DocPathName=clock/clock_divider.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
 IsChildFrameDetached=False\r
 IsActiveChildFrame=False\r
 ptn_Child1=StateMap\r
 [ProjectWorkspace.Frames.ChildFrames.Document-0.ViewFrame-0.StateMap]\r
-AFC_SIM_AP_NAME=de1_nes\r
-AFC_PROJ_DB_PATH=D:/daisuke/nes/repo/motonesfpga/de1_nes/db/de1_nes.quartus_db\r
+AFC_IN_REPORT=False\r
+[ProjectWorkspace.Frames.ChildFrames.Document-1]\r
+ptn_Child1=ViewFrame-0\r
+[ProjectWorkspace.Frames.ChildFrames.Document-1.ViewFrame-0]\r
+DocPathName=cpu/decoder.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
+IsChildFrameDetached=False\r
+IsActiveChildFrame=False\r
+ptn_Child1=StateMap\r
+[ProjectWorkspace.Frames.ChildFrames.Document-1.ViewFrame-0.StateMap]\r
+AFC_IN_REPORT=False\r
+[ProjectWorkspace.Frames.ChildFrames.Document-2]\r
+ptn_Child1=ViewFrame-0\r
+[ProjectWorkspace.Frames.ChildFrames.Document-2.ViewFrame-0]\r
+DocPathName=cpu/alu.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
+IsChildFrameDetached=False\r
+IsActiveChildFrame=False\r
+ptn_Child1=StateMap\r
+[ProjectWorkspace.Frames.ChildFrames.Document-2.ViewFrame-0.StateMap]\r
+AFC_IN_REPORT=False\r
+[ProjectWorkspace.Frames.ChildFrames.Document-3]\r
+ptn_Child1=ViewFrame-0\r
+[ProjectWorkspace.Frames.ChildFrames.Document-3.ViewFrame-0]\r
+DocPathName=cpu/mos6502.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
+IsChildFrameDetached=False\r
+IsActiveChildFrame=False\r
+ptn_Child1=StateMap\r
+[ProjectWorkspace.Frames.ChildFrames.Document-3.ViewFrame-0.StateMap]\r
+AFC_IN_REPORT=False\r
+[ProjectWorkspace.Frames.ChildFrames.Document-4]\r
+ptn_Child1=ViewFrame-0\r
+[ProjectWorkspace.Frames.ChildFrames.Document-4.ViewFrame-0]\r
+DocPathName=ppu/ppu.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
+IsChildFrameDetached=False\r
+IsActiveChildFrame=False\r
+ptn_Child1=StateMap\r
+[ProjectWorkspace.Frames.ChildFrames.Document-4.ViewFrame-0.StateMap]\r
+AFC_IN_REPORT=False\r
+[ProjectWorkspace.Frames.ChildFrames.Document-5]\r
+ptn_Child1=ViewFrame-0\r
+[ProjectWorkspace.Frames.ChildFrames.Document-5.ViewFrame-0]\r
+DocPathName=ppu/render.vhd\r
+DocumentCLSID={ca385d57-a4c7-11d1-a098-0020affa43f2}\r
+IsChildFrameDetached=False\r
+IsActiveChildFrame=False\r
+ptn_Child1=StateMap\r
+[ProjectWorkspace.Frames.ChildFrames.Document-5.ViewFrame-0.StateMap]\r
 AFC_IN_REPORT=False\r
diff --git a/de1_nes/ppu/ppu.vhd b/de1_nes/ppu/ppu.vhd
new file mode 100644 (file)
index 0000000..c6c7b9c
--- /dev/null
@@ -0,0 +1,462 @@
+library ieee;
+use ieee.std_logic_1164.all;
+
+entity ppu is -- external interface of the PPU: CPU register port, VRAM bus and VGA output.
+    port (  clk         : in std_logic; -- PPU clock; feeds ppu_render and vga_ctl, and is inverted into clk_n for the registers
+            ce_n        : in std_logic; -- read by process reg_set_p (presumably active-low chip enable - confirm there)
+            rst_n       : in std_logic; -- reset, asserted when '0'
+            r_nw        : in std_logic; -- read/not-write from the CPU side; also passed to ppu_render
+            cpu_addr    : in std_logic_vector (2 downto 0); -- 3-bit register select (see the PPUCTRL..PPUDATA constants)
+            cpu_d       : inout std_logic_vector (7 downto 0); -- CPU data bus; data input of the register instances
+            vblank_n    : out std_logic; -- driven by process reg_set_p
+            rd_n        : out std_logic; -- rd_n, wr_n, ale, vram_ad and vram_a are driven by ppu_render
+            wr_n        : out std_logic;
+            ale         : out std_logic;
+            vram_ad     : inout std_logic_vector (7 downto 0);
+            vram_a      : out std_logic_vector (13 downto 8);
+            vga_clk     : in std_logic; -- passed to vga_ctl
+            h_sync_n    : out std_logic; -- sync and colour outputs are driven by vga_ctl
+            v_sync_n    : out std_logic;
+            r           : out std_logic_vector(3 downto 0);
+            g           : out std_logic_vector(3 downto 0);
+            b           : out std_logic_vector(3 downto 0)
+    );
+end ppu;
+
+architecture rtl of ppu is
+
+component ppu_render -- rendering unit (see ppu/render.vhd); owns the VRAM bus pins and produces pixel position and colour.
+    port (  clk         : in std_logic;
+            rst_n       : in std_logic;
+            rd_n        : out std_logic; -- VRAM bus pins, wired straight to the ppu entity ports of the same name
+            wr_n        : out std_logic;
+            ale         : out std_logic;
+            vram_ad     : inout std_logic_vector (7 downto 0);
+            vram_a      : out std_logic_vector (13 downto 8);
+            pos_x       : out std_logic_vector (8 downto 0); -- forwarded to vga_ctl
+            pos_y       : out std_logic_vector (8 downto 0); -- forwarded to vga_ctl
+            r           : out std_logic_vector (3 downto 0); -- connected to nes_r / nes_g / nes_b in this architecture
+            g           : out std_logic_vector (3 downto 0);
+            b           : out std_logic_vector (3 downto 0);
+            ppu_ctrl        : in std_logic_vector (7 downto 0); -- register values held by the d_flip_flop instances in ppu
+            ppu_mask        : in std_logic_vector (7 downto 0);
+            read_status     : in std_logic; -- also used as the clock of ppu_status_inst
+            ppu_status      : out std_logic_vector (7 downto 0);
+            ppu_scroll_x    : in std_logic_vector (7 downto 0);
+            ppu_scroll_y    : in std_logic_vector (7 downto 0);
+            r_nw            : in std_logic;
+            oam_bus_ce_n    : in std_logic;
+            plt_bus_ce_n    : in std_logic;
+            oam_plt_addr    : in std_logic_vector (7 downto 0);
+            oam_plt_data    : inout std_logic_vector (7 downto 0);
+            v_bus_busy_n    : out std_logic
+    );
+end component;
+
+component vga_ctl -- VGA output stage: takes PPU pixel position/colour and drives the sync and colour pins.
+    port (  ppu_clk     : in std_logic; -- wired to the ppu entity's clk
+            vga_clk     : in std_logic;
+            rst_n       : in std_logic;
+            pos_x       : in std_logic_vector (8 downto 0); -- from ppu_render
+            pos_y       : in std_logic_vector (8 downto 0); -- from ppu_render
+            nes_r       : in std_logic_vector (3 downto 0); -- colour produced by ppu_render
+            nes_g       : in std_logic_vector (3 downto 0);
+            nes_b       : in std_logic_vector (3 downto 0);
+            h_sync_n    : out std_logic;
+            v_sync_n    : out std_logic;
+            r           : out std_logic_vector(3 downto 0);
+            g           : out std_logic_vector(3 downto 0);
+            b           : out std_logic_vector(3 downto 0)
+    );
+end component;
+
+component d_flip_flop -- dsize-bit register component used for the CPU-visible PPU registers; entity is defined outside this file.
+    generic (
+            dsize : integer := 8
+            );
+    port (
+            clk     : in std_logic;
+            res_n   : in std_logic;
+            set_n   : in std_logic; -- tied to '1' by every instance in this architecture
+            we_n    : in std_logic; -- presumably active-low write enable - confirm in the entity
+            d       : in std_logic_vector (dsize - 1 downto 0);
+            q       : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+component counter_register -- loadable counter component; entity is defined outside this file.
+    generic (
+        dsize       : integer := 8; -- counter width in bits
+        inc         : integer := 1 -- presumably the increment step - confirm in the entity
+    );
+    port (  clk         : in std_logic;
+            rst_n       : in std_logic;
+            ce_n        : in std_logic; -- presumably active-low count enable - confirm in the entity
+            we_n        : in std_logic; -- presumably active-low load enable for d - confirm in the entity
+            d           : in std_logic_vector(dsize - 1 downto 0);
+            q           : out std_logic_vector(dsize - 1 downto 0)
+    );
+end component;
+
+procedure d_print(msg : string) is -- debug print hook; the textio calls below are commented out, so calling it has no effect.
+use std.textio.all;
+--use ieee.std_logic_textio.all;
+variable out_l : line; -- only needed by the commented-out write/writeline calls
+begin
+--    write(out_l, msg);
+--    writeline(output, out_l);
+end  procedure;
+
+signal pos_x       : std_logic_vector (8 downto 0); -- pixel position from ppu_render to vga_ctl
+signal pos_y       : std_logic_vector (8 downto 0);
+signal nes_r       : std_logic_vector (3 downto 0); -- pixel colour from ppu_render to vga_ctl
+signal nes_g       : std_logic_vector (3 downto 0);
+signal nes_b       : std_logic_vector (3 downto 0);
+
+constant dsize     : integer := 8; -- width of the CPU-visible registers
+
+constant PPUCTRL   : std_logic_vector(2 downto 0) := "000"; -- 3-bit register select codes, same width as cpu_addr
+constant PPUMASK   : std_logic_vector(2 downto 0) := "001";
+constant PPUSTATUS : std_logic_vector(2 downto 0) := "010";
+constant OAMADDR   : std_logic_vector(2 downto 0) := "011";
+constant OAMDATA   : std_logic_vector(2 downto 0) := "100";
+constant PPUSCROLL : std_logic_vector(2 downto 0) := "101";
+constant PPUADDR   : std_logic_vector(2 downto 0) := "110";
+constant PPUDATA   : std_logic_vector(2 downto 0) := "111";
+
+constant PPUNEN     : integer := 7;  --nmi enable
+constant ST_VBL     : integer := 7;  --vblank
+
+signal clk_n            : std_logic; -- inverted clk; clock of the register instances
+
+signal ppu_clk_cnt_res_n    : std_logic; -- reset input of ppu_clk_cnt_inst
+signal ppu_clk_cnt          : std_logic_vector(1 downto 0); -- 2-bit counter output
+
+signal ppu_ctrl_we_n    : std_logic; -- the *_we_n / *_ce_n lines below are the write/count enables of the matching register instances
+signal ppu_mask_we_n    : std_logic;
+signal oam_addr_ce_n    : std_logic;
+signal oam_addr_we_n    : std_logic;
+signal oam_data_we_n    : std_logic;
+signal ppu_scroll_x_we_n    : std_logic;
+signal ppu_scroll_y_we_n    : std_logic;
+signal ppu_scroll_cnt_ce_n  : std_logic;
+signal ppu_addr_we_n        : std_logic;
+signal ppu_addr_cnt_ce_n    : std_logic;
+signal ppu_data_we_n    : std_logic; -- also used as ce_n of the ppu_addr counter
+
+signal ppu_ctrl         : std_logic_vector (dsize - 1 downto 0);
+signal ppu_mask         : std_logic_vector (dsize - 1 downto 0);
+signal read_status      : std_logic; -- input of ppu_render and clock of ppu_status_inst
+signal ppu_status       : std_logic_vector (dsize - 1 downto 0); -- produced by ppu_render
+signal ppu_stat_out     : std_logic_vector (dsize - 1 downto 0); -- ppu_status as latched by ppu_status_inst
+signal oam_addr         : std_logic_vector (dsize - 1 downto 0);
+signal oam_data         : std_logic_vector (dsize - 1 downto 0);
+signal ppu_scroll_x     : std_logic_vector (dsize - 1 downto 0);
+signal ppu_scroll_y     : std_logic_vector (dsize - 1 downto 0);
+signal ppu_scroll_cnt   : std_logic_vector (0 downto 0); -- 1-bit counter output
+signal ppu_addr         : std_logic_vector (13 downto 0); -- 14-bit counter output
+signal ppu_addr_in      : std_logic_vector (13 downto 0); -- load value of the ppu_addr counter
+signal ppu_addr_cnt     : std_logic_vector (0 downto 0); -- 1-bit counter output
+signal ppu_data         : std_logic_vector (dsize - 1 downto 0);
+signal ppu_data_out     : std_logic_vector (dsize - 1 downto 0); -- ppu_data re-registered under read_data_n
+signal read_data_n      : std_logic;
+signal ppu_latch_rst_n  : std_logic; -- reset of the scroll/addr 1-bit counters; driven to '0' on reset by reg_set_p
+signal v_bus_busy_n     : std_logic; -- output of ppu_render
+
+signal oam_bus_ce_n     : std_logic;
+signal plt_bus_ce_n     : std_logic;
+
+signal oam_plt_addr     : std_logic_vector (7 downto 0);
+signal oam_plt_data     : std_logic_vector (7 downto 0);
+
+begin
+
+    -- rendering unit: owns the vram bus pins and produces pixel position / colour.
+    render_inst : ppu_render
+        port map (
+            clk          => clk,
+            rst_n        => rst_n,
+            rd_n         => rd_n,
+            wr_n         => wr_n,
+            ale          => ale,
+            vram_ad      => vram_ad,
+            vram_a       => vram_a,
+            pos_x        => pos_x,
+            pos_y        => pos_y,
+            r            => nes_r,
+            g            => nes_g,
+            b            => nes_b,
+            ppu_ctrl     => ppu_ctrl,
+            ppu_mask     => ppu_mask,
+            read_status  => read_status,
+            ppu_status   => ppu_status,
+            ppu_scroll_x => ppu_scroll_x,
+            ppu_scroll_y => ppu_scroll_y,
+            r_nw         => r_nw,
+            oam_bus_ce_n => oam_bus_ce_n,
+            plt_bus_ce_n => plt_bus_ce_n,
+            oam_plt_addr => oam_plt_addr,
+            oam_plt_data => oam_plt_data,
+            v_bus_busy_n => v_bus_busy_n
+        );
+
+    -- vga output stage: takes the rendered pixel stream and drives the vga pins.
+    vga_inst : vga_ctl
+        port map (
+            ppu_clk  => clk,
+            vga_clk  => vga_clk,
+            rst_n    => rst_n,
+            pos_x    => pos_x,
+            pos_y    => pos_y,
+            nes_r    => nes_r,
+            nes_g    => nes_g,
+            nes_b    => nes_b,
+            h_sync_n => h_sync_n,
+            v_sync_n => v_sync_n,
+            r        => r,
+            g        => g,
+            b        => b
+        );
+
+    --PPU registers. all of them except ppu_status_inst are clocked on the inverted clk.
+    clk_n <= not clk;
+
+    --free-running 2-bit counter (count always enabled, load never enabled).
+    ppu_clk_cnt_inst : counter_register
+        generic map (dsize => 2, inc => 1)
+        port map (
+            clk   => clk_n,
+            rst_n => ppu_clk_cnt_res_n,
+            ce_n  => '0',
+            we_n  => '1',
+            d     => (others => '0'),
+            q     => ppu_clk_cnt
+        );
+
+    ppu_ctrl_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => ppu_ctrl_we_n,
+            d     => cpu_d,
+            q     => ppu_ctrl
+        );
+
+    ppu_mask_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => ppu_mask_we_n,
+            d     => cpu_d,
+            q     => ppu_mask
+        );
+
+    --status snapshot: clocked by read_status, write always enabled.
+    ppu_status_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => read_status,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => '0',
+            d     => ppu_status,
+            q     => ppu_stat_out
+        );
+
+    oma_addr_inst : counter_register
+        generic map (dsize => dsize, inc => 1)
+        port map (
+            clk   => clk_n,
+            rst_n => rst_n,
+            ce_n  => oam_addr_ce_n,
+            we_n  => oam_addr_we_n,
+            d     => cpu_d,
+            q     => oam_addr
+        );
+    oma_data_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => oam_data_we_n,
+            d     => cpu_d,
+            q     => oam_data
+        );
+
+    ppu_scroll_x_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => ppu_scroll_x_we_n,
+            d     => cpu_d,
+            q     => ppu_scroll_x
+        );
+    ppu_scroll_y_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => ppu_scroll_y_we_n,
+            d     => cpu_d,
+            q     => ppu_scroll_y
+        );
+    --1-bit counter for the scroll register, reset by ppu_latch_rst_n.
+    ppu_scroll_cnt_inst : counter_register
+        generic map (dsize => 1, inc => 1)
+        port map (
+            clk   => clk_n,
+            rst_n => ppu_latch_rst_n,
+            ce_n  => ppu_scroll_cnt_ce_n,
+            we_n  => '1',
+            d     => (others => '0'),
+            q     => ppu_scroll_cnt
+        );
+
+    --14-bit address counter: loads ppu_addr_in, count enable is ppu_data_we_n.
+    ppu_addr_inst : counter_register
+        generic map (dsize => 14, inc => 1)
+        port map (
+            clk   => clk_n,
+            rst_n => rst_n,
+            ce_n  => ppu_data_we_n,
+            we_n  => ppu_addr_we_n,
+            d     => ppu_addr_in,
+            q     => ppu_addr
+        );
+    --1-bit counter for the address register, reset by ppu_latch_rst_n.
+    ppu_addr_cnt_inst : counter_register
+        generic map (dsize => 1, inc => 1)
+        port map (
+            clk   => clk_n,
+            rst_n => ppu_latch_rst_n,
+            ce_n  => ppu_addr_cnt_ce_n,
+            we_n  => '1',
+            d     => (others => '0'),
+            q     => ppu_addr_cnt
+        );
+    ppu_data_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => ppu_data_we_n,
+            d     => cpu_d,
+            q     => ppu_data
+        );
+
+    --second stage for ppu_data, written under read_data_n.
+    ppu_data_out_inst : d_flip_flop
+        generic map (dsize => dsize)
+        port map (
+            clk   => clk_n,
+            res_n => rst_n,
+            set_n => '1',
+            we_n  => read_data_n,
+            d     => ppu_data,
+            q     => ppu_data_out
+        );
+
+
+    --reg_set_p: decodes the cpu side register access (ce_n, r_nw, cpu_addr)
+    --and generates the active low write / count enables of the ppu registers,
+    --the latch counter reset (ppu_latch_rst_n), read_status and vblank_n.
+    --ppu_scroll_cnt, ppu_addr_cnt and ppu_addr are read below but are not
+    --part of the sensitivity list.
+    reg_set_p : process (rst_n, ce_n, r_nw, cpu_addr, cpu_d, 
+                        ppu_status(ST_VBL), ppu_ctrl(PPUNEN))
+    begin
+        --reset: hold the scroll/addr latch counters in reset and release vblank_n.
+        if (rst_n = '0') then
+            ppu_latch_rst_n <= '0';
+            vblank_n <= '1';
+        end if;
+
+        --vblank_n is re-evaluated only when the vblank status bit or the
+        --nmi enable bit changes. it is driven low while both bits are '1'.
+        if (ppu_status(ST_VBL)'event or ppu_ctrl(PPUNEN)'event) then
+            if (ppu_status(ST_VBL) = '1' and ppu_ctrl(PPUNEN) = '1') then
+                --start vblank.
+                vblank_n <= '0';
+            else
+                --clear flag.
+                vblank_n <= '1';
+            end if;
+        end if;
+
+        if (rst_n = '1' and ce_n = '0') then
+
+            --register set.
+            --the write enables below depend on cpu_addr only; r_nw is not checked.
+            if(cpu_addr = PPUCTRL) then
+                ppu_ctrl_we_n <= '0';
+            else
+                ppu_ctrl_we_n <= '1';
+            end if;
+
+            if(cpu_addr = PPUMASK) then
+                ppu_mask_we_n <= '0';
+            else
+                ppu_mask_we_n <= '1';
+            end if;
+
+            if(cpu_addr = PPUSTATUS and r_nw = '1') then
+                --reading status resets ppu_addr/scroll cnt.
+                ppu_latch_rst_n <= '0';
+                --notify reading status
+                read_status <= '1';
+            else
+                ppu_latch_rst_n <= '1';
+                read_status <= '0';
+            end if;
+
+            if(cpu_addr = OAMADDR) then
+                oam_addr_we_n <= '0';
+            else
+                oam_addr_we_n <= '1';
+            end if;
+
+            if(cpu_addr = OAMDATA) then
+                oam_data_we_n <= '0';
+            else
+                oam_data_we_n <= '1';
+            end if;
+
+            --PPUSCROLL: ppu_scroll_cnt(0) = '0' targets scroll x, '1' targets scroll y.
+            --the counter is enabled during the access.
+            if(cpu_addr = PPUSCROLL) then
+                ppu_scroll_cnt_ce_n <= '0';
+                if (ppu_scroll_cnt(0) = '0') then
+                    ppu_scroll_x_we_n <= '0';
+                    ppu_scroll_y_we_n <= '1';
+                else
+                    ppu_scroll_y_we_n <= '0';
+                    ppu_scroll_x_we_n <= '1';
+                end if;
+            else
+                ppu_scroll_x_we_n <= '1';
+                ppu_scroll_y_we_n <= '1';
+                ppu_scroll_cnt_ce_n <= '1';
+            end if;
+
+            --PPUADDR: ppu_addr_cnt(0) = '0' replaces the upper 6 bits with
+            --cpu_d(5 downto 0), otherwise cpu_d replaces the lower 8 bits.
+            --ppu_addr_in is not assigned when PPUADDR is not addressed,
+            --so it keeps its previous value.
+            if(cpu_addr = PPUADDR) then
+                ppu_addr_cnt_ce_n <= '0';
+                ppu_addr_we_n <= '0';
+                if (ppu_addr_cnt(0) = '0') then
+                    ppu_addr_in <= cpu_d(5 downto 0) & ppu_addr(7 downto 0);
+                else
+                    ppu_addr_in <= ppu_addr(13 downto 8) & cpu_d;
+                end if;
+            else
+                ppu_addr_cnt_ce_n <= '1';
+                ppu_addr_we_n <= '1';
+            end if;
+
+            --NOTE(review): read_data_n is asserted when r_nw = '0', while
+            --r_nw = '1' is treated as a read for PPUSTATUS above - confirm
+            --that this polarity is intended.
+            if (cpu_addr = PPUDATA and r_nw = '0') then
+                read_data_n <= '0';
+            else
+                read_data_n <= '1';
+            end if;
+        else
+            --not selected (or in reset): release every enable.
+            --ppu_latch_rst_n is not assigned in this branch.
+            ppu_ctrl_we_n    <= '1';
+            ppu_mask_we_n    <= '1';
+            oam_addr_we_n    <= '1';
+            oam_data_we_n    <= '1';
+            ppu_scroll_x_we_n    <= '1';
+            ppu_scroll_y_we_n    <= '1';
+            ppu_scroll_cnt_ce_n  <= '1';
+            ppu_addr_we_n        <= '1';
+            ppu_addr_cnt_ce_n    <= '1';
+            read_status <= '0';
+            read_data_n <= '1';
+        end if; --if (rst_n = '1' and ce_n = '0') 
+
+    end process;
+
+    --cpu and ppu clock timing adjustment...
+    --clk_cnt_set_p: sequences the cpu access to oam / palette / vram with the
+    --2 bit ppu_clk_cnt. the counter is restarted on every change of the cpu
+    --side access, and the bus outputs are selected from the current count value.
+    --cpu_d, ale, vram_a, vram_ad and oam_plt_data are assigned in several
+    --places; within one pass the last executed assignment takes effect.
+    --ppu_clk_cnt, ppu_addr, ppu_addr_cnt, oam_addr, oam_data and ppu_data are
+    --read below but are not part of the sensitivity list.
+    clk_cnt_set_p : process (rst_n, ce_n, r_nw, cpu_addr, cpu_d, clk, 
+                                oam_plt_data, vram_ad, ppu_stat_out)
+    begin
+        if (rst_n = '1' and ce_n = '0') then
+            --set counter=0 on register write.
+            --(triggered by a change of ce_n, r_nw or cpu_addr, and by a change
+            --of cpu_d while r_nw = '0'.)
+            if (ce_n'event or r_nw'event or cpu_addr'event or (cpu_d'event and r_nw = '0')) then
+                ppu_clk_cnt_res_n <= '0';
+                --d_print("write event");
+            end if;
+
+            --start counter.
+            --on the falling clk edge: release the reset while the count is "00",
+            --and assert it again once the count has reached "10".
+            if (clk'event and clk = '0') then
+                if (ppu_clk_cnt = "10") then
+                    ppu_clk_cnt_res_n <= '0';
+                elsif (ppu_clk_cnt = "00") then
+                    ppu_clk_cnt_res_n <= '1';
+                end if;
+                --d_print("clk event");
+            end if;
+
+            --oam data set
+            --at count "00" the oam bus is selected with oam_addr as its address.
+            --read: oam_plt_data is released and forwarded to cpu_d.
+            --write: cpu_d is driven onto oam_plt_data.
+            if (cpu_addr = OAMDATA and ppu_clk_cnt = "00") then
+                oam_bus_ce_n <= '0';
+                oam_plt_addr <= oam_addr;
+                if (r_nw = '1') then
+                    oam_plt_data <= (others => 'Z');
+                    cpu_d <= oam_plt_data;
+                else
+                    oam_plt_data <= cpu_d;
+                end if;
+                --address increment for burst write.
+                oam_addr_ce_n <= '0';
+            else
+                cpu_d <= (others => 'Z');
+                oam_addr_ce_n <= '1';
+                oam_bus_ce_n <= '1';
+            end if;
+
+            --vram address access.
+            if (cpu_addr = PPUADDR and ppu_clk_cnt = "00") then
+                if (ppu_addr_cnt(0) = '0') then
+                    --load addr high
+                    ale <= '0';
+                else
+                    --load addr low and output vram/plt bus.
+
+                    --if address is 3fxx, set palette table.
+                    if (ppu_addr(13 downto 8) = "111111") then
+                        oam_plt_addr <= cpu_d;
+                        ale <= '0';
+                    else
+                        vram_ad <= cpu_d;
+                        vram_a <= ppu_addr(13 downto 8);
+                        ale <= '1';
+                    end if;
+                end if;
+            elsif (cpu_addr = PPUDATA and ppu_clk_cnt = "01") then
+                --for burst write.
+                --at count "01" of a PPUDATA access the current ppu_addr is put
+                --on the palette address or on the vram address bus.
+                if (ppu_addr(13 downto 8) = "111111") then
+                    oam_plt_addr <= ppu_addr(7 downto 0);
+                    ale <= '0';
+                else
+                    vram_a <= ppu_addr(13 downto 8);
+                    vram_ad <= ppu_addr(7 downto 0);
+                    ale <= '1';
+                end if;
+            else
+                ale <= '0';
+            end if;
+
+            --data phase of a PPUDATA access (count "00").
+            if (cpu_addr = PPUDATA and ppu_clk_cnt = "00") then
+                ppu_data_we_n <= '0';
+                vram_a <= ppu_addr(13 downto 8);
+                if (ppu_addr(13 downto 8) = "111111") then
+                    --case palette tbl.
+                    --vram strobes stay inactive, data goes through oam_plt_data.
+                    plt_bus_ce_n <= '0';
+                    if (r_nw = '0') then
+                        oam_plt_data <= cpu_d;
+                    else
+                        oam_plt_data <= (others => 'Z');
+                        cpu_d <= oam_plt_data;
+                    end if;
+                    rd_n <= '1';
+                    wr_n <= '1';
+                else
+                    --vram access: rd_n is low on read, wr_n is low on write.
+                    rd_n <= not r_nw;
+                    wr_n <= r_nw;
+                    plt_bus_ce_n <= '1';
+                    if (r_nw = '0') then
+                        vram_ad <= cpu_d;
+                    else
+                        vram_ad <= (others => 'Z');
+                        cpu_d <= vram_ad;
+                    end if;
+                end if;
+            else
+                plt_bus_ce_n <= '1';
+                ppu_data_we_n <= '1';
+                rd_n <= '1';
+                wr_n <= '1';
+            end if;
+
+            --sustain cpu output data when reading.
+            --(outside count "00" the registered ppu_data / oam_data is driven.)
+            if (cpu_addr = PPUDATA and r_nw = '1' and ppu_clk_cnt /= "00") then
+                cpu_d <= ppu_data;
+            end if;
+            if (cpu_addr = OAMDATA and r_nw = '1' and ppu_clk_cnt /= "00") then
+                cpu_d <= oam_data;
+            end if;
+
+            --status read returns the snapshot register ppu_stat_out.
+            if(cpu_addr = PPUSTATUS and r_nw = '1') then
+                cpu_d <= ppu_stat_out;
+            end if;
+
+        else
+            --not selected (or in reset): release the enables, hold the counter
+            --in reset and set every bus output of this process to 'Z'.
+            ppu_data_we_n    <= '1';
+            plt_bus_ce_n <= '1';
+            ppu_clk_cnt_res_n <= '0';
+            oam_bus_ce_n     <= '1';
+            oam_addr_ce_n <= '1';
+
+            rd_n <= 'Z';
+            wr_n <= 'Z';
+            ale <= 'Z';
+            oam_plt_data <= (others => 'Z');
+            vram_ad <= (others => 'Z');
+            vram_a <= (others => 'Z');
+            cpu_d <= (others => 'Z');
+        end if;
+    end process;
+
+end rtl;
+
diff --git a/de1_nes/ppu/render.vhd b/de1_nes/ppu/render.vhd
new file mode 100644 (file)
index 0000000..616dd31
--- /dev/null
@@ -0,0 +1,1026 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_arith.conv_std_logic_vector;
+use ieee.std_logic_unsigned.all;
+
+----------------------------
+---- ppu rendering block.
+----------------------------
+--clk, rst_n        : clock and active low reset.
+--rd_n, wr_n, ale,
+--vram_ad, vram_a   : vram bus. the architecture drives it while rendering
+--                    and releases rd_n/wr_n/ale to 'Z' otherwise.
+--pos_x, pos_y      : current x/y counter values (cur_x, cur_y).
+--r, g, b           : 4 bit color outputs.
+--ppu_ctrl, ppu_mask, ppu_scroll_x, ppu_scroll_y
+--                  : register values supplied by the instantiating block.
+--read_status, ppu_status
+--                  : status read notification input and status output.
+--r_nw, oam_bus_ce_n, plt_bus_ce_n, oam_plt_addr, oam_plt_data
+--                  : cpu side access to the oam and the palette ram.
+--v_bus_busy_n      : copy of the internal d_oe_n, i.e. low while this block
+--                    drives the vram address bus.
+entity ppu_render is 
+    port (  clk         : in std_logic;
+            rst_n       : in std_logic;
+            rd_n        : out std_logic;
+            wr_n        : out std_logic;
+            ale         : out std_logic;
+            vram_ad     : inout std_logic_vector (7 downto 0);
+            vram_a      : out std_logic_vector (13 downto 8);
+            pos_x       : out std_logic_vector (8 downto 0);
+            pos_y       : out std_logic_vector (8 downto 0);
+            r           : out std_logic_vector (3 downto 0);
+            g           : out std_logic_vector (3 downto 0);
+            b           : out std_logic_vector (3 downto 0);
+            ppu_ctrl        : in std_logic_vector (7 downto 0);
+            ppu_mask        : in std_logic_vector (7 downto 0);
+            read_status     : in std_logic;
+            ppu_status      : out std_logic_vector (7 downto 0);
+            ppu_scroll_x    : in std_logic_vector (7 downto 0);
+            ppu_scroll_y    : in std_logic_vector (7 downto 0);
+            r_nw            : in std_logic;
+            oam_bus_ce_n    : in std_logic;
+            plt_bus_ce_n    : in std_logic;
+            oam_plt_addr    : in std_logic_vector (7 downto 0);
+            oam_plt_data    : inout std_logic_vector (7 downto 0);
+            v_bus_busy_n    : out std_logic
+    );
+end ppu_render;
+
+architecture rtl of ppu_render is
+
+--loadable counter (implemented elsewhere). used below for io_cnt, cur_x and cur_y.
+--dsize is the register width. inc is presumably the count step - TODO confirm.
+--ce_n / we_n are presumably the active low count enable and load enable.
+component counter_register
+    generic (
+        dsize       : integer := 8;
+        inc         : integer := 1
+    );
+    port (  clk         : in std_logic;
+            rst_n       : in std_logic;
+            ce_n        : in std_logic;
+            we_n        : in std_logic;
+            d           : in std_logic_vector(dsize - 1 downto 0);
+            q           : out std_logic_vector(dsize - 1 downto 0)
+    );
+end component;
+
+--loadable shift register (implemented elsewhere). used below for disp_attr,
+--disp_ptn_l and disp_ptn_h.
+--dsize is the register width. shift is presumably the number of bits shifted
+--per enabled clock - TODO confirm.
+component shift_register
+    generic (
+        dsize : integer := 8;
+        shift : integer := 1
+    );
+    port (  clk         : in std_logic;
+            rst_n       : in std_logic;
+            ce_n        : in std_logic;
+            we_n        : in std_logic;
+            d           : in std_logic_vector(dsize - 1 downto 0);
+            q           : out std_logic_vector(dsize - 1 downto 0)
+    );
+end component;
+
+--dsize bit wide register with reset, set and write enable inputs
+--(implemented elsewhere). used below for disp_nt, attr_val and ptn_l_val;
+--set_n is tied to '1' in those instances.
+component d_flip_flop
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            clk     : in std_logic;
+            res_n   : in std_logic;
+            set_n   : in std_logic;
+            we_n    : in std_logic;
+            d       : in std_logic_vector (dsize - 1 downto 0);
+            q       : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+--dsize bit wide tri-state buffer (implemented elsewhere). used below to drive
+--vram_ad / vram_a and to connect oam_plt_data with plt_data.
+--oe_n is presumably an active low output enable - TODO confirm.
+component tri_state_buffer
+    generic (
+            dsize : integer := 8
+            );
+    port (  
+            oe_n    : in std_logic;
+            d       : in std_logic_vector (dsize - 1 downto 0);
+            q       : out std_logic_vector (dsize - 1 downto 0)
+        );
+end component;
+
+--ram with a bidirectional data port (implemented elsewhere).
+--instantiated below as the palette ram with a 5 bit address and dsize bit data.
+component ram
+    generic (abus_size : integer := 16; dbus_size : integer := 8);
+    port (  ce_n, oe_n, we_n  : in std_logic;   --select pin active low.
+            addr              : in std_logic_vector (abus_size - 1 downto 0);
+            d_io              : inout std_logic_vector (dbus_size - 1 downto 0)
+    );
+end component;
+
+--debug print helper.
+--the write/writeline calls are commented out, so the procedure currently
+--does nothing with msg. uncomment them to get the message on the
+--textio "output" file again.
+procedure d_print(msg : string) is
+use std.textio.all;
+--use ieee.std_logic_textio.all;
+variable out_l : line;
+begin
+--    write(out_l, msg);
+--    writeline(output, out_l);
+end  procedure;
+
+--format the low 8 bits of ival as two lowercase hex characters.
+function conv_hex8(ival : integer) return string is
+constant HEX_DIGITS : string (1 to 16) := "0123456789abcdef";
+--nibble values, each in the range 0 to 15.
+constant hi_nibble  : integer := (ival mod 256) / 16;
+constant lo_nibble  : integer := ival mod 16;
+begin
+    --string index starts at 1, hence the + 1.
+    return HEX_DIGITS(hi_nibble + 1) & HEX_DIGITS(lo_nibble + 1);
+end;
+
+--std_logic_vector overload: converts ival with conv_integer and formats
+--the result with the integer version of conv_hex8.
+function conv_hex8(ival : std_logic_vector) return string is
+constant int_val : integer := conv_integer(ival);
+begin
+    return conv_hex8(int_val);
+end;
+
+--format ival as four lowercase hex characters: conv_hex8 of (ival / 256)
+--followed by conv_hex8 of (ival mod 256).
+--conv_hex8 keeps only the low 8 bits of its argument, so for a non-negative
+--ival the result shows the low 16 bits.
+function conv_hex16(ival : integer) return string is
+constant hi_byte : integer := ival / 256;
+constant lo_byte : integer := ival mod 256;
+begin
+    return conv_hex8(hi_byte) & conv_hex8(lo_byte);
+end;
+
+--std_logic_vector overload: converts ival with conv_integer and formats
+--the result with the integer version of conv_hex16.
+function conv_hex16(ival : std_logic_vector) return string is
+constant int_val : integer := conv_integer(ival);
+begin
+    return conv_hex16(int_val);
+end;
+
+--counter width and bus sizes.
+constant X_SIZE       : integer := 9;
+constant dsize        : integer := 8;
+constant asize        : integer := 14;
+--scan geometry. cur_x / cur_y are compared against these values.
+constant HSCAN_MAX    : integer := 341;
+constant VSCAN_MAX    : integer := 262;
+constant HSCAN        : integer := 256;
+constant VSCAN        : integer := 240;
+constant HSCAN_NEXT_START    : integer := 320;
+constant HSCAN_NEXT_EXTRA    : integer := 336;
+
+
+--bit positions in ppu_ctrl.
+constant PPUBNA    : integer := 1;  --base name address
+constant PPUVAI    : integer := 2;  --vram address increment
+constant PPUSPA    : integer := 3;  --sprite pattern table address
+constant PPUBPA    : integer := 4;  --background pattern table address
+constant PPUSPS    : integer := 5;  --sprite size
+constant PPUMS     : integer := 6;  --ppu master/slave
+constant PPUNEN    : integer := 7;  --nmi enable
+
+--bit positions in ppu_mask.
+constant PPUGS     : integer := 0;  --grayscale
+constant PPUSBL    : integer := 1;  --show 8 left most bg pixel
+constant PPUSSL    : integer := 2;  --show 8 left most sprite pixel
+constant PPUSBG    : integer := 3;  --show bg
+constant PPUSSP    : integer := 4;  --show sprite
+constant PPUIR     : integer := 5;  --intensify red
+constant PPUIG     : integer := 6;  --intensify green
+constant PPUIB     : integer := 7;  --intensify blue
+
+--sprite flip flags (presumably bit positions in the sprite attribute byte).
+constant SPRHFL     : integer := 6;  --flip sprite horizontally
+constant SPRVFL     : integer := 7;  --flip sprite vertically
+
+--bit positions in ppu_status.
+constant ST_SOF     : integer := 5;  --sprite overflow
+constant ST_SP0     : integer := 6;  --sprite 0 hits
+constant ST_VBL     : integer := 7;  --vblank
+
+--color lookup table: 64 entries of 12 bit color data, written as three hex digits.
+--presumably 4 bits each for red, green and blue (the r/g/b ports are 4 bits wide)
+-- - TODO confirm the digit order where the table is read.
+subtype nes_color_data  is std_logic_vector (11 downto 0);
+type nes_color_array    is array (0 to 63) of nes_color_data;
+--ref: http://hlc6502.web.fc2.com/NesPal2.htm
+constant nes_color_palette : nes_color_array := (
+        --entries 0 to 15
+        conv_std_logic_vector(16#777#, 12), 
+        conv_std_logic_vector(16#20b#, 12), 
+        conv_std_logic_vector(16#20b#, 12), 
+        conv_std_logic_vector(16#61a#, 12), 
+        conv_std_logic_vector(16#927#, 12), 
+        conv_std_logic_vector(16#b13#, 12), 
+        conv_std_logic_vector(16#a30#, 12), 
+        conv_std_logic_vector(16#740#, 12), 
+        conv_std_logic_vector(16#450#, 12), 
+        conv_std_logic_vector(16#360#, 12), 
+        conv_std_logic_vector(16#360#, 12), 
+        conv_std_logic_vector(16#364#, 12), 
+        conv_std_logic_vector(16#358#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12),
+        --entries 16 to 31
+        conv_std_logic_vector(16#bbb#, 12), 
+        conv_std_logic_vector(16#46f#, 12), 
+        conv_std_logic_vector(16#44f#, 12), 
+        conv_std_logic_vector(16#94f#, 12), 
+        conv_std_logic_vector(16#d4c#, 12), 
+        conv_std_logic_vector(16#d46#, 12), 
+        conv_std_logic_vector(16#e50#, 12), 
+        conv_std_logic_vector(16#c70#, 12), 
+        conv_std_logic_vector(16#880#, 12), 
+        conv_std_logic_vector(16#5a0#, 12), 
+        conv_std_logic_vector(16#4a1#, 12), 
+        conv_std_logic_vector(16#4a6#, 12), 
+        conv_std_logic_vector(16#49c#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12),
+        --entries 32 to 47
+        conv_std_logic_vector(16#fff#, 12), 
+        conv_std_logic_vector(16#6af#, 12), 
+        conv_std_logic_vector(16#58f#, 12), 
+        conv_std_logic_vector(16#a7f#, 12), 
+        conv_std_logic_vector(16#f6f#, 12), 
+        conv_std_logic_vector(16#f6b#, 12), 
+        conv_std_logic_vector(16#f73#, 12), 
+        conv_std_logic_vector(16#fa0#, 12), 
+        conv_std_logic_vector(16#ed2#, 12), 
+        conv_std_logic_vector(16#9e0#, 12), 
+        conv_std_logic_vector(16#7f4#, 12), 
+        conv_std_logic_vector(16#7e9#, 12), 
+        conv_std_logic_vector(16#6de#, 12), 
+        conv_std_logic_vector(16#777#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12),
+        --entries 48 to 63
+        conv_std_logic_vector(16#fff#, 12), 
+        conv_std_logic_vector(16#9df#, 12), 
+        conv_std_logic_vector(16#abf#, 12), 
+        conv_std_logic_vector(16#cbf#, 12), 
+        conv_std_logic_vector(16#ebf#, 12), 
+        conv_std_logic_vector(16#fbe#, 12), 
+        conv_std_logic_vector(16#fcb#, 12), 
+        conv_std_logic_vector(16#fda#, 12), 
+        conv_std_logic_vector(16#ff9#, 12), 
+        conv_std_logic_vector(16#cf8#, 12), 
+        conv_std_logic_vector(16#afa#, 12), 
+        conv_std_logic_vector(16#afc#, 12), 
+        conv_std_logic_vector(16#aff#, 12), 
+        conv_std_logic_vector(16#aaa#, 12), 
+        conv_std_logic_vector(16#000#, 12), 
+        conv_std_logic_vector(16#000#, 12)
+        );
+
+signal clk_n            : std_logic;
+
+--timing adjust
+signal io_cnt           : std_logic_vector(0 downto 0);
+
+--vram i/o
+signal io_oe_n          : std_logic;
+signal d_oe_n           : std_logic;
+
+signal cnt_x_en_n    : std_logic;
+signal cnt_x_res_n   : std_logic;
+signal cnt_y_en_n    : std_logic;
+signal cnt_y_res_n   : std_logic;
+
+signal cur_x            : std_logic_vector(X_SIZE - 1 downto 0);
+signal cur_y            : std_logic_vector(X_SIZE - 1 downto 0);
+signal next_x           : std_logic_vector(X_SIZE - 1 downto 0);
+signal next_y           : std_logic_vector(X_SIZE - 1 downto 0);
+
+signal nt_we_n          : std_logic;
+signal disp_nt          : std_logic_vector (dsize - 1 downto 0);
+
+signal attr_ce_n        : std_logic;
+signal attr_we_n        : std_logic;
+signal attr_val         : std_logic_vector (dsize - 1 downto 0);
+signal disp_attr_we_n   : std_logic;
+signal disp_attr        : std_logic_vector (dsize - 1 downto 0);
+
+signal ptn_en_n         : std_logic;
+
+signal ptn_l_we_n       : std_logic;
+signal ptn_l_in         : std_logic_vector (dsize - 1 downto 0);
+signal ptn_l_val        : std_logic_vector (dsize - 1 downto 0);
+signal disp_ptn_l_in    : std_logic_vector (dsize * 2 - 1 downto 0);
+signal disp_ptn_l       : std_logic_vector (dsize * 2 - 1 downto 0);
+
+signal ptn_h_we_n       : std_logic;
+signal ptn_h_in         : std_logic_vector (dsize * 2 - 1 downto 0);
+signal disp_ptn_h       : std_logic_vector (dsize * 2 - 1 downto 0);
+
+--signals for palette / oam access from cpu
+signal r_n              : std_logic;
+signal vram_addr        : std_logic_vector (asize - 1 downto 0);
+
+--palette
+signal plt_ram_ce_n     : std_logic;
+signal plt_r_n          : std_logic;
+signal plt_w_n          : std_logic;
+signal plt_addr         : std_logic_vector (4 downto 0);
+signal plt_data         : std_logic_vector (dsize - 1 downto 0);
+
+--primary / secondary oam
+signal oam_ram_ce_n     : std_logic;
+signal oam_r_n          : std_logic;
+signal oam_w_n          : std_logic;
+signal oam_addr         : std_logic_vector (dsize - 1 downto 0);
+signal oam_data         : std_logic_vector (dsize - 1 downto 0);
+
+signal s_oam_ram_ce_n   : std_logic;
+signal s_oam_r_n        : std_logic;
+signal s_oam_w_n        : std_logic;
+signal s_oam_addr_cpy_ce_n      : std_logic;
+signal s_oam_addr_cpy_n         : std_logic;
+signal s_oam_addr       : std_logic_vector (4 downto 0);
+signal s_oam_addr_cpy   : std_logic_vector (4 downto 0);
+signal s_oam_data       : std_logic_vector (dsize - 1 downto 0);
+
+signal p_oam_cnt_res_n  : std_logic;
+signal p_oam_cnt_ce_n   : std_logic;
+signal p_oam_cnt_wrap_n : std_logic;
+signal s_oam_cnt_ce_n   : std_logic;
+signal p_oam_cnt        : std_logic_vector (dsize - 1 downto 0);
+signal s_oam_cnt        : std_logic_vector (4 downto 0);
+signal p_oam_addr_in    : std_logic_vector (dsize - 1 downto 0);
+signal oam_ev_status    : std_logic_vector (2 downto 0);
+
+--oam evaluation status
+constant EV_STAT_COMP       : std_logic_vector (2 downto 0) := "000";
+constant EV_STAT_CP1        : std_logic_vector (2 downto 0) := "001";
+constant EV_STAT_CP2        : std_logic_vector (2 downto 0) := "010";
+constant EV_STAT_CP3        : std_logic_vector (2 downto 0) := "011";
+constant EV_STAT_PRE_COMP   : std_logic_vector (2 downto 0) := "100";
+
+----------sprite registers.
+type oam_pin_array    is array (0 to 7) of std_logic;
+type oam_reg_array    is array (0 to 7) of std_logic_vector (dsize - 1 downto 0);
+
+signal spr_x_we_n       : oam_pin_array;
+signal spr_x_ce_n       : oam_pin_array;
+signal spr_attr_we_n    : oam_pin_array;
+signal spr_ptn_l_we_n   : oam_pin_array;
+signal spr_ptn_h_we_n   : oam_pin_array;
+signal spr_ptn_ce_n     : oam_pin_array;
+
+signal spr_x_cnt        : oam_reg_array;
+signal spr_attr         : oam_reg_array;
+signal spr_ptn_l        : oam_reg_array;
+signal spr_ptn_h        : oam_reg_array;
+
+signal spr_y_we_n       : std_logic;
+signal spr_tile_we_n    : std_logic;
+signal spr_y_tmp        : std_logic_vector (dsize - 1 downto 0);
+signal spr_tile_tmp     : std_logic_vector (dsize - 1 downto 0);
+signal spr_ptn_in       : std_logic_vector (dsize - 1 downto 0);
+
+
+begin
+
+    --inverted clock, used by the position counters and the fetch registers.
+    clk_n <= not clk;
+
+    --cnt_x_en_n is tied low (it feeds the ce_n input of cur_x_inst).
+    cnt_x_en_n <= '0';
+
+    --vram bus control.
+    --every assignment below uses the same two conditions: background enabled
+    --(PPUSBG) or sprite enabled (PPUSSP), each combined with cur_y being below
+    --VSCAN or equal to VSCAN_MAX - 1. both conditions select the same value.
+    --outside of these conditions ale/rd_n/wr_n are released to 'Z' and the
+    --output enables are inactive ('1').
+
+    --ale and rd_n follow io_cnt(0) while rendering.
+    ale <= io_cnt(0) when ppu_mask(PPUSBG) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+           io_cnt(0) when ppu_mask(PPUSSP) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+           'Z';
+    rd_n <= io_cnt(0) when ppu_mask(PPUSBG) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+            io_cnt(0) when ppu_mask(PPUSSP) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+            'Z';
+    --the renderer never writes to vram: wr_n is held at '1' while rendering.
+    wr_n <= '1' when ppu_mask(PPUSBG) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+            '1' when ppu_mask(PPUSSP) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+            'Z';
+    --io_oe_n (enable of the vram_ad address buffer) is low while io_cnt(0) = '1'.
+    io_oe_n <= not io_cnt(0) when ppu_mask(PPUSBG) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+               not io_cnt(0) when ppu_mask(PPUSSP) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+               '1';
+    --d_oe_n (enable of the vram_a buffer) is low for the whole rendering period.
+    d_oe_n <= '0' when ppu_mask(PPUSBG) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+              '0' when ppu_mask(PPUSSP) = '1' and
+                (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) else
+              '1';
+    --report the bus ownership to the outside.
+    v_bus_busy_n <= d_oe_n;
+
+    --1 bit counter clocked on clk (not clk_n); its ce_n is tied to '0' and
+    --it shares the reset of the x counter.
+    io_cnt_inst : counter_register generic map (1, 1)
+            port map (clk, cnt_x_res_n, '0', '1', (others => '0'), io_cnt);
+
+    ---x pos is 8 cycle ahead of current pos.
+    --NOTE(review): the constant added below is "000010000" (16), not 8 -
+    --confirm which one is intended.
+    --from HSCAN_NEXT_START on, "011000000" (192) is added instead; the sum is
+    --X_SIZE bits wide, so e.g. 320 + 192 wraps around to 0.
+    next_x <= cur_x + "000010000" 
+                    when cur_x <  conv_std_logic_vector(HSCAN_NEXT_START, X_SIZE) else
+              cur_x + "011000000";
+    --next_y is cur_y while cur_x <= HSCAN; after that it is the following
+    --line, wrapping from VSCAN_MAX - 1 to 0.
+    next_y <= cur_y 
+                    when cur_x <=  conv_std_logic_vector(HSCAN, X_SIZE) else
+              "000000000" 
+                    when cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE) else
+              cur_y + "000000001";
+
+    --current x,y pos
+    cur_x_inst : counter_register generic map (X_SIZE, 1)
+            port map (clk_n, cnt_x_res_n, cnt_x_en_n, '1', (others => '0'), cur_x);
+    cur_y_inst : counter_register generic map (X_SIZE, 1)
+            port map (clk_n, cnt_y_res_n, cnt_y_en_n, '1', (others => '0'), cur_y);
+
+    --name table byte, latched from vram_ad when nt_we_n is low.
+    nt_inst : d_flip_flop generic map(dsize)
+            port map (clk_n, rst_n, '1', nt_we_n, vram_ad, disp_nt);
+
+    --attribute byte, latched from vram_ad when attr_we_n is low.
+    at_inst : d_flip_flop generic map(dsize)
+            port map (clk_n, rst_n, '1', attr_we_n, vram_ad, attr_val);
+
+    --attribute shift register (shift = 2), loaded from attr_val.
+    disp_at_inst : shift_register generic map(dsize, 2)
+            port map (clk_n, rst_n, attr_ce_n, disp_attr_we_n, attr_val, disp_attr);
+
+    --chr rom data's bit is stored in opposite direction.
+    --reverse bit when loading...
+    ptn_l_in <= (vram_ad(0) & vram_ad(1) & vram_ad(2) & vram_ad(3) & 
+                 vram_ad(4) & vram_ad(5) & vram_ad(6) & vram_ad(7));
+    --upper byte: bit reversed vram_ad. lower byte: bits 8 downto 1 of the
+    --current disp_ptn_h value.
+    ptn_h_in <= (vram_ad(0) & vram_ad(1) & vram_ad(2) & vram_ad(3) & 
+                 vram_ad(4) & vram_ad(5) & vram_ad(6) & vram_ad(7)) & 
+                disp_ptn_h (dsize downto 1);
+
+    --pattern shift enable: '1' at cur_x = 0, '0' for cur_x in
+    --1 to HSCAN_NEXT_EXTRA and '1' above that.
+    ptn_en_n <= '1' when cur_x = conv_std_logic_vector(0, X_SIZE) else
+                '0' when cur_x <= conv_std_logic_vector(HSCAN_NEXT_EXTRA, X_SIZE) else
+                '1';
+
+    --low pattern byte is held in ptn_l_val until the display registers load.
+    ptn_l_inst : d_flip_flop generic map(dsize)
+            port map (clk_n, rst_n, '1', ptn_l_we_n, ptn_l_in, ptn_l_val);
+
+    --both 16 bit display registers (low and high) load on ptn_h_we_n.
+    disp_ptn_l_in <= ptn_l_val & disp_ptn_l (dsize downto 1);
+    disp_ptn_l_inst : shift_register generic map(dsize * 2, 1)
+            port map (clk_n, rst_n, ptn_en_n, ptn_h_we_n, disp_ptn_l_in, disp_ptn_l);
+
+    ptn_h_inst : shift_register generic map(dsize * 2, 1)
+            port map (clk_n, rst_n, ptn_en_n, ptn_h_we_n, ptn_h_in, disp_ptn_h);
+
+    --vram i/o
+    --low address byte goes onto the shared vram_ad bus, enabled by io_oe_n.
+    vram_io_buf : tri_state_buffer generic map (dsize)
+            port map (io_oe_n, vram_addr(dsize - 1 downto 0), vram_ad);
+
+    --upper 6 address bits go onto vram_a, enabled by d_oe_n.
+    vram_a_buf : tri_state_buffer generic map (6)
+            port map (d_oe_n, vram_addr(asize - 1 downto dsize), vram_a);
+
+    --export the current position.
+    pos_x <= cur_x;
+    pos_y <= cur_y;
+
+    ---palette ram
+    r_n <= not r_nw;
+
+    plt_ram_ce_n <= clk when plt_bus_ce_n = '0' and r_nw = '0' else 
+                    '0' when plt_bus_ce_n = '0' and r_nw = '1' else
+                    '0' when ppu_mask(PPUSBG) = '1' and 
+                            (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and 
+                            (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) else
+                    '1';
+
+    plt_addr <= oam_plt_addr(4 downto 0) when plt_bus_ce_n = '0' else
+                "1" & spr_attr(0)(1 downto 0) & spr_ptn_h(0)(0) & spr_ptn_l(0)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(0) = "00000000" and 
+                        (spr_ptn_h(0)(0) or spr_ptn_l(0)(0)) = '1' else
+                "1" & spr_attr(1)(1 downto 0) & spr_ptn_h(1)(0) & spr_ptn_l(1)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(1) = "00000000" and 
+                        (spr_ptn_h(1)(0) or spr_ptn_l(1)(0)) = '1' else
+                "1" & spr_attr(2)(1 downto 0) & spr_ptn_h(2)(0) & spr_ptn_l(2)(0)
+                    when ppu_mask(PPUSSP) = '1' and 
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(2) = "00000000" and
+                        (spr_ptn_h(2)(0) or spr_ptn_l(2)(0)) = '1' else
+                "1" & spr_attr(3)(1 downto 0) & spr_ptn_h(3)(0) & spr_ptn_l(3)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(3) = "00000000" and
+                        (spr_ptn_h(3)(0) or spr_ptn_l(3)(0)) = '1' else
+                "1" & spr_attr(4)(1 downto 0) & spr_ptn_h(4)(0) & spr_ptn_l(4)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(4) = "00000000" and
+                        (spr_ptn_h(4)(0) or spr_ptn_l(4)(0)) = '1' else
+                "1" & spr_attr(5)(1 downto 0) & spr_ptn_h(5)(0) & spr_ptn_l(5)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(5) = "00000000" and
+                        (spr_ptn_h(5)(0) or spr_ptn_l(5)(0)) = '1' else
+                "1" & spr_attr(6)(1 downto 0) & spr_ptn_h(6)(0) & spr_ptn_l(6)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(6) = "00000000" and
+                        (spr_ptn_h(6)(0) or spr_ptn_l(6)(0)) = '1' else
+                "1" & spr_attr(7)(1 downto 0) & spr_ptn_h(7)(0) & spr_ptn_l(7)(0)
+                    when ppu_mask(PPUSSP) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) and
+                        spr_x_cnt(7) = "00000000" and
+                        (spr_ptn_h(7)(0) or spr_ptn_l(7)(0)) = '1' else
+                "0" & disp_attr(1 downto 0) & disp_ptn_h(0) & disp_ptn_l(0) 
+                    when ppu_mask(PPUSBG) = '1' and cur_y(4) = '0' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) else
+                "0" & disp_attr(5 downto 4) & disp_ptn_h(0) & disp_ptn_l(0)
+                    when ppu_mask(PPUSBG) = '1' and cur_y(4) = '1' and
+                        (cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) else
+                (others => 'Z');
+
+    ---palette ram access control.
+    --cpu access (plt_bus_ce_n = '0') has priority: the read strobe is the
+    --inverse of r_nw. otherwise the renderer reads the palette whenever
+    --background OR sprite rendering is enabled. the sprite entries of the
+    --5 bit palette selector are chosen on ppu_mask(PPUSSP) alone and
+    --output_rgb reads plt_data for sprite dots, so gating the read on
+    --PPUSBG only left plt_data undriven when just sprites were shown.
+    --NOTE(review): plt_ram_ce_n is generated outside this section; confirm
+    --that it is also active when only sprites are enabled.
+    plt_r_n <= not r_nw when plt_bus_ce_n = '0' else
+                '0' when ppu_mask(PPUSBG) = '1' or ppu_mask(PPUSSP) = '1' else
+                '1';
+    --the palette is written only through the cpu bus.
+    plt_w_n <= r_nw when plt_bus_ce_n = '0' else
+                '1';
+    --bus buffers between the shared oam/palette data bus and the palette ram.
+    --NOTE(review): port order assumed to be (enable_n, in, out) - confirm
+    --against the tri_state_buffer component.
+    plt_d_buf_w : tri_state_buffer generic map (dsize)
+            port map (r_nw, oam_plt_data, plt_data);
+    plt_d_buf_r : tri_state_buffer generic map (dsize)
+            port map (r_n, plt_data, oam_plt_data);
+    --palette ram, 5 bit address.
+    palette_inst : ram generic map (5, dsize)
+            port map (plt_ram_ce_n, plt_r_n, plt_w_n, plt_addr, plt_data);
+
+    ---primary oam
+    --storage and the buffers that connect it to the shared oam/palette bus.
+    primary_oam_inst : ram generic map (dsize, dsize)
+            port map (oam_ram_ce_n, oam_r_n, oam_w_n, oam_addr, oam_data);
+    oam_d_buf_w : tri_state_buffer generic map (dsize)
+            port map (r_nw, oam_plt_data, oam_data);
+    oam_d_buf_r : tri_state_buffer generic map (dsize)
+            port map (r_n, oam_data, oam_plt_data);
+
+    --chip enable: follows clk while the cpu writes; held low while the cpu
+    --reads, or during dots 65..256 with sprites enabled as long as
+    --p_oam_cnt_wrap_n is still high.
+    oam_ram_ce_n <= clk when (oam_bus_ce_n = '0' and r_nw = '0') else
+                    '0' when (oam_bus_ce_n = '0' and r_nw = '1') or
+                             (ppu_mask(PPUSSP) = '1' and
+                              p_oam_cnt_wrap_n = '1' and
+                              cur_x > conv_std_logic_vector(64, X_SIZE) and
+                              cur_x <= conv_std_logic_vector(256, X_SIZE)) else
+                    '1';
+
+    --address: the cpu address while the cpu owns the bus, otherwise the
+    --evaluation address during dots 65..256 of the visible lines and of
+    --line VSCAN_MAX - 1. released ('Z') at any other time.
+    oam_addr <= oam_plt_addr when oam_bus_ce_n = '0' else
+                p_oam_addr_in when ppu_mask(PPUSSP) = '1' and
+                         cur_x > conv_std_logic_vector(64, X_SIZE) and
+                         cur_x <= conv_std_logic_vector(256, X_SIZE) and
+                        (cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE) or
+                         cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) else
+                (others => 'Z');
+
+    --read strobe: cpu controlled on cpu access, otherwise active in the
+    --same window in which the evaluation address is driven.
+    oam_r_n <= not r_nw when oam_bus_ce_n = '0' else
+                '0' when ppu_mask(PPUSSP) = '1' and
+                         cur_x > conv_std_logic_vector(64, X_SIZE) and
+                         cur_x <= conv_std_logic_vector(256, X_SIZE) and
+                        (cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE) or
+                         cur_y < conv_std_logic_vector(VSCAN, X_SIZE)) else
+                '1';
+
+    --write strobe: only the cpu writes the primary oam.
+    oam_w_n <= r_nw when oam_bus_ce_n = '0' else
+                '1';
+
+    ---secondary oam
+    --storage (5 bit address) and the two holding registers that keep the
+    --y position and tile number read back from it.
+    secondary_oam_inst : ram generic map (5, dsize)
+            port map (s_oam_ram_ce_n, s_oam_r_n, s_oam_w_n, s_oam_addr, s_oam_data);
+    spr_y_inst : d_flip_flop generic map(dsize)
+            port map (clk_n, p_oam_cnt_res_n, '1', spr_y_we_n, s_oam_data, spr_y_tmp);
+    spr_tile_inst : d_flip_flop generic map(dsize)
+            port map (clk_n, p_oam_cnt_res_n, '1', spr_tile_we_n, s_oam_data, spr_tile_tmp);
+
+    --address counters, all cleared by p_oam_cnt_res_n.
+    --p_oam_cnt walks the primary oam (second generic 4), s_oam_cnt and
+    --s_oam_addr_cpy walk the secondary oam (second generic 1).
+    --NOTE(review): the second generic is presumably the count step -
+    --confirm against the counter_register component.
+    p_oam_cnt_inst : counter_register generic map (dsize, 4)
+            port map (clk_n, p_oam_cnt_res_n, p_oam_cnt_ce_n, '1', (others => '0'), p_oam_cnt);
+    s_oam_cnt_inst : counter_register generic map (5, 1)
+            port map (clk_n, p_oam_cnt_res_n, s_oam_cnt_ce_n, '1', (others => '0'), s_oam_cnt);
+    s_oam_addr_cpy_inst : counter_register generic map (5, 1)
+            port map (clk_n, p_oam_cnt_res_n, s_oam_addr_cpy_ce_n, 
+                    '1', (others => '0'), s_oam_addr_cpy);
+
+    --chip enable of the secondary oam (sprites enabled only):
+    --  follows clk on odd dots 3..63,
+    --  follows clk on odd dots 65..255 while p_oam_cnt_wrap_n is high,
+    --  held low on dots 257..320 while s_oam_addr_cpy_n is low,
+    --  inactive otherwise.
+    s_oam_ram_ce_n <= clk when ppu_mask(PPUSSP) = '1' and cur_x(0) = '1' and
+                               ((cur_x > "000000001" and
+                                 cur_x <= conv_std_logic_vector(64, X_SIZE)) or
+                                (cur_x > conv_std_logic_vector(64, X_SIZE) and
+                                 cur_x <= conv_std_logic_vector(256, X_SIZE) and
+                                 p_oam_cnt_wrap_n = '1')) else
+                      '0' when ppu_mask(PPUSSP) = '1' and
+                                s_oam_addr_cpy_n = '0' and
+                                cur_x > conv_std_logic_vector(256, X_SIZE) and
+                                cur_x <= conv_std_logic_vector(320, X_SIZE) else
+                      '1';
+
+
+   --pattern byte handed to the sprite shift registers.
+   --the byte on vram_ad is bit-reversed unless the horizontal flip
+   --attribute (SPRHFL) of the sprite slot being fetched is set
+   --(.nes file format bit endian).
+   spr_ptn_in <= (vram_ad(0) & vram_ad(1) & vram_ad(2) & vram_ad(3) & 
+                  vram_ad(4) & vram_ad(5) & vram_ad(6) & vram_ad(7))
+                    when spr_attr(conv_integer(s_oam_addr_cpy(4 downto 2)))(SPRHFL) /= '1' else
+                 vram_ad;
+
+    --one set of registers per sprite slot (8 slots).
+    spr_inst : for slot in 0 to 7 generate
+        --pattern shift registers, low and high bit plane.
+        spr_ptn_l_inst : shift_register generic map(dsize, 1)
+                port map (clk_n, rst_n, spr_ptn_ce_n(slot), spr_ptn_l_we_n(slot),
+                        spr_ptn_in, spr_ptn_l(slot));
+
+        spr_ptn_h_inst : shift_register generic map(dsize, 1)
+                port map (clk_n, rst_n, spr_ptn_ce_n(slot), spr_ptn_h_we_n(slot),
+                        spr_ptn_in, spr_ptn_h(slot));
+
+        --attribute latch, loaded from the secondary oam.
+        spr_attr_inst : d_flip_flop generic map(dsize)
+                port map (clk_n, rst_n, '1', spr_attr_we_n(slot), s_oam_data, spr_attr(slot));
+
+        --x position counter, loaded from the secondary oam.
+        --NOTE(review): the 16#ff# generic presumably makes it count down
+        --to zero - confirm against the counter_register component.
+        spr_x_inst : counter_register generic map(dsize, 16#ff#)
+                port map (clk_n, rst_n, spr_x_ce_n(slot), spr_x_we_n(slot),
+                        s_oam_data, spr_x_cnt(slot));
+    end generate;
+
+    clk_p : process (rst_n, clk, read_status)
+
+--drive the r/g/b outputs for the current dot.
+--a dot is coloured from the palette ram output (plt_data) when a sprite
+--or the background has a non-transparent pixel, otherwise it is black.
+--also raises ppu_status(ST_SP0) when a sprite pixel and a background
+--pixel are both non-transparent.
+procedure output_rgb is
+    --index into nes_color_palette taken from plt_data.
+    variable color_idx : integer;
+    --some sprite slot has reached x = 0 and shifts out a non-zero pixel.
+    variable spr_opaque : boolean;
+    --background is enabled and shifts out a non-zero pixel.
+    variable bg_opaque : boolean;
+begin
+    --sprites are looked at first.
+    spr_opaque := false;
+    if (ppu_mask(PPUSSP) = '1') then
+        for n in 0 to 7 loop
+            if (spr_x_cnt(n) = "00000000" and
+                    (spr_ptn_h(n)(0) or spr_ptn_l(n)(0)) = '1') then
+                spr_opaque := true;
+            end if;
+        end loop;
+    end if;
+
+    --first color in the palette is transparent color.
+    bg_opaque := (ppu_mask(PPUSBG) = '1') and
+                 ((disp_ptn_h(0) or disp_ptn_l(0)) = '1');
+
+    --raise sprite 0 hit.
+    if (spr_opaque and bg_opaque) then
+        ppu_status(ST_SP0) <= '1';
+    end if;
+
+    if (spr_opaque or bg_opaque) then
+        color_idx := conv_integer(plt_data(5 downto 0));
+        b <= nes_color_palette(color_idx) (11 downto 8);
+        g <= nes_color_palette(color_idx) (7 downto 4);
+        r <= nes_color_palette(color_idx) (3 downto 0);
+    else
+        --nothing visible at this dot.
+        b <= (others => '0');
+        g <= (others => '0');
+        r <= (others => '0');
+    end if; --if (spr_opaque or bg_opaque) then
+end;
+
+    begin
+        --main rendering sequencer.
+        --rst_n low clears the x/y counter resets, nt_we_n, ppu_status and
+        --the rgb outputs. while running:
+        --  both clock edges  : x/y scan counter reset control.
+        --  rising clock edge : y counter enable, bg fetch, sprite
+        --                      evaluation / fetch / display, dot output
+        --                      and status flag handling.
+        --  falling clock edge: debug print only.
+        --  rising read_status: clears the vblank flag.
+        if (rst_n = '0') then
+            cnt_x_res_n <= '0';
+            cnt_y_res_n <= '0';
+            nt_we_n <= '1';
+
+            ppu_status <= (others => '0');
+
+            b <= (others => '0');
+            g <= (others => '0');
+            r <= (others => '0');
+        else
+            if (clk'event) then
+                --x pos reset.
+                --asserted on the falling edge at the last dot of a line,
+                --released again on every other edge.
+                if (clk = '0' and 
+                        cur_x = conv_std_logic_vector(HSCAN_MAX - 1, X_SIZE)) then
+                    cnt_x_res_n <= '0';
+
+                    --y pos reset.
+                    if (cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE)) then
+                        cnt_y_res_n <= '0';
+                    else
+                        cnt_y_res_n <= '1';
+                    end if;
+                else
+                    cnt_x_res_n <= '1';
+                    cnt_y_res_n <= '1';
+                end if;
+            end if; --if (clk'event) then
+
+            if (clk'event and clk = '1') then
+                --y pos increment.
+                if (cur_x = conv_std_logic_vector(HSCAN_MAX - 1, X_SIZE)) then
+                    cnt_y_en_n <= '0';
+                else
+                    cnt_y_en_n <= '1';
+                end if;
+            end if; --if (clk'event and clk = '1') then
+
+            if (clk'event and clk = '0') then
+                d_print("-");
+            end if;
+
+            if (clk'event and clk = '1') then
+
+                --fetch bg pattern and display.
+                --active on the visible lines and on line VSCAN_MAX - 1.
+                if (ppu_mask(PPUSBG) = '1' and 
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                        cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE))) then
+                    d_print("*");
+                    d_print("cur_x: " & conv_hex16(conv_integer(cur_x)));
+                    d_print("cur_y: " & conv_hex16(conv_integer(cur_y)));
+
+                    ----fetch next tile byte.
+                    if (cur_x (2 downto 0) = "001" ) then
+                        --vram addr is incremented every 8 cycle.
+                        --name table at 0x2000
+                        vram_addr(9 downto 0) 
+                            <= next_y(dsize - 1 downto 3) 
+                                & next_x(dsize - 1 downto 3);
+                        vram_addr(asize - 1 downto 10) <= "10" & ppu_ctrl(PPUBNA downto 0);
+                    end if;
+                    if (cur_x (2 downto 0) = "010" ) then
+                        nt_we_n <= '0';
+                    else
+                        nt_we_n <= '1';
+                    end if;
+
+                    ----fetch attr table byte.
+                    if (cur_x (4 downto 0) = "00011" ) then
+                        --attribute table is loaded every 32 cycle.
+                        --attr table at 0x23c0
+                        vram_addr(dsize - 1 downto 0) <= "11000000" +
+                                ("00" & next_y(7 downto 5) & next_x(7 downto 5));
+                        vram_addr(asize - 1 downto dsize) <= "10" &
+                                ppu_ctrl(PPUBNA downto 0) & "11";
+                    end if;--if (cur_x (4 downto 0) = "00011" ) then
+                    if (cur_x (4 downto 0) = "00100" ) then
+                        attr_we_n <= '0';
+                    else
+                        attr_we_n <= '1';
+                    end if;
+                    if (cur_x (4 downto 0) = "00000" ) then
+                        disp_attr_we_n <= '0';
+                    else
+                        disp_attr_we_n <= '1';
+                    end if;
+                    ---attribute is shifted every 16 bit.
+                    if (cur_x (3 downto 0) = "0000" ) then
+                        attr_ce_n <= '0';
+                    else
+                        attr_ce_n <= '1';
+                    end if;
+                    
+                    --visible area bg image
+                    --(also the dots after HSCAN_NEXT_START, which fetch
+                    --ahead for the next line.)
+                    if ((cur_x <= conv_std_logic_vector(HSCAN, X_SIZE)) or
+                        cur_x > conv_std_logic_vector(HSCAN_NEXT_START, X_SIZE)) then
+
+                        ----fetch pattern table low byte.
+                        if (cur_x (2 downto 0) = "101" ) then
+                            --vram addr is incremented every 8 cycle.
+                            vram_addr <= "0" & ppu_ctrl(PPUBPA) & 
+                                            disp_nt(dsize - 1 downto 0) 
+                                                & "0"  & next_y(2  downto 0);
+                        end if;--if (cur_x (2 downto 0) = "101" ) then
+                        if (cur_x (2 downto 0) = "110" ) then
+                            ptn_l_we_n <= '0';
+                        else
+                            ptn_l_we_n <= '1';
+                        end if;
+
+                        ----fetch pattern table high byte.
+                        if (cur_x (2 downto 0) = "111" ) then
+                            --vram addr is incremented every 8 cycle.
+                            --high bit plane is 8 bytes after the low plane.
+                            vram_addr <= "0" & ppu_ctrl(PPUBPA) & 
+                                            disp_nt(dsize - 1 downto 0) 
+                                                & "0"  & next_y(2  downto 0) + "00000000001000";
+                        end if; --if (cur_x (2 downto 0) = "111" ) then
+                        if (cur_x (2 downto 0) = "000" and cur_x /= "000000000") then
+                            ptn_h_we_n <= '0';
+                        else
+                            ptn_h_we_n <= '1';
+                        end if;--if (cur_x (2 downto 0) = "000" and cur_x /= "000000000") then
+                    end if; --if (cur_x <= conv_std_logic_vector(HSCAN, X_SIZE)) or
+                end if;--if (ppu_mask(PPUSBG) = '1') then
+
+                --fetch sprite and display.
+                --active on the visible lines and on line VSCAN_MAX - 1.
+                if (ppu_mask(PPUSSP) = '1' and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE) or 
+                        cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE))) then
+                    --secondary oam clear
+                    --dots 1..64: fill the secondary oam with 0xff and hold
+                    --the oam counters in reset.
+                    if (cur_x /= "000000000" and cur_x <= conv_std_logic_vector(64, X_SIZE)) then
+                        if (cur_x(0) = '0') then
+                            --write secondary oam on even cycle
+                            s_oam_r_n <= '1';
+                            s_oam_w_n <= '0';
+                            s_oam_addr <= cur_x(5 downto 1);
+                            s_oam_data <= (others => '1');
+                        end if;
+                        p_oam_cnt_res_n <= '0';
+                        p_oam_cnt_ce_n <= '1';
+                        s_oam_cnt_ce_n <= '1';
+                        p_oam_cnt_wrap_n <= '1';
+                        oam_ev_status <= EV_STAT_COMP;
+
+                    --sprite evaluation and secondary oam copy.
+                    elsif (cur_x > conv_std_logic_vector(64, X_SIZE) and 
+                            cur_x <= conv_std_logic_vector(256, X_SIZE)) then
+                        p_oam_cnt_res_n <= '1';
+
+                        --TODO: sprite evaluation is simplified!!
+                        --not complying the original NES spec at
+                        --http://wiki.nesdev.com/w/index.php/PPU_sprite_evaluation
+                        --e.g., when overflow happens, it just ignore subsequent entry.
+                        --old secondary sprite entry.
+                        --the primary oam counter being back at 0 after dot
+                        --192 marks the end of the primary oam scan.
+                        if (p_oam_cnt = "00000000" and cur_x > conv_std_logic_vector(192, X_SIZE)) then
+                            p_oam_cnt_wrap_n <= '0';
+                        end if;
+
+                        --odd cycle copy from primary oam
+                        --(selects which byte of the current entry is read.)
+                        if (cur_x(0) = '1') then
+                            if (oam_ev_status = EV_STAT_COMP) then
+                                p_oam_addr_in <= p_oam_cnt;
+                                p_oam_cnt_ce_n <= '1';
+                                s_oam_cnt_ce_n <= '1';
+                            elsif (oam_ev_status = EV_STAT_CP1) then
+                                p_oam_addr_in <= p_oam_cnt + "00000001";
+                                s_oam_cnt_ce_n <= '1';
+
+                            elsif (oam_ev_status = EV_STAT_CP2) then
+                                p_oam_addr_in <= p_oam_cnt + "00000010";
+                                s_oam_cnt_ce_n <= '1';
+
+                            elsif (oam_ev_status = EV_STAT_CP3) then
+                                oam_ev_status <= EV_STAT_PRE_COMP;
+                                p_oam_addr_in <= p_oam_cnt + "00000011";
+                                s_oam_cnt_ce_n <= '1';
+                            end if;
+                        else
+                        --even cycle copy to secondary oam (if y is in range.)
+                            s_oam_r_n <= '1';
+                            s_oam_w_n <= '0';
+                            s_oam_addr <= s_oam_cnt;
+                            s_oam_data <= oam_data;
+
+                            if (oam_ev_status = EV_STAT_COMP) then
+                                --check y range.
+                                --in range when cur_y - 6 <= y <= cur_y + 1;
+                                --the lower bound is skipped while cur_y < 6
+                                --so that the subtraction cannot wrap.
+                                if (cur_y < "000000110" and oam_data <= cur_y + "000000001") or 
+                                    (cur_y >= "000000110" and oam_data <= cur_y + "000000001" and 
+                                             oam_data >= cur_y - "000000110") then
+                                    oam_ev_status <= EV_STAT_CP1;
+                                    s_oam_cnt_ce_n <= '0';
+                                    --copy remaining oam entry.
+                                    p_oam_cnt_ce_n <= '1';
+                                else
+                                    --goto next entry
+                                    p_oam_cnt_ce_n <= '0';
+                                end if;
+                            elsif (oam_ev_status = EV_STAT_CP1) then
+                                s_oam_cnt_ce_n <= '0';
+                                oam_ev_status <= EV_STAT_CP2;
+                            elsif (oam_ev_status = EV_STAT_CP2) then
+                                s_oam_cnt_ce_n <= '0';
+                                oam_ev_status <= EV_STAT_CP3;
+                            elsif (oam_ev_status = EV_STAT_CP3) then
+                                s_oam_cnt_ce_n <= '0';
+                            elsif (oam_ev_status = EV_STAT_PRE_COMP) then
+                                oam_ev_status <= EV_STAT_COMP;
+                                s_oam_cnt_ce_n <= '0';
+                                p_oam_cnt_ce_n <= '0';
+                            end if;
+                        end if;--if (cur_x(0) = '1') then
+
+                        --prepare for next step
+                        s_oam_addr_cpy_n <= '1';
+                        spr_y_we_n <= '1';
+                        spr_tile_we_n <= '1';
+                        spr_x_we_n <= "11111111";
+                        spr_attr_we_n <= "11111111";
+                        spr_ptn_l_we_n <= "11111111";
+                        spr_ptn_h_we_n <= "11111111";
+                        spr_x_ce_n <= "11111111";
+                        spr_ptn_ce_n <= "11111111";
+
+                    --sprite pattern fetch
+                    --dots 257..320: 8 dots per sprite slot. the slot
+                    --number is s_oam_addr_cpy(4 downto 2).
+                    elsif (cur_x > conv_std_logic_vector(256, X_SIZE) and 
+                            cur_x <= conv_std_logic_vector(320, X_SIZE)) then
+
+                        s_oam_addr_cpy_n <= '0';
+                        s_oam_r_n <= '0';
+                        s_oam_w_n <= '1';
+                        s_oam_addr <= s_oam_addr_cpy;
+
+                        ----fetch y-coordinate from secondary oam
+                        if (cur_x (2 downto 0) = "001" ) then
+                            s_oam_addr_cpy_ce_n <= '0';
+                            spr_y_we_n <= '0';
+                        else
+                            spr_y_we_n <= '1';
+                        end if;
+
+                        ----fetch tile number
+                        if (cur_x (2 downto 0) = "010" ) then
+                            spr_tile_we_n <= '0';
+                        else
+                            spr_tile_we_n <= '1';
+                        end if;
+
+                        ----fetch attribute
+                        if (cur_x (2 downto 0) = "011" ) then
+                            spr_attr_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '0';
+                        else
+                            spr_attr_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '1';
+                        end if;--if (cur_x (2 downto 0) = "011" ) then
+
+                        ----fetch x-coordinate
+                        if (cur_x (2 downto 0) = "100" ) then
+                            --stop the address counter for the pattern fetch.
+                            s_oam_addr_cpy_ce_n <= '1';
+                            spr_x_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '0';
+                        else
+                            spr_x_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '1';
+                        end if;
+
+                        ----fetch pattern table low byte.
+                        if (cur_x (2 downto 0) = "101" ) then
+                            if (spr_attr(conv_integer(s_oam_addr_cpy(4 downto 2)))(SPRVFL) = '0') then
+                                vram_addr <= "0" & ppu_ctrl(PPUSPA) & 
+                                            spr_tile_tmp(dsize - 1 downto 0) & "0" & 
+                                            (next_y(2 downto 0) - spr_y_tmp(2 downto 0));
+                            else
+                                --flip sprite vertically.
+                                vram_addr <= "0" & ppu_ctrl(PPUSPA) & 
+                                            spr_tile_tmp(dsize - 1 downto 0) & "0" & 
+                                            (spr_y_tmp(2 downto 0) - next_y(2 downto 0) - "001");
+                            end if;
+                        end if;
+
+                        if (cur_x (2 downto 0) = "110" ) then
+                            spr_ptn_l_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '0';
+                        else
+                            spr_ptn_l_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '1';
+                        end if;
+
+                        ----fetch pattern table high byte.
+                        --same row as the low byte, 8 bytes further on.
+                        if (cur_x (2 downto 0) = "111" ) then
+                            if (spr_attr(conv_integer(s_oam_addr_cpy(4 downto 2)))(SPRVFL) = '0') then
+                                vram_addr <= "0" & ppu_ctrl(PPUSPA) & 
+                                            spr_tile_tmp(dsize - 1 downto 0) & "0" & 
+                                            (next_y(2 downto 0) - spr_y_tmp(2 downto 0))
+                                                + "00000000001000";
+                            else
+                                --flip sprite vertically.
+                                --the row (spr_y - next_y - 1) must be formed
+                                --in 3 bits before the plane offset is added.
+                                --adding 7 to (spr_y - next_y) instead gives
+                                --no carry into bit 3 when the difference is
+                                --0, which re-read the low plane on the
+                                --first line of a flipped sprite.
+                                vram_addr <= "0" & ppu_ctrl(PPUSPA) & 
+                                            spr_tile_tmp(dsize - 1 downto 0) & "0"  & 
+                                            (spr_y_tmp(2 downto 0) - next_y(2 downto 0) - "001")
+                                                + "00000000001000";
+                            end if;
+                        end if;
+
+                        if (cur_x (2 downto 0) = "000") then
+                            spr_ptn_h_we_n(conv_integer(s_oam_addr_cpy(4 downto 2))) <= '0';
+                            s_oam_addr_cpy_ce_n <= '0';
+                        else
+                            --NOTE(review): the "- 001" presumably targets the
+                            --previous slot because the address counter has
+                            --already moved on to the next entry - confirm.
+                            spr_ptn_h_we_n(conv_integer(s_oam_addr_cpy(4 downto 2) - "001")) <= '1';
+                        end if;
+
+                    elsif (cur_x > conv_std_logic_vector(320, X_SIZE)) then
+                        --clear last write enable.
+                        spr_ptn_h_we_n <= "11111111";
+                    end if;--if (cur_x /= "000000000" and cur_x <= conv_std_logic_vector(64, X_SIZE))
+
+                    --display sprite.
+                    if ((cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE))) then
+                        --start counter.
+                        if (cur_x = "000000000") then
+                            spr_x_ce_n <= "00000000";
+                        end if;
+
+                        --these per-slot assignments come after the start
+                        --assignment above and therefore override it.
+                        for i in 0 to 7 loop
+                            if (spr_x_cnt(i) = "00000000") then
+                                --active sprite, start shifting..
+                                spr_x_ce_n(i) <= '1';
+                                spr_ptn_ce_n(i) <= '0';
+                            end if;
+                        end loop;
+                    end if; --if ((cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) 
+                end if; --if (ppu_mask(PPUSSP) = '1') then
+
+                if (ppu_mask(PPUSBG) = '1' or ppu_mask(PPUSSP) = '1') then
+                    --output visible area only.
+                    if ((cur_x < conv_std_logic_vector(HSCAN, X_SIZE)) and
+                        (cur_y < conv_std_logic_vector(VSCAN, X_SIZE))) then
+                        --output image.
+                        output_rgb;
+                    end if;
+                else
+                    --rendering disabled: fixed colour (r and b full, g off).
+                    b <= (others => '1');
+                    g <= (others => '0');
+                    r <= (others => '1');
+                end if;--if (ppu_mask(PPUSBG) = '1' or ppu_mask(PPUSSP) = '1') then
+
+                --flag operation
+                if ((cur_x = conv_std_logic_vector(1, X_SIZE)) and
+                    (cur_y = conv_std_logic_vector(VSCAN + 1, X_SIZE))) then
+                    --vblank start
+                    ppu_status(ST_VBL) <= '1';
+                elsif ((cur_x = conv_std_logic_vector(1, X_SIZE)) and
+                    (cur_y = conv_std_logic_vector(VSCAN_MAX - 1, X_SIZE))) then
+                    ppu_status(ST_SP0) <= '0';
+                    --vblank end
+                    ppu_status(ST_VBL) <= '0';
+                    --TODO: sprite overflow is not implemented!
+                    ppu_status(ST_SOF) <= '0';
+                end if;
+            end if; --if (clk'event and clk = '1') then
+
+            if (read_status'event and read_status = '1') then
+                --reading ppu status clears vblank bit.
+                ppu_status(ST_VBL) <= '0';
+            end if;
+
+        end if;--if (rst_n = '0') then
+    end process;
+
+end rtl;
+