I have a simulation model that takes an address as an input and, 64 clock cycles later, outputs it on another port. For some reason, when I register the output data in the testbench, the registered copy is not delayed by a clock cycle relative to the original (see waveform). Is this some obscure part of the standard, or did I find a bug in my simulator's delta-cycle handling?
Testbench:
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Top-level testbench entity. It has no ports; all signals are declared
-- in the architecture.
entity bug_report_tb is
end bug_report_tb;
-- Testbench architecture: drives the request side (app_en / app_addr) of
-- bug_report_mod and registers the returned app_rd_data on ui_clk.
-- Note that ui_clk and ui_rst are NOT generated here; they are outputs of
-- the bug_report_mod instance at the bottom of this architecture.
architecture TB of bug_report_tb is
-- MIG UI signal declarations
signal app_addr : std_logic_vector(29 downto 0);
signal app_en : std_logic;
signal app_rdy : std_logic;
signal app_rd_data : std_logic_vector(29 downto 0);
-- Registered copy of app_rd_data (same range as app_rd_data).
signal app_rd_data_r : std_logic_vector(app_rd_data'RANGE);
signal ui_rst : std_logic;
signal ui_clk : std_logic;
begin
-- Request generator with asynchronous reset (ui_rst = '1' clears app_en and
-- app_addr). On each rising edge of ui_clk, app_en defaults to '0' and is
-- overridden to '1' when app_rdy is '1' (the later assignment wins).
-- app_addr increments only when app_rdy is '1' and app_en was already '1'
-- before this edge.
process(ui_rst,ui_clk)
begin
if ui_rst = '1' then
app_en <= '0';
app_addr <= (others => '0');
elsif rising_edge(ui_clk) then
app_en <= '0';
if app_rdy = '1' then
app_en <= '1';
if app_en = '1' then
app_addr <= std_logic_vector(unsigned(app_addr)+1);
end if;
end if;
end if;
end process;
-- Output register: copies app_rd_data into app_rd_data_r on the rising edge
-- of ui_clk.
-- NOTE(review): ui_clk is produced inside the module by a concurrent
-- assignment from its internal clock, while app_rd_data is assigned by a
-- process clocked on that internal clock. Both signals therefore update in
-- the same delta cycle, so app_rd_data already holds its new value when it
-- is sampled here.
process(ui_clk)
begin
if rising_edge(ui_clk) then
app_rd_data_r <= app_rd_data;
end if;
end process;
--*********************************************************
-- Instance of the module under discussion; it supplies ui_clk, ui_rst,
-- app_rd_data and app_rdy, and consumes app_en and app_addr.
module : entity work.bug_report_mod
port map
(
ui_clk => ui_clk,
ui_rst => ui_rst,
app_rd_data => app_rd_data,
app_rdy => app_rdy,
app_en => app_en,
app_addr => app_addr
);
end TB;
Module:
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Module entity. The module drives its own clock and reset out to the
-- user (ui_clk, ui_rst) together with the ready flag and read data, and
-- receives the request enable and 30-bit address from the user.
entity bug_report_mod is
port
(
-- Outputs driven by the module.
ui_clk : out std_logic;
ui_rst : out std_logic;
app_rd_data : out std_logic_vector(29 downto 0);
app_rdy : out std_logic;
-- Inputs driven by the user logic.
app_en : in std_logic;
app_addr : in std_logic_vector(29 downto 0)
);
end bug_report_mod;
-- Behavioral model: generates its own clock and reset, produces an app_rdy
-- pattern, and returns each accepted address (multiplied by 4) on
-- app_rd_data after it has travelled through 64-entry shift registers.
architecture behavioral of bug_report_mod is
signal clk : std_logic;
-- Reset starts asserted and is released by the reset process below.
signal reset : std_logic := '1';
-- Shift register of accepted requests (app_en and app_rdy_int).
signal app_en_sr : std_logic_vector(63 downto 0) := (others => '0');
signal dly_counter : unsigned(6 downto 0);
signal rdy_counter : unsigned(6 downto 0);
signal app_rdy_int : std_logic;
type int_array is array(natural range <>) of integer;
-- Shift register of addresses (already multiplied by 4), shifted in step
-- with app_en_sr.
signal addr_array : int_array(63 downto 0);
begin
-- Free-running clock: starts high, 2.5 ns per phase (5 ns period).
process
begin
clk <= '1'; wait for 2.5 ns;
clk <= '0'; wait for 2.5 ns;
end process;
-- Concurrent assignments to the output ports. Each one adds a delta cycle,
-- so ui_clk changes one delta cycle after clk does.
ui_clk <= clk;
ui_rst <= reset;
app_rdy <= app_rdy_int;
-- Reset release: after 50 ns, wait for the next rising edge of clk and
-- deassert reset. The process then sleeps 2 ms and repeats, re-assigning
-- '0' to reset each time.
process
begin
wait for 50 ns;
wait until clk'event and clk = '1';
reset <= '0';
wait for 2 ms;
end process;
-- Read-data output: clocked on the internal clk (not on ui_clk). When the
-- last stage of app_en_sr is '1', app_rd_data is loaded with the last stage
-- of addr_array; otherwise it keeps its previous value.
process(clk)
begin
if rising_edge(clk) then
if app_en_sr(63) = '1' then
app_rd_data <= std_logic_vector(to_unsigned(addr_array(63),app_rd_data'LENGTH));
end if;
end if;
end process;
-- Pipeline and ready generation. The asynchronous reset clears app_rdy_int
-- and both counters; app_en_sr and addr_array are not touched by reset.
process(clk,reset)
begin
if reset = '1' then
app_rdy_int <= '0';
rdy_counter <= (others => '0');
dly_counter <= (others => '0');
elsif rising_edge(clk) then
-- Shift in whether a request was accepted this cycle, and its address * 4.
app_en_sr <= app_en_sr(62 downto 0) & (app_en and app_rdy_int);
addr_array <= addr_array(62 downto 0) & (to_integer(unsigned(app_addr))*4);
-- Bit 6 is cleared before incrementing, so it is set only in the cycle
-- after the low six bits were all ones.
rdy_counter <= ('0' & rdy_counter(5 downto 0)) + 1;
-- Ready is high when rdy_counter(6) is '0' and dly_counter(3) is '1'
-- ("not" binds tighter than "and").
app_rdy_int <= not rdy_counter(6) and dly_counter(3);
-- dly_counter counts up until bit 3 is set, then holds.
if dly_counter(3) = '0' then
dly_counter <= dly_counter + 1;
end if;
end if;
end process;
end behavioral;
Best Answer
You have done a very strange thing: The DUT is generating its own clock!
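This means that the clock reaches the testbench one delta cycle after the module's internal clock edge (because of the concurrent assignment `ui_clk <= clk`), which is the same delta cycle in which `app_rd_data` takes on its new value. So by the time the testbench process sees the rising edge of `ui_clk`, the data has already changed and the new value is what gets sampled, effectively creating the "zero delay" effect you're seeing.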
While it's true that real hardware wouldn't behave this way, it doesn't at all surprise me that most if not all simulators would do exactly the same thing with this code.
Try generating the clock (and the reset) in the testbench (the usual scenario) and I think you'll see the expected behavior.
The alternative would be to add a nominal delay to the assignment of the output data bus inside the module (for example, `app_rd_data <= ... after 1 ns;`) in order to correctly model this interface.