Electronic – MD5 VHDL pipeline

fpgavhdl

I am trying to implement a 3-stage MD5 pipeline according to this link. In particular the algoritms on page 31. There is also another document which describes data forwarding. This is done in an FPGA (Terasic DE2-115). There is no schematics in this project, only VHDL code.

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity md5core is
    port (
        CLOCK_50        : in std_logic;
        SW              : in std_logic_vector(17 downto 17)
    );
end entity md5core;

architecture md5core_rtl of md5core is
type r_array is array(0 to 64) of std_logic_vector(7 downto 0);
constant R        : r_array := ( x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", 
                                 x"16", x"05", x"09", x"0e", x"14", x"05", x"09", x"0e", x"14", x"05", x"09", x"0e", x"14", x"05", x"09",
                                 x"0e", x"14", x"04", x"0b", x"10", x"17", x"04", x"0b", x"10", x"17", x"04", x"0b", x"10", x"17", x"04",
                                 x"0b", x"10", x"17", x"06", x"0a", x"0f", x"15", x"06", x"0a", x"0f", x"15", x"06", x"0a", x"0f", x"15",
                                 x"06", x"0a", x"0f", x"15", others => x"00");

type k_array is array(0 to 66) of std_logic_vector(31 downto 0);
constant K        : k_array := (x"d76aa478", x"e8c7b756", x"242070db", x"c1bdceee",
                                x"f57c0faf", x"4787c62a", x"a8304613", x"fd469501",
                                x"698098d8", x"8b44f7af", x"ffff5bb1", x"895cd7be",
                                x"6b901122", x"fd987193", x"a679438e", x"49b40821", 
                                x"f61e2562", x"c040b340", x"265e5a51", x"e9b6c7aa",
                                x"d62f105d", x"02441453", x"d8a1e681", x"e7d3fbc8",
                                x"21e1cde6", x"c33707d6", x"f4d50d87", x"455a14ed",
                                x"a9e3e905", x"fcefa3f8", x"676f02d9", x"8d2a4c8a",
                                x"fffa3942", x"8771f681", x"6d9d6122", x"fde5380c",
                                x"a4beea44", x"4bdecfa9", x"f6bb4b60", x"bebfbc70",
                                x"289b7ec6", x"eaa127fa", x"d4ef3085", x"04881d05",
                                x"d9d4d039", x"e6db99e5", x"1fa27cf8", x"c4ac5665",
                                x"f4292244", x"432aff97", x"ab9423a7", x"fc93a039",
                                x"655b59c3", x"8f0ccc92", x"ffeff47d", x"85845dd1",
                                x"6fa87e4f", x"fe2ce6e0", x"a3014314", x"4e0811a1",
                                x"f7537e82", x"bd3af235", x"2ad7d2bb", x"eb86d391", others => x"00000000");

type g_array is array(0 to 64) of integer range 0 to 15;
constant g_arr      : g_array := (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                          1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,
                                          5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,
                                          0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, 0);                                               

type w_array is array(0 to 15) of std_logic_vector(31 downto 0);
signal W            : w_array;

constant AA        : std_logic_vector(31 downto 0) := x"67452301";
constant BB        : std_logic_vector(31 downto 0) := x"EFCDAB89";
constant CC        : std_logic_vector(31 downto 0) := x"98BADCFE";
constant DD        : std_logic_vector(31 downto 0) := x"10325476";

signal res_A    : std_logic_vector(31 downto 0) := x"00000000";
signal res_B    : std_logic_vector(31 downto 0) := x"00000000";
signal res_C    : std_logic_vector(31 downto 0) := x"00000000";
signal res_D    : std_logic_vector(31 downto 0) := x"00000000";

type in_str_t is array(0 to 5) of std_logic_vector(7 downto 0);
constant in_str    : in_str_t := (x"68", x"65", x"6c", x"6c", x"6f", x"6f");

type pad_str_t    is array(0 to 63) of std_logic_vector(7 downto 0);
signal pad_str    : pad_str_t;

type state_t is (start, padding, init_w, state_1, state_2, state_3, state_4, done);
signal state    : state_t;

signal a, b, c, d, f    : std_logic_vector(31 downto 0) := x"00000000";
signal i                : integer range 0 to 64 := 0;
signal g                        : integer range 0 to 15 := 0;
--signal tmp_b              : std_logic_vector(31 downto 0);

signal akw                  : std_logic_vector(31 downto 0);
signal ak                   : std_logic_vector(31 downto 0);
signal b_tmp                : std_logic_vector(31 downto 0);
begin

    --tmp_b <= std_logic_vector(unsigned(b) + rotate_left(unsigned(a) + unsigned(f) + unsigned(K(i)) + unsigned(W(g)), to_integer(unsigned(R(i)))));

    pipe_p : process(CLOCK_50, SW, a, b, c, d, i)
    begin
        if SW(17) = '0' then
--          ak <= std_logic_vector(unsigned(K(2)) + unsigned(BB));
--          akw <= std_logic_vector(unsigned(W(0)) + 1 + unsigned(K(2)) + unsigned(BB));
            b_tmp <= BB;
        elsif rising_edge(CLOCK_50) and state = state_1 then
            if i = 0 then
                ak <= std_logic_vector(unsigned(K(0)) + unsigned(a));
            elsif i = 1 then
                ak <= std_logic_vector(unsigned(K(1)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(0)) + unsigned(ak));
            elsif i = 2 then
                ak <= std_logic_vector(unsigned(K(2)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(1)) + unsigned(ak));
                b_tmp <= std_logic_vector(unsigned(b) + (rotate_left(unsigned(akw) + unsigned(f), to_integer(unsigned(R(0))))));
            else
                ak <= std_logic_vector(unsigned(K(i)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(g_arr(i-1))) + unsigned(ak));
                b_tmp <= std_logic_vector(unsigned(b) + (rotate_left(unsigned(akw) + unsigned(f), to_integer(unsigned(R(i-2))))));
            end if;
        end if;
    end process pipe_p;


    md5_f_p : process(state, a, b, c, d, i)
    begin 
        case state is
            when state_1 =>
                if i = 0 or i > 4 then
                    f <= (b and c) or ((not b) and d);
                    g <= g_arr(i);
                end if;

            when state_2 =>
            f <= (d and b) or ((not d) and c);
                g <= g_arr(i);

            when state_3 =>
                f <= b xor c xor d;
            g <= g_arr(i);

            when state_4 =>
                f <= c xor (b or (not d));
            g <= g_arr(i);

            when others =>
                f <= x"00000000";
                g <= 0;             

        end case;
    end process md5_f_p;

     md5_p : process(CLOCK_50, SW, a, b, c, d, f, g)
     begin
        if SW(17) = '0' then
            state <= start;
                i <= 0;
                a <= AA;
            b <= BB;
            c <= CC;
            d <= DD;                
            W <= (others => x"00000000");
                pad_str <= (others => x"00");
                --tmp_b := BB;
        elsif rising_edge(CLOCK_50) then
            case state is            
                when start =>

                    pad_str(0) <= in_str(0);
                    pad_str(1) <= in_str(1);
                    pad_str(2) <= in_str(2);
                    pad_str(3) <= in_str(3);
                    pad_str(4) <= in_str(4);
                    pad_str(5) <= in_str(5);
                    state <= padding;

                when padding =>
                    pad_str(6) <= "10000000";
                    pad_str(56) <= std_logic_vector(to_unsigned(in_str'length*8, 8));
                          state <= init_w;

                when init_w =>                
                    W(0) <= pad_str(3) & pad_str(2) & pad_str(1) & pad_str(0);
                    W(1) <= pad_str(7) & pad_str(6) & pad_str(5) & pad_str(4);
                    W(14) <= pad_str(59) & pad_str(58) & pad_str(57) & pad_str(56);
                          state <= state_1;

                when state_1 =>
                          if i = 16 then
                              state <= state_2;
                          else 
                        if i > 2 then
                                    --tmp_b := b;
                                    a <= d;
                                    c <= b;
                                    d <= c;
                                    b <= b_tmp;

--                                  d <= c;
--                                  b <= b_tmp;
--                                  c <= b;
--                                  a <= d;
                                end if;
                                i <= i + 1;
                    end if;

                when state_2 =>
                    if i = 32 then
                        state <= state_3;
                          else                  
                        d <= c;
                        b <= b_tmp;
                                c <= b;
                        a <= d;
                        i <= i + 1;
                    end if;

                when state_3 =>
                    if i = 48 then
                                state <= state_4;
                          else
                        d <= c;
                        b <= b_tmp;
                                c <= b;
                        a <= d;
                        i <= i + 1;
                    end if;

                when state_4 =>
                    if i = 64 then
                                res_A <= std_logic_vector(unsigned(AA) + unsigned(a));
                                res_B <= std_logic_vector(unsigned(BB) + unsigned(b));
                                res_C <= std_logic_vector(unsigned(CC) + unsigned(c));
                                res_D <= std_logic_vector(unsigned(DD) + unsigned(d));
                                state <= done;
                    else
                        d <= c;
                        c <= b;
                                b <= b_tmp;
                        a <= d;
                        i <= i + 1;
                    end if;

                when done =>
                    state <= done;

                when others =>
                    state <= done;

            end case;
        end if;
    end process md5_p;
end architecture md5core_rtl;

Using this code, I get correct values for b in the first stage of round 0, but thereafter nothing seems to fit. As seen in this simulation, first stage in round 0 is correct, but thereafter not. This is when using a in this expression:

ak <= std_logic_vector(unsigned(K(0)) + unsigned(a)); -- using a

simulation

But… If I understand the second document correctly I should be using c instead of a (data forwarding), but then the first stage in round 0 doesn't work either. I.e when I do this, the first stage in round 0 gets the wrong numbers too.

ak <= std_logic_vector(unsigned(K(0)) + unsigned(c)); -- changed to c

For the particular string in the code (helloo) the following values are correct (first 3 stages of round 0).

i:0 => a:271733878, b:3679623978, c:4023233417, d:2562383102, f:2562383102, g:0
i:1 => a:2562383102, b:268703616, c:3679623978, d:4023233417, f:3421032412, g:1
i:2 => a:4023233417, b:566857930, c:268703616, d:3679623978, f:4291410697, g:2

By the way, AKM in the document is akw in the code.

Any tips or suggestions on bringing me in the right direction would be very appreciated. Code would be ideal. If something is unclear, I'll edit the question and try to rectify that.

Best Answer

I think you misunderstood the paper's author's comments on pipelining the algorithm. You can't just pipeline the computation for B without also pipelining the rest of the process.

I would recommend, for the purposes of getting started, that you forget about the pipeline approach altogether, and just get the algorithm working with a non-pipelined implementation of the B computation.

Once you're getting correct results, and if you need more performance, then you can look into pipelining it. Then, you'll be able to see how the intermediate results line up on each clock cycle and what it takes to keep them in sync.

Related Topic