This is an old revision of the document!
This module provides RC4 encryption/decryption. Like other encryption algorithms, RC4 takes a human-readable message, called the plaintext, as well as a key, and produces an encrypted message of the same length, called the cyphertext. In the decryption process, you provide the cyphertext and the same key, and the original plaintext is produced.
You can read about RC4 encryption at https://en.wikipedia.org/wiki/RC4. RC4 is very simple and can be implemented in about 10-15 lines of software code, which you can find in the Key-scheduling algorithm (KSA) and Pseudo-random generation algorithm (PRGA) sections of the Wikipedia article. Essentially, RC4 takes a key of any length (in bytes) and uses it to create an endless stream of pseudo-random bytes. These bytes are XOR'd with the user message to produce the encrypted message (cyphertext).
As you may be aware, if we XOR a value with another value twice, we get back the original value (A ^ B ^ B == A). Thus, if we use the same key to generate the same pseudo-random stream of bytes and XOR them with the cyphertext, we will get back the original message. This makes RC4 symmetric, which means that you can use the provided module to perform either encryption or decryption.
Module Name = decrypt_rc4 | |||
---|---|---|---|
Parameter | Default Value | Description | |
BYTES_LEN | 16 | The length of byte stream the encryption engine will process. | |
Port Name | Direction | Width | Description |
clk | Input | 1 | 100 MHz Clock |
reset | Input | 1 | Active-high reset |
start | Input | 1 | Active-high, start encryption/decryption |
key | Input | 24 | Encryption key |
done | Output | 1 | Active-high for one cycle when the encryption/decryption completes |
bytes_in | Input | BYTES_LEN * 8 | Input bytes (plaintext for encryption, cyphertext for decryption) |
bytes_out | Output | BYTES_LEN * 8 | Output bytes (cyphertext for encryption, plaintext for decryption) |
Click the link below to download the file.
// RC4 stream-cipher engine.
//
// After `start` is sampled high in the idle state, the module builds the RC4
// S[] permutation from the 3-byte `key`, then generates BYTES_LEN keystream
// bytes and XORs each one with the corresponding byte of `bytes_in`.  Because
// the operation is a plain XOR with the keystream, the same module performs
// both encryption and decryption.
//
// Parameters:
//   BYTES_LEN - number of message bytes processed per run.
//
// Ports:
//   clk       - clock; all state updates on the rising edge.
//   reset     - synchronous, active-high; returns the state machine to idle.
//   start     - active-high; sampled only while idle.
//   key       - 24-bit key; key[7:0] is used as key byte 0, key[15:8] as
//               byte 1 and key[23:16] as byte 2.
//   bytes_in  - input byte stream; the most significant byte is processed first.
//   bytes_out - output byte stream, same byte ordering as bytes_in.
//   done      - high for one cycle, on the same clock edge that writes the last
//               output byte.
module decrypt_rc4 #(
    parameter BYTES_LEN = 16
) (
    input  wire logic                         clk,        // Clock
    input  wire logic                         reset,      // Active-high reset
    input  wire logic                         start,      // Start encryption/decryption
    input  wire logic [23:0]                  key,        // 3 byte key
    input  wire logic [(BYTES_LEN * 8) - 1:0] bytes_in,   // byte stream in
    output logic      [(BYTES_LEN * 8) - 1:0] bytes_out,  // byte stream out
    output logic                              done        // Active-high done
);

    // This module implements the following RC4 encryption/decryption algorithm:
    //
    //   for i from 0 to 255                                  (LOOP1)
    //       S[i] := i
    //   endfor
    //   j := 0
    //   for i from 0 to 255                                  (LOOP2)
    //       j := (j + S[i] + key[i mod keylength]) mod 256
    //       swap values of S[i] and S[j]
    //   endfor
    //   i := 0
    //   j := 0
    //   while GeneratingOutput:                              (LOOP3)
    //       i := (i + 1) mod 256
    //       j := (j + S[i]) mod 256
    //       swap values of S[i] and S[j]
    //       K := S[(S[i] + S[j]) mod 256]
    //       output K
    //   endwhile
    //
    // S[] lives in a synchronous-read RAM: data for an address presented in
    // one state is available on ram_data_out_* during the following state.

    // S_DONE is declared but never entered by the state machine below.
    typedef enum {
        S_INIT,
        S_LOOP1,
        S_LOOP2_readSi,
        S_LOOP2_readSj,
        S_LOOP2_write,
        S_LOOP3_init,
        S_LOOP3_readSi,
        S_LOOP3_readSj,
        S_LOOP3_writeSi,
        S_LOOP3_writeSj,
        S_LOOP3_readK,
        S_update_text_out,
        S_DONE
    } StateType;

    StateType cs;

    logic [7:0] i;       // i variable register
    logic [7:0] j;       // j variable register
    logic [7:0] j_calc;  // Combinational calculation of j variable (LOOP2)

    // Signals to access dual-port S[] array
    logic [7:0] ram_addr_a;
    logic [7:0] ram_addr_b;
    logic       ram_we_a;
    logic       ram_we_b;
    logic [7:0] ram_data_in_a;
    logic [7:0] ram_data_in_b;
    logic [7:0] ram_data_out_a;
    logic [7:0] ram_data_out_b;

    logic [7:0] Si_saved;      // Register to save S[i] read value
    logic [7:0] Sj_saved;      // Register to save S[j] read value
    logic [7:0] i_calc_loop3;  // Combinational calculation of i variable (LOOP3)
    logic [7:0] j_calc_loop3;  // Combinational calculation of j variable (LOOP3)
    logic [7:0] K_lookup;      // Address to look up K value: (S[i] + S[j])

    // Index of the input byte currently being processed.
    // The most significant byte is processed first, so this counts down.
    logic [31:0] msg_byte_idx;

    ////////////////////////////// Outputs //////////////////////////////////////

    // In S_update_text_out, ram_data_out_a holds the keystream byte K that was
    // addressed during S_LOOP3_readK.
    always_ff @(posedge clk) begin
        if (cs == S_update_text_out) begin
            bytes_out[msg_byte_idx * 8 +: 8] <= bytes_in[msg_byte_idx * 8 +: 8] ^ ram_data_out_a;
        end
    end

    ////////////////////////////// STATE MACHINE ////////////////////////////////

    always_ff @(posedge clk) begin
        done <= 1'b0;  // default; overridden for one cycle when the run completes

        if (reset) begin
            cs <= S_INIT;
        end else begin
            case (cs)
                S_INIT:
                    if (start) cs <= S_LOOP1;

                // LOOP1 writes two entries per cycle, so the last pair is (254, 255).
                S_LOOP1:
                    if (i == 254) cs <= S_LOOP2_readSi;

                S_LOOP2_readSi: cs <= S_LOOP2_readSj;
                S_LOOP2_readSj: cs <= S_LOOP2_write;
                S_LOOP2_write:
                    if (i == 255) cs <= S_LOOP3_init;
                    else          cs <= S_LOOP2_readSi;

                S_LOOP3_init:    cs <= S_LOOP3_readSi;
                S_LOOP3_readSi:  cs <= S_LOOP3_readSj;
                S_LOOP3_readSj:  cs <= S_LOOP3_writeSi;
                S_LOOP3_writeSi: cs <= S_LOOP3_writeSj;
                S_LOOP3_writeSj: cs <= S_LOOP3_readK;
                S_LOOP3_readK:   cs <= S_update_text_out;

                S_update_text_out: begin
                    if (msg_byte_idx == 0) begin
                        cs   <= S_INIT;
                        done <= 1'b1;
                    end else begin
                        cs <= S_LOOP3_readSi;
                    end
                end

                // Recover from any state without an explicit arm (e.g. S_DONE)
                // instead of hanging there until the next reset.
                default: cs <= S_INIT;
            endcase
        end
    end

    ////////////////////////////// Datapath variables ///////////////////////////

    // Update i, j and current byte index (msg_byte_idx)
    always_ff @(posedge clk) begin
        case (cs)
            S_INIT: begin
                i <= 8'b0;
                j <= 8'b0;
            end
            S_LOOP1: begin
                i <= i + 2;  // two S[] entries are initialised per cycle; wraps to 0 after 254
            end
            S_LOOP2_readSj: begin
                j <= j_calc;
            end
            S_LOOP2_write: begin
                i <= i + 1;  // wraps to 0 after 255
            end
            S_LOOP3_init: begin
                i            <= 0;
                j            <= 0;
                msg_byte_idx <= (BYTES_LEN - 1);
            end
            S_LOOP3_readSi: begin
                i <= i_calc_loop3;
            end
            S_LOOP3_readSj: begin
                j <= j_calc_loop3;
            end
            S_update_text_out: begin
                msg_byte_idx <= msg_byte_idx - 1;
            end
            default: ;  // hold i, j and msg_byte_idx in all other states
        endcase
    end

    // Save S[i] and S[j] values read from S[] memory
    always_ff @(posedge clk) begin
        if (cs == S_LOOP2_readSj)  Si_saved <= ram_data_out_a;
        if (cs == S_LOOP3_readSj)  Si_saved <= ram_data_out_a;
        if (cs == S_LOOP3_writeSi) Sj_saved <= ram_data_out_a;
    end

    assign i_calc_loop3 = i + 1;
    // ram_data_out_a is S[i] whenever j_calc / j_calc_loop3 are consumed.
    assign j_calc       = j + ram_data_out_a + key[(i % 3) * 8 +: 8];
    assign j_calc_loop3 = (j + ram_data_out_a);
    assign K_lookup     = Si_saved + Sj_saved;

    // Signals to access S[] memory
    always_comb begin
        // Defaults: no writes; addresses/data are don't-care unless a state drives them.
        ram_addr_a    = 8'bxxxxxxxx;
        ram_addr_b    = 8'bxxxxxxxx;
        ram_data_in_a = 8'bxxxxxxxx;
        ram_data_in_b = 8'bxxxxxxxx;
        ram_we_a      = 0;
        ram_we_b      = 0;

        case (cs)
            // S[i] := i and S[i+1] := i+1 in the same cycle, one per port.
            S_LOOP1: begin
                ram_we_a      = 1;
                ram_we_b      = 1;
                ram_addr_a    = i;
                ram_addr_b    = i + 1;
                ram_data_in_a = i;
                ram_data_in_b = i + 1;
            end

            S_LOOP2_readSi: begin
                ram_addr_a = i;
            end
            S_LOOP2_readSj: begin
                ram_addr_a = j_calc;
            end
            // Swap: ram_data_out_a now holds S[j]; Si_saved holds S[i].
            S_LOOP2_write: begin
                ram_we_a      = 1;
                ram_addr_a    = i;
                ram_data_in_a = ram_data_out_a;
                ram_we_b      = 1;
                ram_addr_b    = j;
                ram_data_in_b = Si_saved;
            end

            S_LOOP3_readSi: begin
                ram_addr_a = i_calc_loop3;
            end
            S_LOOP3_readSj: begin
                ram_addr_a = j_calc_loop3;
            end
            // Swap, done over two cycles on port A: first S[i] := S[j] ...
            S_LOOP3_writeSi: begin
                ram_we_a      = 1;
                ram_addr_a    = i;
                ram_data_in_a = ram_data_out_a;
            end
            // ... then S[j] := saved S[i].
            S_LOOP3_writeSj: begin
                ram_we_a      = 1;
                ram_addr_a    = j;
                ram_data_in_a = Si_saved;
            end
            S_LOOP3_readK: begin
                ram_addr_a = K_lookup;
            end
            default: ;  // keep the defaults assigned above
        endcase
    end

    dual_port_ram #(
        .ADDR_WIDTH(8),
        .DATA_WIDTH(8)
    ) ram_inst (
        .clk_a     (clk),
        .clk_b     (clk),
        .en_a      (1'b1),
        .en_b      (1'b1),
        .we_a      (ram_we_a),
        .we_b      (ram_we_b),
        .addr_a    (ram_addr_a),
        .addr_b    (ram_addr_b),
        .data_in_a (ram_data_in_a),
        .data_in_b (ram_data_in_b),
        .data_out_a(ram_data_out_a),
        .data_out_b(ram_data_out_b)
    );

endmodule


// Dual-port RAM with one independent read/write port per clock.
//
// Each port, on the rising edge of its own clock and only while its own enable
// is high, optionally writes data_in to ram[addr] (when we is high) and always
// registers the previous contents of ram[addr] onto data_out (the read returns
// the value stored before a same-cycle write on that port).
//
// Parameters:
//   ADDR_WIDTH - address width; the memory has 2**ADDR_WIDTH words.
//   DATA_WIDTH - width of each word.
module dual_port_ram #(
    parameter ADDR_WIDTH = 10,
    parameter DATA_WIDTH = 32
) (
    input  wire logic                  clk_a,
    input  wire logic                  clk_b,
    input  wire logic                  en_a,
    input  wire logic                  en_b,
    input  wire logic                  we_a,
    input  wire logic                  we_b,
    input  wire logic [ADDR_WIDTH-1:0] addr_a,
    input  wire logic [ADDR_WIDTH-1:0] addr_b,
    input  wire logic [DATA_WIDTH-1:0] data_in_a,
    input  wire logic [DATA_WIDTH-1:0] data_in_b,
    output logic      [DATA_WIDTH-1:0] data_out_a,
    output logic      [DATA_WIDTH-1:0] data_out_b
);

    logic [DATA_WIDTH-1:0] ram [(2**ADDR_WIDTH)-1:0];

    // Both ports write `ram`, so plain `always` is used for both processes:
    // a variable assigned in an always_ff may not be written by another process.

    // Port A
    always @(posedge clk_a) begin
        if (en_a) begin
            if (we_a) ram[addr_a] <= data_in_a;
            data_out_a <= ram[addr_a];
        end
    end

    // Port B -- gated by its own enable (en_b), for both the write and the read.
    always @(posedge clk_b) begin
        if (en_b) begin
            if (we_b) ram[addr_b] <= data_in_b;
            data_out_b <= ram[addr_b];
        end
    end

endmodule