Quartus II/ ModelSim을 이용한 구현 - 3. CLA(Carry Look-ahead Adder) (verilog)

2023. 1. 28. 16:11기록지/컴퓨터구조(computer architecture)

Carry Look-ahead Adder 구현

 

원리

4-bits Carry Look-ahead Adder

Ripple carry adder(RCA)는 carry가 하위 비트에서 상위 비트로 순차적으로 전파되기 때문에 계산이 완료될 때까지 시간이 오래 걸린다. Carry look-ahead adder(CLA)는 이 단점을 보완하기 위해, 입력 a, b와 carry in이 주어지면 모든 올림수(carry)를 동시에 구하여 계산 시간을 단축시키는 가산기이다. 이를 위해 carry만을 계산해 주는 별도의 carry look-ahead block(CLB)이 존재한다.

 

32-bits CLA with clock

Timing Analysis

구현한 logic이 제대로 동작할 수 있는 조건을 찾기 위하여 해당 circuit의 delay를 분석하는 과정이다. 일반적으로 여기서 찾는 조건은 최대 동작 주파수이다.

 

 

설계 검증

Flow Summary
RTL viewer
waveform (unsigned)
Flow Summary
RTL viewer
waveform (Hexadecimal)
최대 동작 주파수: 146.82MHz
Flow Summary

 

RTL viewer
waveform (Hexadecimal)

Input 값이 5ns delay되었을 뿐, 모든 결과 값은 32-bits CLA의 waveform과 일치한다.

 

최대 동작 주파수: 83.88MHz

 module cla4(a,b,ci,s,co);
	// 4-bit carry look-ahead adder.
	//   a, b : 4-bit operands
	//   ci   : carry in
	//   s    : 4-bit sum
	//   co   : carry out
	// The carries come from the clb4 look-ahead block; each fa_v2 instance
	// only produces a sum bit from its operand bits and its incoming carry.

	input  [3:0] a, b;
	input        ci;
	output [3:0] s;
	output       co;

	// carry[k] is the carry into bit position k (k = 1..3).
	wire [3:1] carry;

	// Look-ahead block: derives every carry directly from a, b and ci.
	clb4 U4_clba (
		.a(a), .b(b), .ci(ci),
		.c1(carry[1]), .c2(carry[2]), .c3(carry[3]),
		.co(co)
	);

	// Sum-only full adders, one per bit.
	fa_v2 U0_fa (.a(a[0]), .b(b[0]), .ci(ci),       .s(s[0]));
	fa_v2 U1_fa (.a(a[1]), .b(b[1]), .ci(carry[1]), .s(s[1]));
	fa_v2 U2_fa (.a(a[2]), .b(b[2]), .ci(carry[2]), .s(s[2]));
	fa_v2 U3_fa (.a(a[3]), .b(b[3]), .ci(carry[3]), .s(s[3]));

endmodule
module clb4(a,b,ci,c1,c2,c3,co);
	// 4-bit carry look-ahead block.
	//   a, b       : 4-bit operands
	//   ci         : carry in
	//   c1, c2, c3 : carries into bit positions 1, 2 and 3
	//   co         : carry out of bit position 3
	// Every carry is expressed as a two-level AND/OR function of the
	// generate (g) and propagate (p) signals and ci, so no carry waits
	// on another carry output.

	input [3:0] a,b;
	input ci;
	output c1,c2,c3,co;

	wire [3:0] g, p; // generate (a & b), propagate (a | b)

	// Product terms feeding each carry's final OR gate.
	wire w0_c1;
	wire w0_c2, w1_c2;
	wire w0_c3, w1_c3, w2_c3;
	// Fixed: the original declared "w2,co" (comma typo), which created an
	// unused wire w2, redeclared port co, and left w2_co as an implicit net.
	wire w0_co, w1_co, w2_co, w3_co;

	// Generate: g[i] = a[i] & b[i]
	_and2 U0_and2 (.a(a[0]),.b(b[0]),.y(g[0]));
	_and2 U1_and2 (.a(a[1]),.b(b[1]),.y(g[1]));
	_and2 U2_and2 (.a(a[2]),.b(b[2]),.y(g[2]));
	_and2 U3_and2 (.a(a[3]),.b(b[3]),.y(g[3]));

	// Propagate: p[i] = a[i] | b[i]
	_or2 U4_or2 (.a(a[0]),.b(b[0]),.y(p[0]));
	_or2 U5_or2 (.a(a[1]),.b(b[1]),.y(p[1]));
	_or2 U6_or2 (.a(a[2]),.b(b[2]),.y(p[2]));
	_or2 U7_or2 (.a(a[3]),.b(b[3]),.y(p[3]));

	// c1 = g[0]
	//    | (p[0] & ci);
	_and2 U8_and2 (.a(p[0]),.b(ci),.y(w0_c1));
	_or2 U9_or2 (.a(g[0]),.b(w0_c1),.y(c1));

	// c2 = g[1]
	//    | (p[1] & g[0])
	//    | (p[1] & p[0] & ci);
	_and2 U10_and2 (.a(p[1]),.b(g[0]),.y(w0_c2));
	_and3 U11_and3 (.a(p[1]),.b(p[0]),.c(ci),.y(w1_c2));
	_or3 U12_or3 (.a(g[1]),.b(w0_c2),.c(w1_c2),.y(c2));

	// c3 = g[2]
	//    | (p[2] & g[1])
	//    | (p[2] & p[1] & g[0])
	//    | (p[2] & p[1] & p[0] & ci);
	_and2 U13_and2 (.a(p[2]),.b(g[1]),.y(w0_c3));
	_and3 U14_and3 (.a(p[2]),.b(p[1]),.c(g[0]),.y(w1_c3));
	_and4 U15_and4 (.a(p[2]),.b(p[1]),.c(p[0]),.d(ci),.y(w2_c3));
	_or4 U16_or4 (.a(g[2]),.b(w0_c3),.c(w1_c3),.d(w2_c3),.y(c3));

	// co = g[3]
	//    | (p[3] & g[2])
	//    | (p[3] & p[2] & g[1])
	//    | (p[3] & p[2] & p[1] & g[0])
	//    | (p[3] & p[2] & p[1] & p[0] & ci);
	_and2 U17_and2 (.a(p[3]),.b(g[2]),.y(w0_co));
	_and3 U18_and3 (.a(p[3]),.b(p[2]),.c(g[1]),.y(w1_co));
	_and4 U19_and4 (.a(p[3]),.b(p[2]),.c(p[1]),.d(g[0]),.y(w2_co));
	_and5 U20_and5 (.a(p[3]),.b(p[2]),.c(p[1]),.d(p[0]),.e(ci),.y(w3_co));
	_or5 U21_or5 (.a(g[3]),.b(w0_co),.c(w1_co),.d(w2_co),.e(w3_co),.y(co));

endmodule

	

module fa_v2(a,b,ci,s);
	// Sum-only full adder: s = a ^ b ^ ci.
	// There is no carry output on this module.

	input  a, b, ci;
	output s;

	// XOR of the two operand bits, before the carry is folded in.
	wire ab_xor;

	_xor2 U0_Xor2 (.a(a),      .b(b),  .y(ab_xor));
	_xor2 U1_Xor2 (.a(ab_xor), .b(ci), .y(s));

endmodule

module _inv(a,y);
	// Inverter: y is the logical complement of a.
	input  a;
	output y;

	not (y, a);
endmodule

module _nand2(a,b,y);
	// 2-input NAND: y = ~(a & b).
	input  a, b;
	output y;

	nand (y, a, b);
endmodule

module _and2(a,b,y);
	// 2-input AND: y = a & b.
	input  a, b;
	output y;

	and (y, a, b);
endmodule

module _or2(a,b,y);
	// 2-input OR: y = a | b.
	input  a, b;
	output y;

	or (y, a, b);
endmodule

module _xor2(a,b,y);
	// 2-input XOR built from inverters, AND gates and an OR gate:
	//   y = (a & ~b) | (b & ~a)
	input  a, b;
	output y;

	wire a_n, b_n;        // complemented inputs
	wire a_only, b_only;  // "a high, b low" and "b high, a low" terms

	_inv  _inv0 (.a(a),      .y(a_n));
	_inv  _inv1 (.a(b),      .y(b_n));
	_and2 _and0 (.a(a),      .b(b_n),    .y(a_only));
	_and2 _and1 (.a(b),      .b(a_n),    .y(b_only));
	_or2  _or0  (.a(a_only), .b(b_only), .y(y));

endmodule

module _and3(a,b,c,y);
	// 3-input AND: y = a & b & c.
	input  a, b, c;
	output y;

	and (y, a, b, c);
endmodule

module _and4(a,b,c,d,y);
	// 4-input AND: y = a & b & c & d.
	input  a, b, c, d;
	output y;

	and (y, a, b, c, d);
endmodule

module _and5(a,b,c,d,e,y);
	// 5-input AND: y = a & b & c & d & e.
	input  a, b, c, d, e;
	output y;

	and (y, a, b, c, d, e);
endmodule

module _or3(a,b,c,y);
	// 3-input OR: y = a | b | c.
	input  a, b, c;
	output y;

	or (y, a, b, c);
endmodule

module _or4(a,b,c,d,y);
	// 4-input OR: y = a | b | c | d.
	input  a, b, c, d;
	output y;

	or (y, a, b, c, d);
endmodule

module _or5(a,b,c,d,e,y);
	// 5-input OR: y = a | b | c | d | e.
	input  a, b, c, d, e;
	output y;

	or (y, a, b, c, d, e);
endmodule


`timescale 1ns/100ps

module tb_cla4;
	// Testbench for cla4: applies a fixed sequence of operand/carry-in
	// vectors, one every 10 time units, then stops the simulation.
	// Results are meant to be inspected in the waveform; nothing is
	// checked automatically.

	reg  [3:0] tb_a, tb_b;
	reg        tb_ci;
	wire [3:0] tb_s;
	wire       tb_co;

	// Carry out and sum joined into one 5-bit value for easier viewing.
	wire [4:0] tb_result;
	assign tb_result = {tb_co, tb_s};

	// Device under test.
	cla4 U0_cla4 (.a(tb_a), .b(tb_b), .ci(tb_ci), .s(tb_s), .co(tb_co));

	// Wait 10 time units, then drive the next stimulus vector.
	task drive;
		input [3:0] a_val, b_val;
		input       ci_val;
		begin
			#10;
			tb_a  = a_val;
			tb_b  = b_val;
			tb_ci = ci_val;
		end
	endtask

	initial
	begin
		// Time 0: everything low.
		tb_ci = 0;
		tb_a  = 0;
		tb_b  = 0;

		drive(4'h3, 4'h5, 1'b0);
		drive(4'h7, 4'h9, 1'b0);
		drive(4'h5, 4'h5, 1'b1);
		drive(4'h8, 4'h7, 1'b1);
		drive(4'hf, 4'hf, 1'b0);
		drive(4'hf, 4'hf, 1'b1);

		// Hold the last vector for 10 time units before stopping.
		#10;
		$stop;
	end

endmodule

 module cla32(a,b,ci,s,co);
	// 32-bit adder built from eight cla4 blocks.
	//   a, b : 32-bit operands
	//   ci   : carry in
	//   s    : 32-bit sum
	//   co   : carry out of the most significant block
	// Each block's carry out feeds the next block's carry in.

	input  [31:0] a, b;
	input         ci;
	output [31:0] s;
	output        co;

	// blk_c[k] is the carry passed from block k-1 into block k (k = 1..7).
	wire [7:1] blk_c;

	cla4 U0_cla4 (.a(a[ 3: 0]), .b(b[ 3: 0]), .ci(ci),       .s(s[ 3: 0]), .co(blk_c[1]));
	cla4 U1_cla4 (.a(a[ 7: 4]), .b(b[ 7: 4]), .ci(blk_c[1]), .s(s[ 7: 4]), .co(blk_c[2]));
	cla4 U2_cla4 (.a(a[11: 8]), .b(b[11: 8]), .ci(blk_c[2]), .s(s[11: 8]), .co(blk_c[3]));
	cla4 U3_cla4 (.a(a[15:12]), .b(b[15:12]), .ci(blk_c[3]), .s(s[15:12]), .co(blk_c[4]));
	cla4 U4_cla4 (.a(a[19:16]), .b(b[19:16]), .ci(blk_c[4]), .s(s[19:16]), .co(blk_c[5]));
	cla4 U5_cla4 (.a(a[23:20]), .b(b[23:20]), .ci(blk_c[5]), .s(s[23:20]), .co(blk_c[6]));
	cla4 U6_cla4 (.a(a[27:24]), .b(b[27:24]), .ci(blk_c[6]), .s(s[27:24]), .co(blk_c[7]));
	cla4 U7_cla4 (.a(a[31:28]), .b(b[31:28]), .ci(blk_c[7]), .s(s[31:28]), .co(co));

endmodule


 `timescale 1ns/100ps
 
 module tb_cla_clk;
	// Testbench for cla_clk: runs a free-running clock with period STEP
	// and applies four stimulus vectors, then stops the simulation.
	// Results are meant to be inspected in the waveform; nothing is
	// checked automatically.

	reg         clock;
	reg  [31:0] tb_a, tb_b;
	reg         tb_ci;
	wire [31:0] tb_s_cla;
	wire        tb_co_cla;

	// Clock period in time units.
	parameter STEP = 10;

	// Device under test.
	cla_clk U0_cla_clk (
		.clock(clock),
		.a(tb_a), .b(tb_b), .ci(tb_ci),
		.s_cla(tb_s_cla), .co_cla(tb_co_cla)
	);

	// Toggle the clock every half period.
	always #(STEP/2) clock = ~clock;

	initial
	begin
		// Time 0: clock starts high, all inputs zero.
		clock = 1'b1;
		{tb_a, tb_b, tb_ci} = {32'h0, 32'h0, 1'b0};

		// First change comes 2 time units before the end of the first
		// period; later changes are one full period apart.
		#(STEP-2) {tb_a, tb_b, tb_ci} = {32'hffff_ffff, 32'h0,         1'b1};
		#(STEP)   {tb_a, tb_b, tb_ci} = {32'h0000_ffff, 32'hffff_0000, 1'b0};
		#(STEP)   {tb_a, tb_b, tb_ci} = {32'h135f_a562, 32'h3561_4642, 1'b0};

		#(STEP*2) $stop;
	end
endmodule

	

 module cla_clk(clock, a,b,ci,s_cla,co_cla);
	// cla32 wrapped with registers on its inputs and outputs.
	// Operands and carry in are captured on a rising clock edge, fed to
	// cla32, and the adder outputs are captured on a rising clock edge
	// before being driven out.

	input         clock;
	input  [31:0] a, b;
	input         ci;
	output [31:0] s_cla;
	output        co_cla;

	// Input stage registers.
	reg [31:0] a_q, b_q;
	reg        ci_q;

	// Output stage registers.
	reg [31:0] sum_q;
	reg        cout_q;

	// Combinational adder outputs, before the output registers.
	wire [31:0] sum_d;
	wire        cout_d;

	cla32 U0_cla32 (.a(a_q), .b(b_q), .ci(ci_q), .s(sum_d), .co(cout_d));

	// Capture the inputs.
	always @ (posedge clock)
	begin
		a_q  <= a;
		b_q  <= b;
		ci_q <= ci;
	end

	// Capture the adder result.
	always @ (posedge clock)
	begin
		sum_q  <= sum_d;
		cout_q <= cout_d;
	end

	assign s_cla  = sum_q;
	assign co_cla = cout_q;

endmodule

module fa(a,b,ci,s,co);
	// Full adder built from two half adders and an OR gate.
	//   a, b, ci : operand bits and carry in
	//   s        : sum bit
	//   co       : carry out

	input  a, b, ci;
	output s, co;

	wire bc_sum;    // sum of b and ci from the first half adder
	wire bc_carry;  // carry from the first half adder
	wire a_carry;   // carry from the second half adder

	// First half adder combines b and ci.
	ha U0_ha (.a(b), .b(ci),     .s(bc_sum), .co(bc_carry));
	// Second half adder adds a to that partial sum.
	ha U1_ha (.a(a), .b(bc_sum), .s(s),      .co(a_carry));
	// Carry out is the OR of the two half-adder carries.
	_or2 U0_or (.a(a_carry), .b(bc_carry), .y(co));

endmodule

module ha(a,b,s,co);
	// Half adder: s = a ^ b, co = a & b.

	input  a, b;
	output s, co;

	// Sum bit.
	_xor2 _xor0 (.a(a), .b(b), .y(s));
	// Carry bit.
	_and2 _and0 (.a(a), .b(b), .y(co));

endmodule


module rca(a,b,ci,s,co);
	// 4-bit ripple carry adder: four fa instances chained through
	// their carries.
	//   a, b : 4-bit operands
	//   ci   : carry in
	//   s    : 4-bit sum
	//   co   : carry out of bit 3

	input  [3:0] a, b;
	input        ci;
	output [3:0] s;
	output       co;

	// ripple[k] is the carry out of bit k-1, i.e. the carry into bit k.
	wire [3:1] ripple;

	fa U0_fa (.a(a[0]), .b(b[0]), .ci(ci),        .s(s[0]), .co(ripple[1]));
	fa U1_fa (.a(a[1]), .b(b[1]), .ci(ripple[1]), .s(s[1]), .co(ripple[2]));
	fa U2_fa (.a(a[2]), .b(b[2]), .ci(ripple[2]), .s(s[2]), .co(ripple[3]));
	fa U3_fa (.a(a[3]), .b(b[3]), .ci(ripple[3]), .s(s[3]), .co(co));

endmodule
728x90