diff --git a/soc/hps_lattice_nx.py b/soc/hps_lattice_nx.py index a28af6b96..18758ade8 100644 --- a/soc/hps_lattice_nx.py +++ b/soc/hps_lattice_nx.py @@ -45,11 +45,11 @@ from migen import * from litex.soc.interconnect import wishbone +from litex.soc.cores.clock.common import * kB = 1024 - def initval_parameters(contents, width): """ In Radiant, initial values for LRAM are passed a sequence of parameters @@ -203,3 +203,431 @@ def add_init(self, data): offset = d * self.width_cascading * 64*kB + w * 64*kB chunk = data[offset:offset + 64*kB] self.lram_blocks[d][w].items += initval_parameters(chunk, self.width) + + +from collections import namedtuple +import logging +import pprint +from math import log10, pi +from cmath import phase + +logging.basicConfig(level=logging.INFO) + +io_i2 = namedtuple('io_i2',['io', 'i2', 'IPP_CTRL', 'BW_CTL_BIAS', 'IPP_SEL']) +nx_pll_param_permutation = namedtuple("nx_pll_param_permutation",[ + "C1","C2","C3","C4","C5","C6", + "IPP_CTRL","BW_CTL_BIAS","IPP_SEL","CSET","CRIPPLE","V2I_PP_RES","IPI_CMP"]) + + +# Lattice / NX PLL --------------------------------------------------------------------------------- + +class NXPLL(Module): + nclkouts_max = 5 + clki_div_range = ( 1, 128+1) + clkfb_div_range = ( 1, 128+1) + clko_div_range = ( 1, 128+1) + clki_freq_range = ( 10e6, 500e6) + clko_freq_range = ( 6.25e6, 800e6) + vco_in_freq_range = ( 10e6, 500e6) + vco_out_freq_range = ( 800e6, 1600e6) + instance_num = 0 + + def __init__(self, platform = None, create_output_port_clocks=False): + self.logger = logging.getLogger("NXPLL") + self.logger.info("Creating NXPLL.") + self.params = {} + self.reset = Signal() + self.locked = Signal() + self.params["o_LOCK"] = self.locked + self.clkin_freq = None + self.vcxo_freq = None + self.nclkouts = 0 + self.clkouts = {} + self.config = {} + self.name = 'PLL_' + str(NXPLL.instance_num) + NXPLL.instance_num += 1 + self.platform = platform + self.create_output_port_clocks = create_output_port_clocks + 
self.clk_names = [] + self.enable = [] + + self.calc_valid_io_i2() + self.calc_tf_coefficients() + + def register_clkin(self, clkin, freq): + (clki_freq_min, clki_freq_max) = self.clki_freq_range + assert freq >= clki_freq_min + assert freq <= clki_freq_max + self.clkin = Signal() + if isinstance(clkin, (Signal, ClockSignal)): + self.comb += self.clkin.eq(clkin) + else: + raise ValueError + self.clkin_freq = freq + register_clkin_log(self.logger, clkin, freq) + + def create_clkout(self, cd, freq, phase=0, margin=1e-2): + (clko_freq_min, clko_freq_max) = self.clko_freq_range + assert freq >= clko_freq_min + assert freq <= clko_freq_max + assert self.nclkouts < self.nclkouts_max + self.clkouts[self.nclkouts] = (cd.clk, freq, phase, margin) + create_clkout_log(self.logger, cd.name, freq, margin, self.nclkouts) + self.clk_names.append(cd.name) + self.nclkouts += 1 + + def compute_config(self): + config = {} + for clki_div in range(*self.clki_div_range): + config["clki_div"] = clki_div + for clkfb_div in range(*self.clkfb_div_range): + all_valid = True + vco_freq = self.clkin_freq/clki_div*clkfb_div + (vco_freq_min, vco_freq_max) = self.vco_out_freq_range + if vco_freq >= vco_freq_min and vco_freq <= vco_freq_max: + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + valid = False + for d in range(*self.clko_div_range): + clk_freq = vco_freq/d + if abs(clk_freq - f) <= f*m: + config["clko{}_freq".format(n)] = clk_freq + config["clko{}_div".format(n)] = d + config["clko{}_phase".format(n)] = p + valid = True + break + if not valid: + all_valid = False + else: + all_valid = False + if all_valid: + config["vco"] = vco_freq + config["clkfb_div"] = clkfb_div + compute_config_log(self.logger, config) + return config + raise ValueError("No PLL config found") + + def calculate_analog_parameters(self, clki_freq, fb_div, bw_factor = 5): + config = {} + + params = self.calc_optimal_params(clki_freq, fb_div, 1, bw_factor) + config["p_CSET"] = params["CSET"] + 
config["p_CRIPPLE"] = params["CRIPPLE"] + config["p_V2I_PP_RES"] = params["V2I_PP_RES"] + config["p_IPP_SEL"] = params["IPP_SEL"] + config["p_IPP_CTRL"] = params["IPP_CTRL"] + config["p_BW_CTL_BIAS"] = params["BW_CTL_BIAS"] + config["p_IPI_CMP"] = params["IPI_CMP"] + + return config + + def do_finalize(self): + config = self.compute_config() + clkfb = Signal() + + self.params.update( + p_V2I_PP_ICTRL = "0b11111", # Hard coded in all reference files + p_IPI_CMPN = "0b0011", # Hard coded in all reference files + + p_V2I_1V_EN = "ENABLED", # Enabled = 1V (Default in references, but not the primitive), Disabled = 0.9V + p_V2I_KVCO_SEL = "60", # if (VOLTAGE == 0.9V) 85 else 60 + p_KP_VCO = "0b00011", # if (VOLTAGE == 0.9V) 0b11001 else 0b00011 + + p_PLLPD_N = "USED", + p_PLLRESET_ENA = "ENABLED", + p_REF_INTEGER_MODE = "ENABLED", # Ref manual has a discrepancy so let's always set this value just in case + p_REF_MMD_DIG = "1", # Divider for the input clock, ie 'M' + + i_PLLRESET = self.reset, + i_REFCK = self.clkin, + o_LOCK = self.locked, + + # Use CLKOS5 & divider for feedback + p_SEL_FBK = "FBKCLK5", + p_ENCLK_CLKOS5 = "ENABLED", + p_DIVF = str(config["clkfb_div"]-1), # str(Actual value - 1) + p_DELF = str(config["clkfb_div"]-1), + p_CLKMUX_FB = "CMUX_CLKOS5", + i_FBKCK = clkfb, + o_CLKOS5 = clkfb, + + # Set feedback divider to 1 + p_FBK_INTEGER_MODE = "ENABLED", + p_FBK_MASK = "0b00000000", + p_FBK_MMD_DIG = "1", + ) + + analog_params = self.calculate_analog_parameters(self.clkin_freq, config["clkfb_div"]) + self.params.update(analog_params) + n_to_l = {0: "P", 1: "S", 2: "S2", 3:"S3", 4:"S4"} + + enables = [] + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + div = config["clko{}_div".format(n)] + phase = int((1+p/360) * div) + letter = chr(n+65) + self.params["p_ENCLK_CLKO{}".format(n_to_l[n])] = "ENABLED" + self.params["p_DIV{}".format(letter)] = str(div-1) + self.params["p_PHI{}".format(letter)] = "0" + self.params["p_DEL{}".format(letter)] = str(phase -
1) + self.params["o_CLKO{}".format(n_to_l[n])] = clk + enables.append((self.clk_names[n], 1)) + + # In theory this really shouldn't be necessary, in practice + # the tooling seems to have suspicious clock latency values + # on generated clocks that are causing timing problems and Lattice + # hasn't responded to my support requests on the matter. + if self.platform and self.create_output_port_clocks: + self.platform.add_platform_command("create_clock -period {} -name {} [get_pins {}.PLL_inst/CLKO{}]".format(str(1/f*1e9), self.name + "_" + n_to_l[n],self.name, n_to_l[n])) + + self.enable = Record(enables) + for n, _ in sorted(self.clkouts.items()): + self.params["i_ENCLKO{}".format(n_to_l[n])] = getattr(self.enable, self.clk_names[n]) + + if self.platform and self.create_output_port_clocks: + i = 0 + self.specials += Instance("PLL", name = self.name, **self.params) + + # The gist of calculating the analog parameters is to run through all the + # permutations of the parameters and find the optimum set of values based + # on the transfer function of the PLL loop filter. There are constraints on + # a few specific parameters, the open loop transfer function, and the closed loop + # transfer function. An optimal solution is chosen based on the bandwidth + # of the response relative to the input reference frequency of the PLL. + + # In later revs of the Lattice calculator BW_FACTOR is set to 10, may need to change it + def calc_optimal_params(self, fref, fbkdiv, M = 1, BW_FACTOR = 5): + print("Calculating Analog Paramters for a reference freqeuncy of " + str(fref*1e-6) + + " Mhz, feedback div " + str(fbkdiv) + ", and input div " + str(M) + "."
+ ) + + best_params = None + best_3db = 0 + + for params in self.transfer_func_coefficients: + closed_loop_peak = self.closed_loop_peak(fbkdiv, params) + if (closed_loop_peak["peak"] < 0.8 or + closed_loop_peak["peak"] > 1.35): + continue + + open_loop_crossing = self.open_loop_crossing(fbkdiv, params) + if open_loop_crossing["phase"] <= 45: + continue + + closed_loop_3db = self.closed_loop_3db(fbkdiv, params) + bw_factor = fref*1e6 / M / closed_loop_3db["f"] + if bw_factor < BW_FACTOR: + continue + + if best_3db < closed_loop_3db["f"]: + best_3db = closed_loop_3db["f"] + best_params = params + + print("Done calculating analog parameters:") + HDL_params = self.numerical_params_to_HDL_params(best_params) + pprint.pprint(HDL_params) + + return HDL_params + + + def numerical_params_to_HDL_params(self, params): + IPP_SEL_LUT = {1: 1, 2: 3, 3: 7, 4: 15} + ret = { + "CRIPPLE": str(int(params.CRIPPLE / 1e-12)) + "P", + "CSET": str(int((params.CSET / 4e-12)*4)) + "P", + "V2I_PP_RES": "{0:g}".format(params.V2I_PP_RES/1e3).replace(".","P") + "K", + "IPP_CTRL": "0b{0:04b}".format(int(params.IPP_CTRL / 1e-6 + 3)), + "IPI_CMP": "0b{0:04b}".format(int(params.IPI_CMP / .5e-6)), + "BW_CTL_BIAS": "0b{0:04b}".format(params.BW_CTL_BIAS), + "IPP_SEL": "0b{0:04b}".format(IPP_SEL_LUT[params.IPP_SEL]), + } + + return ret + + def calc_valid_io_i2(self): + # Valid permutations of IPP_CTRL, BW_CTL_BIAS, IPP_SEL, and IPI_CMP parameters are constrained + # by the following equation so we can narrow the problem space by calculating + # them early in the process.
+ # ip = 5.0/3 * ipp_ctrl*bw_ctl_bias*ipp_sel + # ip/ipi_cmp == 50 +- 1e-4 + + self.valid_io_i2_permutations = [] + + # List out the valid values of each parameter + IPP_CTRL_VALUES = range(1,4+1) + IPP_CTRL_UNITS = 1e-6 + IPP_CTRL_VALUES = [element * IPP_CTRL_UNITS for element in IPP_CTRL_VALUES] + BW_CTL_BIAS_VALUES = range(1,15+1) + IPP_SEL_VALUES = range(1,4+1) + IPI_CMP_VALUES = range(1,15+1) + IPI_CMP_UNITS = 0.5e-6 + IPI_CMP_VALUES = [element * IPI_CMP_UNITS for element in IPI_CMP_VALUES] + + for IPP_CTRL in IPP_CTRL_VALUES: + for BW_CTL_BIAS in BW_CTL_BIAS_VALUES: + for IPP_SEL in IPP_SEL_VALUES: + for IPI_CMP in IPI_CMP_VALUES: + is_valid_io_i2 = self.is_valid_io_i2(IPP_CTRL, BW_CTL_BIAS, IPP_SEL, IPI_CMP) + if is_valid_io_i2 and self.is_unique_io(is_valid_io_i2['io']): + self.valid_io_i2_permutations.append( io_i2( + is_valid_io_i2['io'], is_valid_io_i2['i2'], + IPP_CTRL, BW_CTL_BIAS, IPP_SEL + ) ) + + def is_unique_io(self, io): + return not any(x.io == io for x in self.valid_io_i2_permutations) + + def is_valid_io_i2(self, IPP_CTRL, BW_CTL_BIAS, IPP_SEL, IPI_CMP): + tolerance = 1e-4 + ip = 5.0/3.0 * IPP_CTRL * BW_CTL_BIAS * IPP_SEL + i2 = IPI_CMP + if abs(ip/i2-50) < tolerance: + return {'io':ip,'i2':i2} + else: + return False + + def calc_tf_coefficients(self): + # Take the permutations of the various analog parameters + # then precalculate the coefficients of the transfer function. + # During the final calculations sub in the feedback divisor + # to get the final transfer functions. 
+ + # (ABF+EC)s^2 + (A(F(G+1)+B) + ED)s + A(G+1) C1s^2 + C2s + C3 + # tf = -------------------------------------------- = -------------------------- + # ns^2(CFs^2 + (DF+C)s + D) ns^2(C4s^2 + C5s + C6) + + # A = i2*g3*ki + # B = r1*c3 + # C = B*c2 + # D = c2+c3 + # E = io*ki*k1 + # F = r*cs + # G = k3 + # n = total divisor of the feedback signal (output + N) + + # Constants + c3 = 20e-12 + g3 = 0.2952e-3 + k1 = 6 + k3 = 100 + ki = 508e9 + r1 = 9.8e6 + B = r1*c3 + + # PLL Parameters + CSET_VALUES = range(2,17+1) + CSET_UNITS = 4e-12 + CSET_VALUES = [element * CSET_UNITS for element in CSET_VALUES] + CRIPPLE_VALUES = [1, 3, 5, 7, 9, 11, 13, 15] + CRIPPLE_UNITS = 1e-12 + CRIPPLE_VALUES = [element * CRIPPLE_UNITS for element in CRIPPLE_VALUES] + V2I_PP_RES_VALUES = [9000, 9300, 9700, 10000, 10300, 10700, 11000, 11300] + + self.transfer_func_coefficients = [] + + # Run through all the permutations and cache it all + for io_i2 in self.valid_io_i2_permutations: + for CSET in CSET_VALUES: + for CRIPPLE in CRIPPLE_VALUES: + for V2I_PP_RES in V2I_PP_RES_VALUES: + A = io_i2.i2*g3*ki + B = r1*c3 + C = B*CSET + D = CSET+c3 + E = io_i2.io*ki*k1 + F = V2I_PP_RES*CRIPPLE + G = k3 + + self.transfer_func_coefficients.append( nx_pll_param_permutation( + A*B*F+E*C, # C1 + A*(F*(G+1)+B)+E*D, # C2 + A*(G+1), # C3 + C*F, # C4 + D*F+C, # C5 + D, # C6 + io_i2.IPP_CTRL, io_i2.BW_CTL_BIAS, io_i2.IPP_SEL, + CSET, CRIPPLE, V2I_PP_RES, io_i2.i2 + )) + + def calc_tf(self, n, s, params): + return ( (params.C1 * s ** 2 + params.C2 * s + params.C3) / + ( n * s ** 2 * (params.C4 * s ** 2 + params.C5 * s + params.C6) ) ) + + def closed_loop_peak(self, fbkdiv, params): + f = 1e6 + step = 1.1 + step_divs = 0 + + peak_value = -99 + peak_f = 0 + + last_value = -99 + + while f < 1e9: + s = 1j * 2 * pi * f + tf_value = self.calc_tf(fbkdiv, s, params) + this_result = 20*log10(abs(tf_value/(1+tf_value))) + if this_result > peak_value: + peak_value = this_result + peak_f = f + + if this_result < last_value
and step_divs < 5: + f = f/(step**2) + step = (step - 1) * .5 + 1 + step_divs = step_divs + 1 + elif this_result < last_value and step_divs == 5: + break + else: + last_value = this_result + f = f * step + + return {"peak":peak_value, "peak_freq":peak_f} + + def closed_loop_3db(self, fbkdiv, params): + f = 1e6 + step = 1.1 + step_divs = 0 + + last_f = 1 + + while f < 1e9: + s = 1j * 2 * pi * f + tf_value = self.calc_tf(fbkdiv, s, params) + this_result = 20*log10(abs(tf_value/(1+tf_value))) + + if (this_result+3) < 0 and step_divs < 5: + f = last_f + step = (step - 1) * .5 + 1 + step_divs = step_divs + 1 + elif (this_result+3) < 0 and step_divs == 5: + break + else: + last_f = f + f = f * step + + return {"f":last_f} + + def open_loop_crossing(self, fbkdiv, params): + f = 1e6 + step = 1.1 + step_divs = 0 + + last_f = 1 + last_tf = 0 + + while f < 1e9: + s = 1j * 2 * pi * f + tf_value = self.calc_tf(fbkdiv, s, params) + this_result = 20*log10(abs(tf_value)) + + if this_result < 0 and step_divs < 5: + f = last_f + step = (step - 1) * .5 + 1 + step_divs = step_divs + 1 + elif this_result < 0 and step_divs == 5: + break + else: + last_f = f + last_tf = tf_value + f = f * step + + return {"f":last_f, "phase":phase(-last_tf)*180/pi} diff --git a/soc/hps_proto2_platform.py b/soc/hps_proto2_platform.py index 9bf2ae446..d8542b66c 100644 --- a/soc/hps_proto2_platform.py +++ b/soc/hps_proto2_platform.py @@ -9,7 +9,7 @@ from litex.build.lattice.programmer import LatticeProgrammer from litex.soc.cores.clock import NXOSCA # from litex.soc.cores.ram import NXLRAM -from hps_lattice_nx import NXLRAM +from hps_lattice_nx import NXLRAM, NXPLL hps_io = [ ("done", 0, Pins("A5"), IOStandard("LVCMOS18H")), @@ -64,25 +64,47 @@ class _CRG(Module): """Clock Reset Generator""" def __init__(self, platform, sys_clk_freq): - self.clock_domains.cd_sys = ClockDomain() + # Input for PLL self.clock_domains.cd_por = ClockDomain() + # Outputs from PLL + self.clock_domains.cd_sys = ClockDomain() + 
self.clock_domains.cd_cfu = ClockDomain() + + # PLL output clocks' enable signals + self.sys_clk_enable = Signal(reset=1) + self.cfu_clk_enable = Signal(reset=1) + # Clock from HFOSC - self.submodules.sys_clk = sys_osc = NXOSCA() - sys_osc.create_hf_clk(self.cd_sys, sys_clk_freq) + self.submodules.osc_clk = sys_osc = NXOSCA() + sys_osc.create_hf_clk(self.cd_por, sys_clk_freq) + # We make the period constraint 7% tighter than our actual system # clock frequency, because the CrossLink-NX internal oscillator runs # at ±7% of nominal frequency. - platform.add_period_constraint(self.cd_sys.clk, - 1e9 / (sys_clk_freq * 1.07)) + clk_freq = sys_clk_freq * 1.07 # Power On Reset por_cycles = 4096 por_counter = Signal(log2_int(por_cycles), reset=por_cycles - 1) - self.comb += self.cd_por.clk.eq(self.cd_sys.clk) self.sync.por += If(por_counter != 0, por_counter.eq(por_counter - 1)) - self.specials += AsyncResetSynchronizer( - self.cd_sys, (por_counter != 0)) + + # PLL + self.submodules.sys_pll = sys_pll = NXPLL(platform=platform, create_output_port_clocks=True) + sys_pll.register_clkin(self.cd_por.clk, clk_freq) + sys_pll.create_clkout(self.cd_sys, clk_freq) + sys_pll.create_clkout(self.cd_cfu, clk_freq) + + self.specials += [ + AsyncResetSynchronizer(self.cd_sys, ~self.sys_pll.locked | (por_counter != 0)), + AsyncResetSynchronizer(self.cd_cfu, ~self.sys_pll.locked | (por_counter != 0)), + ] + + def do_finalize(self): + self.comb += [ + self.sys_pll.enable.sys.eq(self.sys_clk_enable), + self.sys_pll.enable.cfu.eq(self.cfu_clk_enable), + ] _nextpnr_report_filename = 'nextpnr-nexus-report.json' diff --git a/soc/hps_soc.py b/soc/hps_soc.py index 1c7104d32..8dd454b0f 100755 --- a/soc/hps_soc.py +++ b/soc/hps_soc.py @@ -35,7 +35,8 @@ from litespi.phy.generic import LiteSPIPHY from litespi import LiteSPI -from migen import Module, Instance, Signal, Record +from migen import Module, Signal, Record, ClockSignal, ResetSignal, \ + ClockDomainsRenamer, FSM, NextValue, NextState, If from 
patch import Patch # from cam_control import CameraControl @@ -54,6 +55,73 @@ SOC_DIR = os.path.dirname(os.path.realpath(__file__)) +class CfuCpuClockCtrl(Module): + """ + A module that controls clocks between CFU and CPU so that power usage is + optimized. + """ + + def __init__(self, cfu_bus, cfu_cen, cpu_cen): + """Constructor + + Args: + cfu_bus: bus between CFU and CPU + cfu_cen: clock enable signal for CFU + cpu_cen: clock enable signal for CPU + """ + self.cfu_bus = cfu_bus + self.cfu_cen = cfu_cen + self.cpu_cen = cpu_cen + + delay = Signal(max=10) + + self.submodules.fsm = fsm = FSM() + # It's just an initial state, once left it's not entered again + fsm.act("RESET", + # Enable both CFU and CPU + self.cfu_cen.eq(1), + self.cpu_cen.eq(1), + + NextValue(delay, delay + 1), + + # After 10 cycles disable CFU and go to next state + # Leave immediately if CPU has a command prepared + If(delay == 9, + NextState("CPU_ENABLED"), + ).Elif(self.cfu_bus.cmd.valid, + NextState("CFU_ENABLED") + ) + ) + + fsm.act("CPU_ENABLED", + self.cpu_cen.eq(1), + self.cfu_cen.eq(0), + + # If CPU has prepared a command, enable CFU + If(self.cfu_bus.cmd.valid, + self.cfu_cen.eq(1), + NextState("CFU_ENABLED"), + ) + ) + + fsm.act("CFU_ENABLED", + self.cfu_cen.eq(1), + self.cpu_cen.eq(1), + + # Disable CPU if CFU received a command (cmd.valid deasserted) + # And CFU didn't already respond to that command + If(~self.cfu_bus.rsp.valid & ~self.cfu_bus.cmd.valid, + self.cpu_cen.eq(0), + ), + + # If CFU has prepared a response, enable CPU + If(self.cfu_bus.rsp.valid, + self.cpu_cen.eq(1), + NextState("CPU_ENABLED"), + ) + ) + + class HpsSoC(LiteXSoC): # Memory layout csr_origin = 0xf0000000 @@ -98,6 +166,19 @@ def __init__(self, platform, debug, variant=None, reset_address=reset_address, cfu=cpu_cfu) + cfu_cen = Signal() + cpu_cen = Signal() + self.comb += [ + self.crg.sys_clk_enable.eq(cpu_cen), + self.crg.cfu_clk_enable.eq(cfu_cen) + ] + + 
self.cpu.cfu_params.update(i_clk=ClockSignal("cfu")) + self.cpu.cfu_params.update(i_reset=ResetSignal("cfu")) + + self.submodules.cfu_cpu_clk_ctl = ClockDomainsRenamer("por")( + CfuCpuClockCtrl(self.cpu.cfu_bus, cfu_cen, cpu_cen)) + # RAM if separate_arena: ram_size = 64*KB @@ -149,7 +230,7 @@ def __init__(self, platform, debug, variant=None, def setup_ram(self, size): region = SoCRegion(self.sram_origin, size, cached=True, linker=True) - self.submodules.lram = self.platform.create_ram(32, size) + self.submodules.lram = ClockDomainsRenamer("por")(self.platform.create_ram(32, size)) self.bus.add_slave("sram_lram", self.lram.bus, region) self.bus.add_region("sram", region) @@ -158,7 +239,7 @@ def setup_arena(self, size): region = SoCRegion(self.arena_origin, size, cached=True, linker=True) self.bus.add_region("arena", region) if size > 0: - self.submodules.arena = self.platform.create_ram(32, size, dual_port=True) + self.submodules.arena = ClockDomainsRenamer("por")(self.platform.create_ram(32, size, dual_port=True)) self.bus.add_slave("arena_lram", self.arena.bus, region) self.add_config('SOC_SEPARATE_ARENA')