2 * Copyright Altera Corporation (C) 2012-2015
4 * SPDX-License-Identifier: BSD-3-Clause
9 #include <asm/arch/sdram.h>
10 #include "sequencer.h"
11 #include "sequencer_auto.h"
12 #include "sequencer_auto_ac_init.h"
13 #include "sequencer_auto_inst_init.h"
14 #include "sequencer_defines.h"
16 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
17 (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
19 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
20 (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
22 static struct socfpga_sdr_reg_file *sdr_reg_file =
23 (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
25 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
26 (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
28 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
29 (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
31 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
32 (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
34 static struct socfpga_data_mgr *data_mgr =
35 (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
37 static struct socfpga_sdr_ctrl *sdr_ctrl =
38 (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;
43 * In order to reduce ROM size, most of the selectable calibration steps are
44 * decided at compile time based on the user's calibration mode selection,
45 * as captured by the STATIC_CALIB_STEPS selection below.
47 * However, to support simulation-time selection of fast simulation mode, where
48 * we skip everything except the bare minimum, we need a few of the steps to
49 * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the
50 * check, which is based on the rtl-supplied value, or we dynamically compute
51 * the value to use based on the dynamically-chosen calibration mode
55 #define STATIC_IN_RTL_SIM 0
56 #define STATIC_SKIP_DELAY_LOOPS 0
58 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
59 STATIC_SKIP_DELAY_LOOPS)
61 /* calibration steps requested by the rtl */
62 uint16_t dyn_calib_steps;
65 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
66 * instead of static, we use boolean logic to select between
67 * non-skip and skip values
69 * The mask is set to include all bits when not-skipping, but is
73 uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */
75 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
76 ((non_skip_value) & skip_delay_mask)
79 struct param_type *param;
80 uint32_t curr_shadow_reg;
82 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
83 uint32_t write_group, uint32_t use_dm,
84 uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);
86 static void set_failing_group_stage(uint32_t group, uint32_t stage,
90 * Only set the global stage if there has not been any other
93 if (gbl->error_stage == CAL_STAGE_NIL) {
94 gbl->error_substage = substage;
95 gbl->error_stage = stage;
96 gbl->error_group = group;
100 static void reg_file_set_group(u16 set_group)
102 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
105 static void reg_file_set_stage(u8 set_stage)
107 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
110 static void reg_file_set_sub_stage(u8 set_sub_stage)
112 set_sub_stage &= 0xff;
113 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
117 * phy_mgr_initialize() - Initialize PHY Manager
119 * Initialize PHY Manager.
121 static void phy_mgr_initialize(void)
125 debug("%s:%d\n", __func__, __LINE__);
126 /* Calibration has control over path to memory */
128 * In Hard PHY this is a 2-bit control:
132 writel(0x3, &phy_mgr_cfg->mux_sel);
134 /* USER memory clock is not stable when we begin initialization */
135 writel(0, &phy_mgr_cfg->reset_mem_stbl);
137 /* USER calibration status all set to zero */
138 writel(0, &phy_mgr_cfg->cal_status);
140 writel(0, &phy_mgr_cfg->cal_debug_info);
142 /* Init params only if we do NOT skip calibration. */
143 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
146 ratio = RW_MGR_MEM_DQ_PER_READ_DQS /
147 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
148 param->read_correct_mask_vg = (1 << ratio) - 1;
149 param->write_correct_mask_vg = (1 << ratio) - 1;
150 param->read_correct_mask = (1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
151 param->write_correct_mask = (1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
152 ratio = RW_MGR_MEM_DATA_WIDTH /
153 RW_MGR_MEM_DATA_MASK_WIDTH;
154 param->dm_correct_mask = (1 << ratio) - 1;
157 static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
163 if (odt_mode == RW_MGR_ODT_MODE_OFF) {
166 } else { /* RW_MGR_ODT_MODE_READ_WRITE */
167 switch (RW_MGR_MEM_NUMBER_OF_RANKS) {
169 /* Read: ODT = 0 ; Write: ODT = 1 */
173 case 2: /* 2 Ranks */
174 if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
175 /* - Dual-Slot , Single-Rank
176 * (1 chip-select per DIMM)
178 * - RDIMM, 4 total CS (2 CS per DIMM)
180 * Since MEM_NUMBER_OF_RANKS is 2 they are
182 * with 2 CS each (special for RDIMM)
183 * Read: Turn on ODT on the opposite rank
184 * Write: Turn on ODT on all ranks
186 odt_mask_0 = 0x3 & ~(1 << rank);
190 * USER - Single-Slot , Dual-rank DIMMs
191 * (2 chip-selects per DIMM)
192 * USER Read: Turn ODT off on all ranks
193 * USER Write: Turn on ODT on active rank
196 odt_mask_1 = 0x3 & (1 << rank);
199 case 4: /* 4 Ranks */
201 * ----------+-----------------------+
204 * Read From +-----------------------+
205 * Rank | 3 | 2 | 1 | 0 |
206 * ----------+-----+-----+-----+-----+
207 * 0 | 0 | 1 | 0 | 0 |
208 * 1 | 1 | 0 | 0 | 0 |
209 * 2 | 0 | 0 | 0 | 1 |
210 * 3 | 0 | 0 | 1 | 0 |
211 * ----------+-----+-----+-----+-----+
214 * ----------+-----------------------+
217 * Write To +-----------------------+
218 * Rank | 3 | 2 | 1 | 0 |
219 * ----------+-----+-----+-----+-----+
220 * 0 | 0 | 1 | 0 | 1 |
221 * 1 | 1 | 0 | 1 | 0 |
222 * 2 | 0 | 1 | 0 | 1 |
223 * 3 | 1 | 0 | 1 | 0 |
224 * ----------+-----+-----+-----+-----+
248 cs_and_odt_mask = (0xFF & ~(1 << rank)) |
249 ((0xFF & odt_mask_0) << 8) |
250 ((0xFF & odt_mask_1) << 16);
251 writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
252 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
256 * scc_mgr_set() - Set SCC Manager register
257 * @off: Base offset in SCC Manager space
258 * @grp: Read/Write group
259 * @val: Value to be set
261 * This function sets the SCC Manager (Scan Chain Control Manager) register.
263 static void scc_mgr_set(u32 off, u32 grp, u32 val)
265 writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
269 * scc_mgr_initialize() - Initialize SCC Manager registers
271 * Initialize SCC Manager registers.
273 static void scc_mgr_initialize(void)
276 * Clear register file for HPS. 16 (2^4) is the size of the
277 * full register file in the scc mgr:
278 * RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
279 * MEM_IF_READ_DQS_WIDTH - 1);
283 for (i = 0; i < 16; i++) {
284 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
285 __func__, __LINE__, i);
286 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
290 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
292 scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
295 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
297 scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
300 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
302 scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
305 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
307 scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
310 static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
312 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
316 static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
318 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
321 static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
323 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
326 static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
328 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
332 static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
334 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
335 RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
339 /* load up dqs config settings */
340 static void scc_mgr_load_dqs(uint32_t dqs)
342 writel(dqs, &sdr_scc_mgr->dqs_ena);
345 /* load up dqs io config settings */
346 static void scc_mgr_load_dqs_io(void)
348 writel(0, &sdr_scc_mgr->dqs_io_ena);
351 /* load up dq config settings */
352 static void scc_mgr_load_dq(uint32_t dq_in_group)
354 writel(dq_in_group, &sdr_scc_mgr->dq_ena);
357 /* load up dm config settings */
358 static void scc_mgr_load_dm(uint32_t dm)
360 writel(dm, &sdr_scc_mgr->dm_ena);
364 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
365 * @off: Base offset in SCC Manager space
366 * @grp: Read/Write group
367 * @val: Value to be set
368 * @update: If non-zero, trigger SCC update for all ranks (else first rank only)
370 * This function sets the SCC Manager (Scan Chain Control Manager) register
371 * and optionally triggers the SCC update for all ranks.
373 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
378 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
379 r += NUM_RANKS_PER_SHADOW_REG) {
380 scc_mgr_set(off, grp, val);
382 if (update || (r == 0)) {
383 writel(grp, &sdr_scc_mgr->dqs_ena);
384 writel(0, &sdr_scc_mgr->update);
389 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
392 * USER although the h/w doesn't support different phases per
393 * shadow register, for simplicity our scc manager modeling
394 * keeps different phase settings per shadow reg, and it's
395 * important for us to keep them in sync to match h/w.
396 * for efficiency, the scan chain update should occur only
399 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
400 read_group, phase, 0);
403 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
407 * USER although the h/w doesn't support different phases per
408 * shadow register, for simplicity our scc manager modeling
409 * keeps different phase settings per shadow reg, and it's
410 * important for us to keep them in sync to match h/w.
411 * for efficiency, the scan chain update should occur only
414 scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
415 write_group, phase, 0);
418 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
422 * In shadow register mode, the T11 settings are stored in
423 * registers in the core, which are updated by the DQS_ENA
424 * signals. Not issuing the SCC_MGR_UPD command allows us to
425 * save lots of rank switching overhead, by calling
426 * select_shadow_regs_for_update with update_scan_chains
429 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
430 read_group, delay, 1);
431 writel(0, &sdr_scc_mgr->update);
435 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
436 * @write_group: Write group
437 * @delay: Delay value
439 * This function sets the OCT output delay in SCC manager.
441 static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
443 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
444 RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
445 const int base = write_group * ratio;
448 * Load the setting in the SCC manager
449 * Although OCT affects only write data, the OCT delay is controlled
450 * by the DQS logic block which is instantiated once per read group.
451 * For protocols where a write group consists of multiple read groups,
452 * the setting must be set multiple times.
454 for (i = 0; i < ratio; i++)
455 scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
459 * scc_mgr_set_hhp_extras() - Set HHP extras.
461 * Load the fixed setting in the SCC manager HHP extras.
463 static void scc_mgr_set_hhp_extras(void)
466 * Load the fixed setting in the SCC manager
467 * bits: 0:0 = 1'b1 - DQS bypass
468 * bits: 1:1 = 1'b1 - DQ bypass
469 * bits: 4:2 = 3'b001 - rfifo_mode
470 * bits: 6:5 = 2'b01 - rfifo clock_select
471 * bits: 7:7 = 1'b0 - separate gating from ungating setting
472 * bits: 8:8 = 1'b0 - separate OE from Output delay setting
474 const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
475 (1 << 2) | (1 << 1) | (1 << 0);
476 const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
477 SCC_MGR_HHP_GLOBALS_OFFSET |
478 SCC_MGR_HHP_EXTRAS_OFFSET;
480 debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
483 debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
488 * scc_mgr_zero_all() - Zero all DQS config
490 * Zero all DQS config.
492 static void scc_mgr_zero_all(void)
497 * USER Zero all DQS config settings, across all groups and all
500 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
501 r += NUM_RANKS_PER_SHADOW_REG) {
502 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
504 * The phases actually don't exist on a per-rank basis,
505 * but there's no harm updating them several times, so
506 * let's keep the code simple.
508 scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
509 scc_mgr_set_dqs_en_phase(i, 0);
510 scc_mgr_set_dqs_en_delay(i, 0);
513 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
514 scc_mgr_set_dqdqs_output_phase(i, 0);
515 /* Arria V/Cyclone V don't have out2. */
516 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
520 /* Multicast to all DQS group enables. */
521 writel(0xff, &sdr_scc_mgr->dqs_ena);
522 writel(0, &sdr_scc_mgr->update);
526 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
527 * @write_group: Write group
529 * Set bypass mode and trigger SCC update.
531 static void scc_set_bypass_mode(const u32 write_group)
533 /* Multicast to all DQ enables. */
534 writel(0xff, &sdr_scc_mgr->dq_ena);
535 writel(0xff, &sdr_scc_mgr->dm_ena);
537 /* Update current DQS IO enable. */
538 writel(0, &sdr_scc_mgr->dqs_io_ena);
540 /* Update the DQS logic. */
541 writel(write_group, &sdr_scc_mgr->dqs_ena);
544 writel(0, &sdr_scc_mgr->update);
548 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
549 * @write_group: Write group
551 * Load DQS settings for Write Group, do not trigger SCC update.
553 static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
555 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
556 RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
557 const int base = write_group * ratio;
560 * Load the setting in the SCC manager
561 * Although OCT affects only write data, the OCT delay is controlled
562 * by the DQS logic block which is instantiated once per read group.
563 * For protocols where a write group consists of multiple read groups,
564 * the setting must be set multiple times.
566 for (i = 0; i < ratio; i++)
567 writel(base + i, &sdr_scc_mgr->dqs_ena);
571 * scc_mgr_zero_group() - Zero all configs for a group
573 * Zero DQ, DM, DQS and OCT configs for a group.
575 static void scc_mgr_zero_group(const u32 write_group, const int out_only)
579 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
580 r += NUM_RANKS_PER_SHADOW_REG) {
581 /* Zero all DQ config settings. */
582 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
583 scc_mgr_set_dq_out1_delay(i, 0);
585 scc_mgr_set_dq_in_delay(i, 0);
588 /* Multicast to all DQ enables. */
589 writel(0xff, &sdr_scc_mgr->dq_ena);
591 /* Zero all DM config settings. */
592 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
593 scc_mgr_set_dm_out1_delay(i, 0);
595 /* Multicast to all DM enables. */
596 writel(0xff, &sdr_scc_mgr->dm_ena);
598 /* Zero all DQS IO settings. */
600 scc_mgr_set_dqs_io_in_delay(0);
602 /* Arria V/Cyclone V don't have out2. */
603 scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
604 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
605 scc_mgr_load_dqs_for_write_group(write_group);
607 /* Multicast to all DQS IO enables (only 1 in total). */
608 writel(0, &sdr_scc_mgr->dqs_io_ena);
610 /* Hit update to zero everything. */
611 writel(0, &sdr_scc_mgr->update);
616 * apply and load a particular input delay for the DQ pins in a group
617 * group_bgn is the index of the first dq pin (in the write group)
619 static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
623 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
624 scc_mgr_set_dq_in_delay(p, delay);
630 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
631 * @delay: Delay value
633 * Apply and load a particular output delay for the DQ pins in a group.
635 static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
639 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
640 scc_mgr_set_dq_out1_delay(i, delay);
645 /* apply and load a particular output delay for the DM pins in a group */
646 static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
650 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
651 scc_mgr_set_dm_out1_delay(i, delay1);
657 /* apply and load delay on both DQS and OCT out1 */
658 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
661 scc_mgr_set_dqs_out1_delay(delay);
662 scc_mgr_load_dqs_io();
664 scc_mgr_set_oct_out1_delay(write_group, delay);
665 scc_mgr_load_dqs_for_write_group(write_group);
669 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
670 * @write_group: Write group
671 * @delay: Delay value
673 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
675 static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
681 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
685 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
689 new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
690 if (new_delay > IO_IO_OUT2_DELAY_MAX) {
691 debug_cond(DLEVEL == 1,
692 "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
693 __func__, __LINE__, write_group, delay, new_delay,
694 IO_IO_OUT2_DELAY_MAX,
695 new_delay - IO_IO_OUT2_DELAY_MAX);
696 new_delay -= IO_IO_OUT2_DELAY_MAX;
697 scc_mgr_set_dqs_out1_delay(new_delay);
700 scc_mgr_load_dqs_io();
703 new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
704 if (new_delay > IO_IO_OUT2_DELAY_MAX) {
705 debug_cond(DLEVEL == 1,
706 "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
707 __func__, __LINE__, write_group, delay,
708 new_delay, IO_IO_OUT2_DELAY_MAX,
709 new_delay - IO_IO_OUT2_DELAY_MAX);
710 new_delay -= IO_IO_OUT2_DELAY_MAX;
711 scc_mgr_set_oct_out1_delay(write_group, new_delay);
714 scc_mgr_load_dqs_for_write_group(write_group);
718 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
719 * @write_group: Write group
720 * @delay: Delay value
722 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
725 scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
730 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
731 r += NUM_RANKS_PER_SHADOW_REG) {
732 scc_mgr_apply_group_all_out_delay_add(write_group, delay);
733 writel(0, &sdr_scc_mgr->update);
738 * set_jump_as_return() - Return instruction optimization
740 * Optimization used to recover some slots in ddr3 inst_rom could be
741 * applied to other protocols if we wanted to
743 static void set_jump_as_return(void)
746 * To save space, we replace return with jump to special shared
747 * RETURN instruction so we set the counter to large value so that
750 writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
751 writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
755 * should always use constants as argument to ensure all computations are
756 * performed at compile time
758 static void delay_for_n_mem_clocks(const uint32_t clocks)
765 debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);
768 afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO;
769 /* scale (rounding up) to get afi clocks */
772 * Note, we don't bother accounting for being off a little bit
773 * because of a few extra instructions in outer loops
774 * Note, the loops have a test at the end, and do the test before
775 * the decrement, and so always perform the loop
776 * 1 time more than the counter value
778 if (afi_clocks == 0) {
780 } else if (afi_clocks <= 0x100) {
781 inner = afi_clocks-1;
784 } else if (afi_clocks <= 0x10000) {
786 outer = (afi_clocks-1) >> 8;
791 c_loop = (afi_clocks-1) >> 16;
795 * rom instructions are structured as follows:
797 * IDLE_LOOP2: jnz cntr0, TARGET_A
798 * IDLE_LOOP1: jnz cntr1, TARGET_B
801 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
802 * TARGET_B is set to IDLE_LOOP2 as well
804 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
805 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
807 * a little confusing, but it helps save precious space in the inst_rom
808 * and sequencer rom and keeps the delays more accurate and reduces
811 if (afi_clocks <= 0x100) {
812 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
813 &sdr_rw_load_mgr_regs->load_cntr1);
815 writel(RW_MGR_IDLE_LOOP1,
816 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
818 writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
819 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
821 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
822 &sdr_rw_load_mgr_regs->load_cntr0);
824 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
825 &sdr_rw_load_mgr_regs->load_cntr1);
827 writel(RW_MGR_IDLE_LOOP2,
828 &sdr_rw_load_jump_mgr_regs->load_jump_add0);
830 writel(RW_MGR_IDLE_LOOP2,
831 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
833 /* hack to get around compiler not being smart enough */
834 if (afi_clocks <= 0x10000) {
835 /* only need to run once */
836 writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
837 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
840 writel(RW_MGR_IDLE_LOOP2,
841 SDR_PHYGRP_RWMGRGRP_ADDRESS |
842 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
843 } while (c_loop-- != 0);
846 debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
850 * rw_mgr_mem_init_load_regs() - Load instruction registers
851 * @cntr0: Counter 0 value
852 * @cntr1: Counter 1 value
853 * @cntr2: Counter 2 value
854 * @jump: Jump instruction value
856 * Load instruction registers.
858 static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
860 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
861 RW_MGR_RUN_SINGLE_GROUP_OFFSET;
864 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
865 &sdr_rw_load_mgr_regs->load_cntr0);
866 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
867 &sdr_rw_load_mgr_regs->load_cntr1);
868 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
869 &sdr_rw_load_mgr_regs->load_cntr2);
871 /* Load jump address */
872 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
873 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
874 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
876 /* Execute count instruction */
877 writel(jump, grpaddr);
881 * rw_mgr_mem_load_user() - Load user calibration values
882 * @fin1: Final instruction 1
883 * @fin2: Final instruction 2
884 * @precharge: If 1, precharge the banks at the end
886 * Load user calibration values and optionally precharge the banks.
888 static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
891 u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
892 RW_MGR_RUN_SINGLE_GROUP_OFFSET;
895 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
896 if (param->skip_ranks[r]) {
897 /* request to skip the rank */
902 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
904 /* precharge all banks ... */
906 writel(RW_MGR_PRECHARGE_ALL, grpaddr);
909 * USER Use Mirror-ed commands for odd ranks if address
912 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
913 set_jump_as_return();
914 writel(RW_MGR_MRS2_MIRR, grpaddr);
915 delay_for_n_mem_clocks(4);
916 set_jump_as_return();
917 writel(RW_MGR_MRS3_MIRR, grpaddr);
918 delay_for_n_mem_clocks(4);
919 set_jump_as_return();
920 writel(RW_MGR_MRS1_MIRR, grpaddr);
921 delay_for_n_mem_clocks(4);
922 set_jump_as_return();
923 writel(fin1, grpaddr);
925 set_jump_as_return();
926 writel(RW_MGR_MRS2, grpaddr);
927 delay_for_n_mem_clocks(4);
928 set_jump_as_return();
929 writel(RW_MGR_MRS3, grpaddr);
930 delay_for_n_mem_clocks(4);
931 set_jump_as_return();
932 writel(RW_MGR_MRS1, grpaddr);
933 set_jump_as_return();
934 writel(fin2, grpaddr);
940 set_jump_as_return();
941 writel(RW_MGR_ZQCL, grpaddr);
943 /* tZQinit = tDLLK = 512 ck cycles */
944 delay_for_n_mem_clocks(512);
948 static void rw_mgr_mem_initialize(void)
950 debug("%s:%d\n", __func__, __LINE__);
952 /* The reset / cke part of initialization is broadcast to all ranks */
953 writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
954 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
957 * Here's how you load register for a loop
958 * Counters are located @ 0x800
959 * Jump address are located @ 0xC00
960 * For both, registers 0 to 3 are selected using bits 3 and 2, like
961 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
962 * I know this ain't pretty, but Avalon bus throws away the 2 least
966 /* start with memory RESET activated */
971 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
972 * If a and b are the number of iteration in 2 nested loops
973 * it takes the following number of cycles to complete the operation:
974 * number_of_cycles = ((2 + n) * a + 2) * b
975 * where n is the number of instruction in the inner loop
976 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
979 rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL,
981 RW_MGR_INIT_RESET_0_CKE_0);
983 /* indicate that memory is stable */
984 writel(1, &phy_mgr_cfg->reset_mem_stbl);
987 * transition the RESET to high
992 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
993 * If a and b are the number of iteration in 2 nested loops
994 * it takes the following number of cycles to complete the operation
995 * number_of_cycles = ((2 + n) * a + 2) * b
996 * where n is the number of instruction in the inner loop
997 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
1000 rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL,
1001 SEQ_TRESET_CNTR2_VAL,
1002 RW_MGR_INIT_RESET_1_CKE_0);
1004 /* bring up clock enable */
1006 /* tXRP < 250 ck cycles */
1007 delay_for_n_mem_clocks(250);
1009 rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET,
1014 * At the end of calibration we have to program the user settings in, and
1015 * USER hand off the memory to the user.
1017 static void rw_mgr_mem_handoff(void)
1019 rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1);
1021 * USER need to wait tMOD (12CK or 15ns) time before issuing
1022 * other commands, but we will have plenty of NIOS cycles before
1023 * actual handoff so its okay.
1028 * performs a guaranteed read on the patterns we are going to use during a
1029 * read test to ensure memory works
1031 static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
1032 uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
1036 uint32_t correct_mask_vg;
1037 uint32_t tmp_bit_chk;
1038 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
1039 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1041 uint32_t base_rw_mgr;
1043 *bit_chk = param->read_correct_mask;
1044 correct_mask_vg = param->read_correct_mask_vg;
1046 for (r = rank_bgn; r < rank_end; r++) {
1047 if (param->skip_ranks[r])
1048 /* request to skip the rank */
1052 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1054 /* Load up a constant bursts of read commands */
1055 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1056 writel(RW_MGR_GUARANTEED_READ,
1057 &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1059 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1060 writel(RW_MGR_GUARANTEED_READ_CONT,
1061 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1064 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) {
1065 /* reset the fifos to get pointers to known state */
1067 writel(0, &phy_mgr_cmd->fifo_reset);
1068 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1069 RW_MGR_RESET_READ_DATAPATH_OFFSET);
1071 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
1072 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);
1074 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1075 writel(RW_MGR_GUARANTEED_READ, addr +
1076 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
1079 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1080 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr));
1085 *bit_chk &= tmp_bit_chk;
1088 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1089 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));
1091 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1092 debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\
1093 %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask,
1094 (long unsigned int)(*bit_chk == param->read_correct_mask));
1095 return *bit_chk == param->read_correct_mask;
/*
 * Run the guaranteed-read pattern test on @group across all ranks: rank_bgn
 * is 0 and all_ranks is 1. Returns the result of
 * rw_mgr_mem_calibrate_read_test_patterns() unchanged.
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks(
	uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
	const uint32_t passed =
		rw_mgr_mem_calibrate_read_test_patterns(0, group, num_tries,
							bit_chk, 1);

	return passed;
}
1105 /* load up the patterns we are going to use during a read test */
1106 static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
1110 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
1111 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1113 debug("%s:%d\n", __func__, __LINE__);
1114 for (r = rank_bgn; r < rank_end; r++) {
1115 if (param->skip_ranks[r])
1116 /* request to skip the rank */
1120 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1122 /* Load up a constant bursts */
1123 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1125 writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
1126 &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1128 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1130 writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
1131 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1133 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1135 writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
1136 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1138 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1140 writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
1141 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1143 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1144 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1147 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1151 * try a read and see if it returns correct data back. has dummy reads
1152 * inserted into the mix used to align dqs enable. has more thorough checks
1153 * than the regular read test.
/*
 * Run the RW_MGR_READ_B2B read test on one read group and report which bits
 * read back correctly.
 *
 * rank_bgn:    first rank to test
 * group:       read DQS group; used to address the run register and to clear
 *              DQS enable (RW_MGR_CLEAR_DQS_ENABLE) once the ranks are done
 * num_tries:   not referenced in the lines visible here
 * all_correct: NOTE(review): presumably selects between the ALL and ONE
 *              return paths at the end; the selecting branch is not visible
 * bit_chk:     output; starts as param->read_correct_mask and is ANDed with
 *              the mask accumulated for each tested rank
 * all_groups:  when non-zero (and quick read mode is off) load_cntr0 is
 *              loaded with 0x06; it is also printed in the debug output
 * all_ranks:   non-zero tests ranks up to RW_MGR_MEM_NUMBER_OF_RANKS,
 *              otherwise NUM_RANKS_PER_SHADOW_REG ranks starting at rank_bgn
 *
 * For every tested rank the per-virtual-group result is built from
 * correct_mask_vg with the bits of the RW manager base register inverted,
 * shifted into tmp_bit_chk one virtual group at a time.
 *
 * Both exit paths switch ODT off with
 * set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF) first.
 *
 * Return: (*bit_chk == param->read_correct_mask) on the ALL path,
 *         (*bit_chk != 0) on the ONE path.
 */
1155 static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
1156 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
1157 uint32_t all_groups, uint32_t all_ranks)
1160 uint32_t correct_mask_vg;
1161 uint32_t tmp_bit_chk;
1162 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
1163 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1165 uint32_t base_rw_mgr;
1167 *bit_chk = param->read_correct_mask;
1168 correct_mask_vg = param->read_correct_mask_vg;
1170 uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) &
1171 CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);
1173 for (r = rank_bgn; r < rank_end; r++) {
1174 if (param->skip_ranks[r])
1175 /* request to skip the rank */
1179 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1181 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1183 writel(RW_MGR_READ_B2B_WAIT1,
1184 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1186 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1187 writel(RW_MGR_READ_B2B_WAIT2,
1188 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1190 if (quick_read_mode)
1191 writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1192 /* need at least two (1+1) reads to capture failures */
1193 else if (all_groups)
1194 writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1196 writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1198 writel(RW_MGR_READ_B2B,
1199 &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1201 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
1202 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
1203 &sdr_rw_load_mgr_regs->load_cntr3);
1205 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1207 writel(RW_MGR_READ_B2B,
1208 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
/*
 * Walk the virtual groups from the highest index downwards. The for header
 * has no exit test, so the loop exit has to come from the body (that
 * statement is not visible here).
 */
1211 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) {
1212 /* reset the fifos to get pointers to known state */
1213 writel(0, &phy_mgr_cmd->fifo_reset);
1214 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1215 RW_MGR_RESET_READ_DATAPATH_OFFSET);
1217 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
1218 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);
1221 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET;
1223 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1225 writel(RW_MGR_READ_B2B, addr +
1226 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
1229 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1230 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
1235 *bit_chk &= tmp_bit_chk;
1238 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1239 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));
1242 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1243 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\
1244 (%u == %u) => %lu", __func__, __LINE__, group,
1245 all_groups, *bit_chk, param->read_correct_mask,
1246 (long unsigned int)(*bit_chk ==
1247 param->read_correct_mask));
1248 return *bit_chk == param->read_correct_mask;
1250 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1251 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\
1252 (%u != %lu) => %lu\n", __func__, __LINE__,
1253 group, all_groups, *bit_chk, (long unsigned int)0,
1254 (long unsigned int)(*bit_chk != 0x00));
1255 return *bit_chk != 0x00;
/*
 * Convenience wrapper around rw_mgr_mem_calibrate_read_test(): runs the read
 * test starting at rank 0 with all_ranks set to 1, forwarding group,
 * num_tries, all_correct, bit_chk and all_groups unchanged.
 *
 * Return: the value returned by rw_mgr_mem_calibrate_read_test().
 */
1259 static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
1260 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
1261 uint32_t all_groups)
1263 return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
1264 bit_chk, all_groups, 1);
/*
 * Issue one VFIFO increment for group grp by writing the group number to
 * phy_mgr_cmd->inc_vfifo_hard_phy.
 *
 * v: caller-side VFIFO position counter.
 *    NOTE(review): any update of *v is not visible in this excerpt - confirm.
 */
1267 static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
1269 writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
/*
 * Step the VFIFO of group grp back by one position.
 *
 * Implemented as VFIFO_SIZE - 1 calls to rw_mgr_incr_vfifo(); presumably this
 * relies on the VFIFO position wrapping modulo VFIFO_SIZE - TODO confirm.
 */
1273 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
1277 for (i = 0; i < VFIFO_SIZE-1; i++)
1278 rw_mgr_incr_vfifo(grp, v);
/*
 * Push the VFIFO of group grp one step at a time, running a PASS_ONE_BIT read
 * test across all ranks at each position, for at most VFIFO_SIZE positions.
 *
 * grp:     read group under calibration
 * bit_chk: receives the per-bit result of the most recent read test
 *
 * If v reaches VFIFO_SIZE the search has failed and a debug message is
 * emitted. The handling of test_status and fail_cnt, and the return
 * statements, are not visible in this excerpt; the caller
 * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() stores the return value as
 * its VFIFO position.
 */
1281 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
1284 uint32_t fail_cnt = 0;
1285 uint32_t test_status;
1287 for (v = 0; v < VFIFO_SIZE; ) {
1288 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
1289 __func__, __LINE__, v);
1290 test_status = rw_mgr_mem_calibrate_read_test_all_ranks
1291 (grp, 1, PASS_ONE_BIT, bit_chk, 0);
1299 /* fiddle with FIFO */
1300 rw_mgr_incr_vfifo(grp, &v);
1303 if (v >= VFIFO_SIZE) {
1304 /* no failing read found!! Something must have gone wrong */
1305 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
1306 __func__, __LINE__);
/*
 * Search for the first DQS enable setting at which the read test is run
 * successfully, and record its delay in *work_bgn.
 *
 * The search is three nested sweeps:
 *   - DQS enable delay *d from 0 to dtaps_per_ptap (tmp_delay grows by
 *     IO_DELAY_PER_DQS_EN_DCHAIN_TAP per step and seeds *work_bgn),
 *   - VFIFO position counter *i from 0 to VFIFO_SIZE - 1,
 *   - DQS enable phase *p from 0 to IO_DQS_EN_PHASE_MAX (*work_bgn grows by
 *     IO_DELAY_PER_OPA_TAP per step).
 * Each candidate is programmed for all ranks and checked with a PASS_ONE_BIT
 * read test. When the phase sweep runs past IO_DQS_EN_PHASE_MAX the VFIFO is
 * incremented.
 *
 * *max_working_cnt is set to 1 inside the phase sweep (presumably when the
 * read test passes; the test of its result is not visible here).
 *
 * Return: the caller treats 0 as failure. *i reaching VFIFO_SIZE means no
 * working vfifo/ptap/dtap combination was found.
 */
1313 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
1314 uint32_t dtaps_per_ptap, uint32_t *work_bgn,
1315 uint32_t *v, uint32_t *d, uint32_t *p,
1316 uint32_t *i, uint32_t *max_working_cnt)
1318 uint32_t found_begin = 0;
1319 uint32_t tmp_delay = 0;
1320 uint32_t test_status;
1322 for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
1323 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1324 *work_bgn = tmp_delay;
1325 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1327 for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
1328 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
1329 IO_DELAY_PER_OPA_TAP) {
1330 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1333 rw_mgr_mem_calibrate_read_test_all_ranks
1334 (*grp, 1, PASS_ONE_BIT, bit_chk, 0);
1337 *max_working_cnt = 1;
1346 if (*p > IO_DQS_EN_PHASE_MAX)
1347 /* fiddle with FIFO */
1348 rw_mgr_incr_vfifo(*grp, v);
1355 if (*i >= VFIFO_SIZE) {
1356 /* cannot find working solution */
1357 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\
1358 ptap/dtap\n", __func__, __LINE__);
/*
 * Back up one phase tap from the working phase and sweep the DQS enable delay
 * to look for a working setting that starts earlier than *work_bgn.
 *
 * Backing up may wrap the phase to IO_DQS_EN_PHASE_MAX together with a VFIFO
 * decrement (the condition selecting that case is not visible here).
 * tmp_delay starts one IO_DELAY_PER_OPA_TAP below *work_bgn and grows by
 * IO_DELAY_PER_DQS_EN_DCHAIN_TAP per delay tap; the sweep stops at
 * IO_DQS_EN_DELAY_MAX or once tmp_delay reaches *work_bgn. A passing
 * all-ranks read test moves *work_bgn down to tmp_delay.
 *
 * If a working delay tap was found (found_begin == 1) *max_working_cnt is
 * incremented. Before returning, the VFIFO is incremented again when *p is
 * above IO_DQS_EN_PHASE_MAX, and the DQS enable delay is reset to 0 for all
 * ranks.
 */
1365 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
1366 uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1367 uint32_t *p, uint32_t *max_working_cnt)
1369 uint32_t found_begin = 0;
1372 /* Special case code for backing up a phase */
1374 *p = IO_DQS_EN_PHASE_MAX;
1375 rw_mgr_decr_vfifo(*grp, v);
1379 tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1380 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1382 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
1383 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1384 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1386 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1390 *work_bgn = tmp_delay;
1395 /* We have found a working dtap before the ptap found above */
1396 if (found_begin == 1)
1397 (*max_working_cnt)++;
1400 * Restore VFIFO to old state before we decremented it
1404 if (*p > IO_DQS_EN_PHASE_MAX) {
1406 rw_mgr_incr_vfifo(*grp, v);
1409 scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
/*
 * Starting from a working phase, step forward in phase taps until the
 * all-ranks PASS_ONE_BIT read test stops passing, tracking the delay of the
 * current setting in *work_end.
 *
 * *work_end grows by IO_DELAY_PER_OPA_TAP per phase step. Whenever *p runs
 * past IO_DQS_EN_PHASE_MAX the VFIFO is incremented. The outer sweep lets *i
 * run up to VFIFO_SIZE (one more position than find_working_phase() allows).
 * *max_working_cnt is incremented inside the sweep; the exact branch
 * structure around it is not fully visible in this excerpt.
 *
 * Return: the caller treats 0 as failure. *i reaching VFIFO_SIZE + 1 means
 * the failing edge was never seen.
 */
1412 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
1413 uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1414 uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
1417 uint32_t found_end = 0;
1420 *work_end += IO_DELAY_PER_OPA_TAP;
1421 if (*p > IO_DQS_EN_PHASE_MAX) {
1422 /* fiddle with FIFO */
1424 rw_mgr_incr_vfifo(*grp, v);
1427 for (; *i < VFIFO_SIZE + 1; (*i)++) {
1428 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
1429 += IO_DELAY_PER_OPA_TAP) {
1430 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1432 if (!rw_mgr_mem_calibrate_read_test_all_ranks
1433 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
1437 (*max_working_cnt)++;
1444 if (*p > IO_DQS_EN_PHASE_MAX) {
1445 /* fiddle with FIFO */
1446 rw_mgr_incr_vfifo(*grp, v);
1451 if (*i >= VFIFO_SIZE + 1) {
1452 /* cannot see edge of failing read */
1453 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\
1454 failed\n", __func__, __LINE__);
/*
 * Program the DQS enable phase and delay at the centre of the working window
 * [*work_bgn, *work_end] and then push the VFIFO until a read test passes.
 *
 * Steps visible in the body:
 *   1. *work_mid = (*work_bgn + *work_end) / 2.
 *   2. tmp_delay is accumulated over one full phase sweep
 *      (IO_DELAY_PER_OPA_TAP per phase tap) and *work_mid is reduced by that
 *      amount until it is no larger than it.
 *   3. Phase taps, then delay taps, are added while tmp_delay stays below
 *      *work_mid; the phase is stepped back by one tap before the delay taps
 *      are added.
 *   4. Phase (*p) - 1 and delay *d are applied to all ranks.
 *   5. Up to VFIFO_SIZE VFIFO positions are tried with the all-ranks read
 *      test, incrementing the VFIFO between attempts.
 *
 * NOTE(review): the initialisation of tmp_delay before steps 2 and 3 is not
 * visible in this excerpt.
 *
 * Return: the caller treats 0 as failure. i reaching VFIFO_SIZE means no
 * VFIFO position passed at the centre.
 */
1461 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
1462 uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1463 uint32_t *p, uint32_t *work_mid,
1469 *work_mid = (*work_bgn + *work_end) / 2;
1471 debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1472 *work_bgn, *work_end, *work_mid);
1473 /* Get the middle delay to be less than a VFIFO delay */
1474 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
1475 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1477 debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1478 while (*work_mid > tmp_delay)
1479 *work_mid -= tmp_delay;
1480 debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);
1483 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
1484 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1486 tmp_delay -= IO_DELAY_PER_OPA_TAP;
1487 debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
1488 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
1489 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
1491 debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);
1493 scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
1494 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1497 * push vfifo until we can successfully calibrate. We can do this
1498 * because the largest possible margin in 1 VFIFO cycle.
1500 for (i = 0; i < VFIFO_SIZE; i++) {
1501 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
1503 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1509 /* fiddle with FIFO */
1510 rw_mgr_incr_vfifo(*grp, v);
1513 if (i >= VFIFO_SIZE) {
1514 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \
1515 failed\n", __func__, __LINE__);
1522 /* find a good dqs enable to use */
/*
 * Find a working DQS enable phase/delay for read group grp and place it at
 * the centre of the passing window.
 *
 * Outline, following the step comments in the body:
 *   0. compute a static dtaps_per_ptap as
 *      IO_DELAY_PER_OPA_TAP / IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
 *   1. find_vfifo_read(): push the VFIFO until a failing read is seen;
 *   2. find_working_phase(): locate the first working setting (work_bgn);
 *   3a-5a or 3-5b. locate the end of the window (work_end) with
 *      sdr_backup_phase(), sdr_nonworking_phase() and a final sweep of the
 *      DQS enable delay taps up to IO_DQS_EN_DELAY_MAX;
 *   then back up one phase tap and re-find the window edge in delay taps:
 *      if both a passing and a subsequent failing read are found,
 *      dtaps_per_ptap becomes d - initial_failing_dtap, otherwise the static
 *      value is kept; the result is written to sdr_reg_file->dtaps_per_ptap;
 *   6. sdr_find_window_centre(): program the centre of the window.
 *
 * A zero result from find_working_phase(), sdr_nonworking_phase() or
 * sdr_find_window_centre() is tested for, as is work_end < work_bgn; the
 * statements executed in those cases and the return statements are not
 * visible in this excerpt. The caller stores the result in a variable named
 * found.
 */
1523 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
1525 uint32_t v, d, p, i;
1526 uint32_t max_working_cnt;
1528 uint32_t dtaps_per_ptap;
1529 uint32_t work_bgn, work_mid, work_end;
1530 uint32_t found_passing_read, found_failing_read, initial_failing_dtap;
1532 debug("%s:%d %u\n", __func__, __LINE__, grp);
1534 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1536 scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1537 scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1539 /* ************************************************************** */
1540 /* * Step 0 : Determine number of delay taps for each phase tap * */
1541 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1543 /* ********************************************************* */
1544 /* * Step 1 : First push vfifo until we get a failing read * */
1545 v = find_vfifo_read(grp, &bit_chk);
1547 max_working_cnt = 0;
1549 /* ******************************************************** */
1550 /* * step 2: find first working phase, increment in ptaps * */
1552 if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
1553 &p, &i, &max_working_cnt) == 0)
1556 work_end = work_bgn;
1559 * If d is 0 then the working window covers a phase tap and
1560 * we can follow the old procedure otherwise, we've found the beginning,
1561 * and we need to increment the dtaps until we find the end.
1564 /* ********************************************************* */
1565 /* * step 3a: if we have room, back off by one and
1566 increment in dtaps * */
1568 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1571 /* ********************************************************* */
1572 /* * step 4a: go forward from working phase to non working
1573 phase, increment in ptaps * */
1574 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1575 &i, &max_working_cnt, &work_end) == 0)
1578 /* ********************************************************* */
1579 /* * step 5a: back off one from last, increment in dtaps * */
1581 /* Special case code for backing up a phase */
1583 p = IO_DQS_EN_PHASE_MAX;
1584 rw_mgr_decr_vfifo(grp, &v);
1589 work_end -= IO_DELAY_PER_OPA_TAP;
1590 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1592 /* * The actual increment of dtaps is done outside of
1593 the if/else loop to share code */
1596 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \
1597 vfifo=%u ptap=%u\n", __func__, __LINE__,
1600 /* ******************************************************* */
1601 /* * step 3-5b: Find the right edge of the window using
1603 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
1604 ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
1607 work_end = work_bgn;
1609 /* * The actual increment of dtaps is done outside of the
1610 if/else loop to share code */
1612 /* Only here to counterbalance a subtract later on which is
1613 not needed if this branch of the algorithm is taken */
1617 /* The dtap increment to find the failing edge is done here */
1618 for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end +=
1619 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1620 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1621 end-2: dtap=%u\n", __func__, __LINE__, d);
1622 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1624 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1631 /* Go back to working dtap */
1633 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1635 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \
1636 ptap=%u dtap=%u end=%u\n", __func__, __LINE__,
1637 v, p, d-1, work_end);
1639 if (work_end < work_bgn) {
1641 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \
1642 failed\n", __func__, __LINE__);
1646 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
1647 __func__, __LINE__, work_bgn, work_end);
1649 /* *************************************************************** */
1651 * * We need to calculate the number of dtaps that equal a ptap
1652 * * To do that we'll back up a ptap and re-find the edge of the
1653 * * window using dtaps
1656 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \
1657 for tracking\n", __func__, __LINE__);
1659 /* Special case code for backing up a phase */
1661 p = IO_DQS_EN_PHASE_MAX;
1662 rw_mgr_decr_vfifo(grp, &v);
1663 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1664 cycle/phase: v=%u p=%u\n", __func__, __LINE__,
1668 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1669 phase only: v=%u p=%u", __func__, __LINE__,
1673 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1676 * Increase dtap until we first see a passing read (in case the
1677 * window is smaller than a ptap),
1678 * and then a failing read to mark the edge of the window again
1681 /* Find a passing read */
1682 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
1683 __func__, __LINE__);
1684 found_passing_read = 0;
1685 found_failing_read = 0;
1686 initial_failing_dtap = d;
1687 for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
1688 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \
1689 read d=%u\n", __func__, __LINE__, d);
1690 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1692 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1695 found_passing_read = 1;
1700 if (found_passing_read) {
1701 /* Find a failing read */
1702 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \
1703 read\n", __func__, __LINE__);
1704 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
1705 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1706 testing read d=%u\n", __func__, __LINE__, d);
1707 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1709 if (!rw_mgr_mem_calibrate_read_test_all_ranks
1710 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
1711 found_failing_read = 1;
1716 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \
1717 calculate dtaps", __func__, __LINE__);
1718 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n");
1722 * The dynamically calculated dtaps_per_ptap is only valid if we
1723 * found a passing/failing read. If we didn't, it means d hit the max
1724 * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its
1725 * statically calculated value.
1727 if (found_passing_read && found_failing_read)
1728 dtaps_per_ptap = d - initial_failing_dtap;
1730 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1731 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \
1732 - %u = %u", __func__, __LINE__, d,
1733 initial_failing_dtap, dtaps_per_ptap);
1735 /* ******************************************** */
1736 /* * step 6: Find the centre of the window * */
1737 if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1738 &work_mid, &work_end) == 0)
1741 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \
1742 vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
1748 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
1749 * dq_in_delay values
/*
 * Run rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() on read_group with the
 * DQ input delays of the group staggered, then put those delays back to 0.
 *
 * write_group: only used in the debug output visible here
 * read_group:  group handed to the DQS enable phase search
 * test_bgn:    index of the first DQ pin of the group
 *
 * Staggering: pin test_bgn + i is given delay i * delay_step, where
 * delay_step = IO_IO_IN_DELAY_MAX / (RW_MGR_MEM_DQ_PER_READ_DQS - 1). Both
 * the staggering and the reset iterate over the ranks in steps of
 * NUM_RANKS_PER_SHADOW_REG, and a write of 0 to sdr_scc_mgr->update follows
 * the delay programming.
 *
 * The result of the phase search is kept in found; the return statement is
 * not visible in this excerpt.
 */
1752 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
1753 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
1761 const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
1762 (RW_MGR_MEM_DQ_PER_READ_DQS-1);
1763 /* we start at zero, so have one less dq to devide among */
1765 debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group,
1768 /* try different dq_in_delays since the dq path is shorter than dqs */
1770 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1771 r += NUM_RANKS_PER_SHADOW_REG) {
1772 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) {
1773 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
1774 vfifo_find_dqs_", __func__, __LINE__);
1775 debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
1776 write_group, read_group);
1777 debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d);
1778 scc_mgr_set_dq_in_delay(p, d);
1781 writel(0, &sdr_scc_mgr->update);
1784 found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);
1786 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\
1787 en_phase_sweep_dq", __func__, __LINE__);
1788 debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Reseting delay \
1789 chain to zero\n", write_group, read_group, found);
1791 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1792 r += NUM_RANKS_PER_SHADOW_REG) {
1793 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1795 scc_mgr_set_dq_in_delay(p, 0);
1798 writel(0, &sdr_scc_mgr->update);
1804 /* per-bit deskew DQ and center */
/*
 * Per-bit deskew of the DQ inputs and centring of DQS for one read group.
 *
 * rank_bgn:      first rank handed to the read/write test
 * write_group:   write group containing read_group
 * read_group:    read DQS group being centred
 * test_bgn:      index of the first DQ pin of the group
 * use_read_test: non-zero runs rw_mgr_mem_calibrate_read_test(); the write
 *                test call appears on the other path, and on failure it
 *                also decides which calibration stage is reported
 * update_fom:    not referenced in the lines visible here
 *
 * Outline:
 *   1. Read the current DQS input delay (start_dqs) and, when
 *      IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS is set, the DQS enable delay.
 *   2. Mark every left/right edge as unknown (IO_IO_IN_DELAY_MAX + 1).
 *   3. Left edge: sweep the DQ input delay d over 0..IO_IO_IN_DELAY_MAX,
 *      testing after each step; stop once the test fails and every bit has
 *      been seen passing (sticky_bit_chk == param->read_correct_mask).
 *   4. Reset the DQ delays, invalidate right edges recorded without a left
 *      edge, and rebuild sticky_bit_chk.
 *   5. Right edge: sweep the DQS input delay from start_dqs up to
 *      IO_DQS_IN_DELAY_MAX, moving the DQS enable delay along (clamped to
 *      IO_DQS_EN_DELAY_MAX) when IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS is set.
 *   6. If any bit still lacks an edge, restore the original DQS settings and
 *      record the failing group stage.
 *   7. Otherwise take the smallest left_edge - right_edge over the bits,
 *      halve it, derive the new DQS delay (limited to IO_DQS_IN_DELAY_MAX
 *      and to non-negative values), shift each DQ by shift_dq, and apply
 *      the final DQS / DQS enable delays.
 *
 * Return: (dq_margin >= 0) && (dqs_margin >= 0) on the path that reaches the
 * end of the function; the return taken in step 6 is not visible here.
 */
1805 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
1806 uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
1807 uint32_t use_read_test, uint32_t update_fom)
1809 uint32_t i, p, d, min_index;
1811 * Store these as signed since there are comparisons with
1815 uint32_t sticky_bit_chk;
1816 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1817 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1818 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS];
1820 int32_t orig_mid_min, mid_min;
1821 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs,
1823 int32_t dq_margin, dqs_margin;
1825 uint32_t temp_dq_in_delay1, temp_dq_in_delay2;
1828 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn);
1830 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;
1831 start_dqs = readl(addr + (read_group << 2));
1832 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
1833 start_dqs_en = readl(addr + ((read_group << 2)
1834 - IO_DQS_EN_DELAY_OFFSET));
1836 /* set the left and right edge of each bit to an illegal value */
1837 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
1839 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1840 left_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1841 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1844 /* Search for the left edge of the window for each bit */
1845 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) {
1846 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d);
1848 writel(0, &sdr_scc_mgr->update);
1851 * Stop searching when the read test doesn't pass AND when
1852 * we've seen a passing read on every bit.
1854 if (use_read_test) {
1855 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1856 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1859 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1862 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1863 (read_group - (write_group *
1864 RW_MGR_MEM_IF_READ_DQS_WIDTH /
1865 RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1866 stop = (bit_chk == 0);
1868 sticky_bit_chk = sticky_bit_chk | bit_chk;
1869 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1870 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \
1871 && %u", __func__, __LINE__, d,
1873 param->read_correct_mask, stop);
1878 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1880 /* Remember a passing test as the
1884 /* If a left edge has not been seen yet,
1885 then a future passing test will mark
1886 this edge as the right edge */
1888 IO_IO_IN_DELAY_MAX + 1) {
1889 right_edge[i] = -(d + 1);
1892 bit_chk = bit_chk >> 1;
1897 /* Reset DQ delay chains to 0 */
/*
 * NOTE(review): this call passes two arguments, while the call in the
 * left-edge sweep above passes three (write_group, test_bgn, d). Confirm
 * against the prototype of scc_mgr_apply_group_dq_in_delay().
 */
1898 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
1900 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
1901 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
1902 %d right_edge[%u]: %d\n", __func__, __LINE__,
1903 i, left_edge[i], i, right_edge[i]);
1906 * Check for cases where we haven't found the left edge,
1907 * which makes our assignment of the the right edge invalid.
1908 * Reset it to the illegal value.
1910 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && (
1911 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1912 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1913 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \
1914 right_edge[%u]: %d\n", __func__, __LINE__,
1919 * Reset sticky bit (except for bits where we have seen
1920 * both the left and right edge).
1922 sticky_bit_chk = sticky_bit_chk << 1;
1923 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) &&
1924 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1925 sticky_bit_chk = sticky_bit_chk | 1;
1932 /* Search for the right edge of the window for each bit */
1933 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) {
1934 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
1935 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
1936 uint32_t delay = d + start_dqs_en;
1937 if (delay > IO_DQS_EN_DELAY_MAX)
1938 delay = IO_DQS_EN_DELAY_MAX;
1939 scc_mgr_set_dqs_en_delay(read_group, delay);
1941 scc_mgr_load_dqs(read_group);
1943 writel(0, &sdr_scc_mgr->update);
1946 * Stop searching when the read test doesn't pass AND when
1947 * we've seen a passing read on every bit.
1949 if (use_read_test) {
1950 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1951 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1954 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1957 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1958 (read_group - (write_group *
1959 RW_MGR_MEM_IF_READ_DQS_WIDTH /
1960 RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1961 stop = (bit_chk == 0);
1963 sticky_bit_chk = sticky_bit_chk | bit_chk;
1964 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1966 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \
1967 %u && %u", __func__, __LINE__, d,
1968 sticky_bit_chk, param->read_correct_mask, stop);
1973 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1975 /* Remember a passing test as
1980 /* If a right edge has not been
1981 seen yet, then a future passing
1982 test will mark this edge as the
1984 if (right_edge[i] ==
1985 IO_IO_IN_DELAY_MAX + 1) {
1986 left_edge[i] = -(d + 1);
1989 /* d = 0 failed, but it passed
1990 when testing the left edge,
1991 so it must be marginal,
1993 if (right_edge[i] ==
1994 IO_IO_IN_DELAY_MAX + 1 &&
2000 /* If a right edge has not been
2001 seen yet, then a future passing
2002 test will mark this edge as the
2004 else if (right_edge[i] ==
2005 IO_IO_IN_DELAY_MAX +
2007 left_edge[i] = -(d + 1);
2012 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\
2013 d=%u]: ", __func__, __LINE__, d);
2014 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
2015 (int)(bit_chk & 1), i, left_edge[i]);
2016 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2018 bit_chk = bit_chk >> 1;
2023 /* Check that all bits have a window */
2024 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2025 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
2026 %d right_edge[%u]: %d", __func__, __LINE__,
2027 i, left_edge[i], i, right_edge[i]);
2028 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
2029 == IO_IO_IN_DELAY_MAX + 1)) {
2031 * Restore delay chain settings before letting the loop
2032 * in rw_mgr_mem_calibrate_vfifo to retry different
2033 * dqs/ck relationships.
2035 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
2036 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2037 scc_mgr_set_dqs_en_delay(read_group,
2040 scc_mgr_load_dqs(read_group);
2041 writel(0, &sdr_scc_mgr->update);
2043 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \
2044 find edge [%u]: %d %d", __func__, __LINE__,
2045 i, left_edge[i], right_edge[i]);
2046 if (use_read_test) {
2047 set_failing_group_stage(read_group *
2048 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2050 CAL_SUBSTAGE_VFIFO_CENTER);
2052 set_failing_group_stage(read_group *
2053 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2054 CAL_STAGE_VFIFO_AFTER_WRITES,
2055 CAL_SUBSTAGE_VFIFO_CENTER);
2061 /* Find middle of window for each DQ bit */
2062 mid_min = left_edge[0] - right_edge[0];
2064 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2065 mid = left_edge[i] - right_edge[i];
2066 if (mid < mid_min) {
2073 * -mid_min/2 represents the amount that we need to move DQS.
2074 * If mid_min is odd and positive we'll need to add one to
2075 * make sure the rounding in further calculations is correct
2076 * (always bias to the right), so just add 1 for all positive values.
2081 mid_min = mid_min / 2;
2083 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
2084 __func__, __LINE__, mid_min, min_index);
2086 /* Determine the amount we can change DQS (which is -mid_min) */
2087 orig_mid_min = mid_min;
2088 new_dqs = start_dqs - mid_min;
2089 if (new_dqs > IO_DQS_IN_DELAY_MAX)
2090 new_dqs = IO_DQS_IN_DELAY_MAX;
2091 else if (new_dqs < 0)
2094 mid_min = start_dqs - new_dqs;
2095 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2098 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2099 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2100 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2101 else if (start_dqs_en - mid_min < 0)
2102 mid_min += start_dqs_en - mid_min;
2104 new_dqs = start_dqs - mid_min;
2106 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \
2107 new_dqs=%d mid_min=%d\n", start_dqs,
2108 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2111 /* Initialize data for export structures */
2112 dqs_margin = IO_IO_IN_DELAY_MAX + 1;
2113 dq_margin = IO_IO_IN_DELAY_MAX + 1;
2115 /* add delay to bring centre of all DQ windows to the same "level" */
2116 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
2117 /* Use values before divide by 2 to reduce round off error */
2118 shift_dq = (left_edge[i] - right_edge[i] -
2119 (left_edge[min_index] - right_edge[min_index]))/2 +
2120 (orig_mid_min - mid_min);
2122 debug_cond(DLEVEL == 2, "vfifo_center: before: \
2123 shift_dq[%u]=%d\n", i, shift_dq);
2125 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
/*
 * NOTE(review): temp_dq_in_delay1 is read at the pin index p, while
 * temp_dq_in_delay2 is read at the loop index i. The upper clamp below is
 * tested against temp_dq_in_delay1 but computed from temp_dq_in_delay2;
 * the two differ whenever test_bgn is non-zero. Confirm this is intended.
 */
2126 temp_dq_in_delay1 = readl(addr + (p << 2));
2127 temp_dq_in_delay2 = readl(addr + (i << 2));
2129 if (shift_dq + (int32_t)temp_dq_in_delay1 >
2130 (int32_t)IO_IO_IN_DELAY_MAX) {
2131 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2;
2132 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
2133 shift_dq = -(int32_t)temp_dq_in_delay1;
2135 debug_cond(DLEVEL == 2, "vfifo_center: after: \
2136 shift_dq[%u]=%d\n", i, shift_dq);
2137 final_dq[i] = temp_dq_in_delay1 + shift_dq;
2138 scc_mgr_set_dq_in_delay(p, final_dq[i]);
2141 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
2142 left_edge[i] - shift_dq + (-mid_min),
2143 right_edge[i] + shift_dq - (-mid_min));
2144 /* To determine values for export structures */
2145 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2146 dq_margin = left_edge[i] - shift_dq + (-mid_min);
2148 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2149 dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2152 final_dqs = new_dqs;
2153 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2154 final_dqs_en = start_dqs_en - mid_min;
2157 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2158 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
2159 scc_mgr_load_dqs(read_group);
2163 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
2164 scc_mgr_load_dqs(read_group);
2165 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \
2166 dqs_margin=%d", __func__, __LINE__,
2167 dq_margin, dqs_margin);
2170 * Do not remove this line as it makes sure all of our decisions
2171 * have been applied. Apply the update bit.
2173 writel(0, &sdr_scc_mgr->update);
2175 return (dq_margin >= 0) && (dqs_margin >= 0);
2179 * calibrate the read valid prediction FIFO.
2181 * - read valid prediction will consist of finding a good DQS enable phase,
2182 * DQS enable delay, DQS input phase, and DQS input delay.
2183 * - we also do a per-bit deskew on the DQ lines.
/*
 * Run the VFIFO calibration stage (CAL_STAGE_VFIFO) for one read group.
 *
 * read_group: group to calibrate; write_group is set to the same value
 *
 * Search order, stopping as soon as grp_calibrated becomes non-zero:
 *   - output delay d from 0 to dtaps_per_ptap in steps of 2, where
 *     dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
 *     IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
 *   - DQ/DQS output phase p from 0 to IO_DQDQS_OUT_PHASE_MAX.
 * For each combination the read patterns are loaded, the guaranteed read
 * test is run (unless PHY_DEBUG_DISABLE_GUARANTEED_READ is set in
 * gbl->phy_debug_mode_flags), the DQS enable phase search is run, and
 * rw_mgr_mem_calibrate_vfifo_center() is called for every shadow register
 * set that is not flagged in param->skip_shadow_regs.
 *
 * failed_substage tracks the most recent failing sub-stage and, when nothing
 * calibrated, set_failing_group_stage() is called for CAL_STAGE_VFIFO.
 * The group delay chains are zeroed with scc_mgr_zero_group() afterwards
 * (the guarding condition is not visible in this excerpt).
 */
2185 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
2188 uint32_t p, d, rank_bgn, sr;
2189 uint32_t dtaps_per_ptap;
2191 uint32_t grp_calibrated;
2192 uint32_t write_group, write_test_bgn;
2193 uint32_t failed_substage;
2195 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
2197 /* update info for sims */
2198 reg_file_set_stage(CAL_STAGE_VFIFO);
2200 write_group = read_group;
2201 write_test_bgn = test_bgn;
2203 /* USER Determine number of delay taps for each phase tap */
2204 dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
2205 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
2207 /* update info for sims */
2208 reg_file_set_group(read_group);
2212 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2213 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2215 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) {
2217 * In RLDRAMX we may be messing the delay of pins in
2218 * the same write group but outside of the current read
2219 * the group, but that's ok because we haven't
2220 * calibrated output side yet.
2223 scc_mgr_apply_group_all_out_delay_add_all_ranks(
2227 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
2229 /* set a particular dqdqs phase */
2230 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p);
2232 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \
2233 p=%u d=%u\n", __func__, __LINE__,
2237 * Load up the patterns used by read calibration
2238 * using current DQDQS phase.
2240 rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2241 if (!(gbl->phy_debug_mode_flags &
2242 PHY_DEBUG_DISABLE_GUARANTEED_READ)) {
2243 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks
2244 (read_group, 1, &bit_chk)) {
2245 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:",
2246 __func__, __LINE__);
2247 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n",
2255 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
2256 (write_group, read_group, test_bgn)) {
2258 * USER Read per-bit deskew can be done on a
2259 * per shadow register basis.
2261 for (rank_bgn = 0, sr = 0;
2262 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2263 rank_bgn += NUM_RANKS_PER_SHADOW_REG,
2266 * Determine if this set of ranks
2267 * should be skipped entirely.
2269 if (!param->skip_shadow_regs[sr]) {
2271 * If doing read after write
2272 * calibration, do not update
2273 * FOM, now - do it then.
2275 if (!rw_mgr_mem_calibrate_vfifo_center
2276 (rank_bgn, write_group,
2277 read_group, test_bgn, 1, 0)) {
2280 CAL_SUBSTAGE_VFIFO_CENTER;
2286 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2291 if (grp_calibrated == 0) {
2292 set_failing_group_stage(write_group, CAL_STAGE_VFIFO,
2298 * Reset the delay chains back to zero if they have moved > 1
2299 * (check for > 1 because loop will increase d even when pass in
2303 scc_mgr_zero_group(write_group, 1);
2308 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
/*
 * Read deskew pass run after write deskew (CAL_STAGE_VFIFO_AFTER_WRITES,
 * sub-stage CAL_SUBSTAGE_VFIFO_CENTER) for one read group.
 *
 * read_group: group to calibrate; write_group is set to the same value
 *
 * rw_mgr_mem_calibrate_vfifo_center() is called once per shadow register
 * set, stepping rank_bgn by NUM_RANKS_PER_SHADOW_REG and skipping sets
 * flagged in param->skip_shadow_regs. If grp_calibrated ends up 0 the
 * failing stage is recorded with set_failing_group_stage().
 *
 * The arguments passed to the centring call and the return statements are
 * not visible in this excerpt.
 */
2309 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
2312 uint32_t rank_bgn, sr;
2313 uint32_t grp_calibrated;
2314 uint32_t write_group;
2316 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn);
2318 /* update info for sims */
2320 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2321 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2323 write_group = read_group;
2325 /* update info for sims */
2326 reg_file_set_group(read_group);
2329 /* Read per-bit deskew can be done on a per shadow register basis */
2330 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2331 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
2332 /* Determine if this set of ranks should be skipped entirely */
2333 if (!param->skip_shadow_regs[sr]) {
2334 /* This is the last calibration round, update FOM here */
2335 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
2346 if (grp_calibrated == 0) {
2347 set_failing_group_stage(write_group,
2348 CAL_STAGE_VFIFO_AFTER_WRITES,
2349 CAL_SUBSTAGE_VFIFO_CENTER);
2356 /* Calibrate LFIFO to find smallest read latency */
/*
 * LFIFO calibration (CAL_STAGE_LFIFO / CAL_SUBSTAGE_READ_LATENCY): search
 * for the smallest read latency at which reads still work.
 *
 * After loading the read patterns, each candidate gbl->curr_read_lat is
 * written to phy_mgr_cfg->phy_rlat and checked with the all-ranks read test;
 * the latency is then decremented and the loop repeats while it is above 0.
 * The FIFOs are reset afterwards.
 *
 * On the success path 2 is added to gbl->curr_read_lat as a safety margin
 * and the result is written back to phy_mgr_cfg->phy_rlat. On the failure
 * path set_failing_group_stage(0xff, ...) records the stage. The condition
 * choosing between the two paths and the return statements are not visible
 * in this excerpt.
 */
2357 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2362 debug("%s:%d\n", __func__, __LINE__);
2364 /* update info for sims */
2365 reg_file_set_stage(CAL_STAGE_LFIFO);
2366 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2368 /* Load up the patterns used by read calibration for all ranks */
2369 rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2373 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2374 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
2375 __func__, __LINE__, gbl->curr_read_lat);
2377 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
2385 /* reduce read latency and see if things are working */
2387 gbl->curr_read_lat--;
2388 } while (gbl->curr_read_lat > 0);
2390 /* reset the fifos to get pointers to known state */
2392 writel(0, &phy_mgr_cmd->fifo_reset);
2395 /* add a fudge factor to the read latency that was determined */
2396 gbl->curr_read_lat += 2;
2397 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2398 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \
2399 read_lat=%u\n", __func__, __LINE__,
2400 gbl->curr_read_lat);
2403 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2404 CAL_SUBSTAGE_READ_LATENCY);
2406 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \
2407 read_lat=%u\n", __func__, __LINE__,
2408 gbl->curr_read_lat);
2414 * issue write test command.
2415 * two variants are provided. one that just tests a write pattern and
2416 * another that tests datamask functionality.
/*
 * rw_mgr_mem_calibrate_write_test_issue() - start one write/read-back test
 * @group: index that is scaled by 4 and added to the RW manager
 *         run-single-group address when the instruction is issued
 *
 * Programs the RW manager load counters and jump addresses and finally
 * writes the selected instruction (mcc_instruction) to the run-single-group
 * register of @group:
 *  - counters 2 and 3 are set up according to gbl->rw_wl_nop_cycles, with
 *    separate handling for the values -1, 0 and everything else,
 *  - the read datapath is reset,
 *  - counter 0 is loaded with 0x08 when quick_write_mode is set; the 0x40
 *    load that follows is presumably the else branch,
 *  - counter 0 jumps to mcc_instruction, counter 1 is loaded with 0x30 and
 *    a WAIT jump address.
 *
 * In every case both a *_DM_* and a plain instruction set are programmed
 * one after the other in this listing.
 * NOTE(review): the condition that selects between the two sets, the else
 * keywords and the remaining parameter(s) of the signature are not visible
 * here - confirm against the full source.
 */
2418 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
2421 uint32_t mcc_instruction;
/*
 * True only when STATIC_CALIB_STEPS contains CALIB_SKIP_WRITES and
 * ENABLE_SUPER_QUICK_CALIBRATION is non-zero.
 */
2422 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
2423 ENABLE_SUPER_QUICK_CALIBRATION);
2424 uint32_t rw_wl_nop_cycles;
2428 * Set counter and jump addresses for the right
2429 * number of NOP cycles.
2430 * The number of supported NOP cycles can range from -1 to infinity
2431 * Three different cases are handled:
2433 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
2434 * mechanism will be used to insert the right number of NOPs
2436 * 2. For a number of NOP cycles equals to 0, the micro-instruction
2437 * issuing the write command will jump straight to the
2438 * micro-instruction that turns on DQS (for DDRx), or outputs write
2439 * data (for RLD), skipping
2440 * the NOP micro-instruction all together
2442 * 3. A number of NOP cycles equal to -1 indicates that DQS must be
2443 * turned on in the same micro-instruction that issues the write
2444 * command. Then we need
2445 * to directly jump to the micro-instruction that sends out the data
2447 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
2448 * (2 and 3). One jump-counter (0) is used to perform multiple
2449 * write-read operations.
2450 * one counter left to issue this command in "multiple-group" mode
2453 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
/*
 * rw_wl_nop_cycles is a uint32_t, so with a 32-bit int the -1 below is
 * converted to 0xFFFFFFFF before the comparison; the branch is taken when
 * the stored value is 0xFFFFFFFF.
 */
2455 if (rw_wl_nop_cycles == -1) {
2457 * CNTR 2 - We want to execute the special write operation that
2458 * turns on DQS right away and then skip directly to the
2459 * instruction that sends out the data. We set the counter to a
2460 * large number so that the jump is always taken.
2462 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2464 /* CNTR 3 - Not used */
/* Datamask (DM) instruction set for the -1 case. */
2466 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
2467 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
2468 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2469 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2470 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
/* Plain (non-DM) instruction set for the -1 case. */
2472 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
2473 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
2474 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2475 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2476 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2478 } else if (rw_wl_nop_cycles == 0) {
2480 * CNTR 2 - We want to skip the NOP operation and go straight
2481 * to the DQS enable instruction. We set the counter to a large
2482 * number so that the jump is always taken.
2484 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2486 /* CNTR 3 - Not used */
2488 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2489 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
2490 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2492 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2493 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
2494 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2498 * CNTR 2 - In this case we want to execute the next instruction
2499 * and NOT take the jump. So we set the counter to 0. The jump
2500 * address doesn't count.
2502 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
2503 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2506 * CNTR 3 - Set the nop counter to the number of cycles we
2507 * need to loop for, minus 1.
2509 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
2511 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2512 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2513 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2515 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2516 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2517 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
/* Reset the read datapath before the test is started. */
2521 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
2522 RW_MGR_RESET_READ_DATAPATH_OFFSET);
2524 if (quick_write_mode)
2525 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
2527 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
2529 writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
2532 * CNTR 1 - This is used to ensure enough time elapses
2533 * for read data to come back.
2535 writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
2538 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
2539 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2541 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
2542 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
/* Issue the instruction; each group register is 4 bytes apart. */
2545 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
2546 writel(mcc_instruction, addr + (group << 2));
2549 /* Test writes, can check for a single bit pass or multiple bit pass */
/*
 * rw_mgr_mem_calibrate_write_test() - run the write test on a write group
 * @rank_bgn:    first rank to test
 * @write_group: write DQS group under test
 * @use_dm:      reported in the debug output; NOTE(review): presumably also
 *               forwarded to the issue helper, that argument line is not
 *               visible here
 * @all_correct: callers in this file pass PASS_ONE_BIT or PASS_ALL_BITS
 * @bit_chk:     output; starts as param->write_correct_mask and is ANDed
 *               with the per-rank result of every tested rank
 * @all_ranks:   non-zero tests up to RW_MGR_MEM_NUMBER_OF_RANKS, zero tests
 *               NUM_RANKS_PER_SHADOW_REG ranks starting at @rank_bgn
 *
 * Two return statements are visible: "*bit_chk == param->write_correct_mask"
 * (logged with the tag ALL) and "*bit_chk != 0x00" (logged with the tag ONE).
 * NOTE(review): the test choosing between them is not visible; presumably it
 * is @all_correct - confirm.
 */
2550 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
2551 uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
2552 uint32_t *bit_chk, uint32_t all_ranks)
2555 uint32_t correct_mask_vg;
2556 uint32_t tmp_bit_chk;
2558 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
2559 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
2560 uint32_t addr_rw_mgr;
2561 uint32_t base_rw_mgr;
2563 *bit_chk = param->write_correct_mask;
2564 correct_mask_vg = param->write_correct_mask_vg;
2566 for (r = rank_bgn; r < rank_end; r++) {
2567 if (param->skip_ranks[r]) {
2568 /* request to skip the rank */
2573 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
2576 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
/*
 * Walk the virtual groups from the highest index downwards; the loop has
 * no condition in its header, so its exit is presumably a break that is
 * not visible in this listing.
 */
2577 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) {
2578 /* reset the fifos to get pointers to known state */
2579 writel(0, &phy_mgr_cmd->fifo_reset);
/* Make room for this virtual group's bits in the accumulated result. */
2581 tmp_bit_chk = tmp_bit_chk <<
2582 (RW_MGR_MEM_DQ_PER_WRITE_DQS /
2583 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
2584 rw_mgr_mem_calibrate_write_test_issue(write_group *
2585 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg,
/*
 * The value read back is inverted before masking, so a set bit in the
 * read-back word clears the corresponding pass bit.
 */
2588 base_rw_mgr = readl(addr_rw_mgr);
2589 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
/* A bit stays set in *bit_chk only if it passed on every tested rank. */
2593 *bit_chk &= tmp_bit_chk;
2597 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2598 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \
2599 %u => %lu", write_group, use_dm,
2600 *bit_chk, param->write_correct_mask,
2601 (long unsigned int)(*bit_chk ==
2602 param->write_correct_mask));
2603 return *bit_chk == param->write_correct_mask;
2605 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2606 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ",
2607 write_group, use_dm, *bit_chk);
2608 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0,
2609 (long unsigned int)(*bit_chk != 0));
2610 return *bit_chk != 0x00;
2615 * center all windows. do per-bit-deskew to possibly increase size of
/*
 * rw_mgr_mem_calibrate_writes_center() - centre the write data window
 * @rank_bgn:    first rank passed to the write test
 * @write_group: write DQS group being calibrated
 * @test_bgn:    added to the bit index when a failing bit is reported and
 *               used as the start value of the counter p
 *
 * Phases visible in this listing:
 *  1. sweep the group's DQ output delay from 0 to IO_IO_OUT1_DELAY_MAX to
 *     find the left edge of each bit,
 *  2. sweep the DQS/OCT output delay to find the right edge of each bit,
 *  3. report a failing stage for any bit that has no window, then compute
 *     mid_min from the (left_edge - right_edge) differences and shift each
 *     DQ delay, clamped to the range 0..IO_IO_OUT1_DELAY_MAX,
 *  4. search for the DM window, first by stepping the DM delay and then by
 *     stepping the DQS delay, and program the DM delay to the computed mid.
 *
 * gbl->fom_out is increased by dq_margin + dqs_margin.
 *
 * Return: non-zero when dq_margin, dqs_margin and dm_margin are all >= 0.
 *
 * NOTE(review): many lines of this function (braces, else keywords, early
 * returns, some declarations and initialisations such as that of
 * sticky_bit_chk) are not visible in this listing; the phase description
 * above is limited to what the visible statements and comments show.
 */
2618 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
2619 uint32_t write_group, uint32_t test_bgn)
2621 uint32_t i, p, min_index;
2624 * Store these as signed since there are comparisons with
2628 uint32_t sticky_bit_chk;
2629 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2630 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2632 int32_t mid_min, orig_mid_min;
2633 int32_t new_dqs, start_dqs, shift_dq;
2634 int32_t dq_margin, dqs_margin, dm_margin;
2636 uint32_t temp_dq_out1_delay;
2639 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
/*
 * The starting DQS delay is read from entry RW_MGR_MEM_DQ_PER_WRITE_DQS of
 * the OUT1 delay registers (4 bytes per entry).
 */
2643 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2644 start_dqs = readl(addr +
2645 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2647 /* per-bit deskew */
2650 * set the left and right edge of each bit to an illegal value
2651 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2654 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2655 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2656 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2659 /* Search for the left edge of the window for each bit */
/*
 * NOTE(review): scc_mgr_apply_group_dq_out1_delay() is called with two
 * arguments here and with a single argument further down where the DQ delay
 * chains are reset to 0 - confirm both calls against the helper prototype.
 */
2660 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
2661 scc_mgr_apply_group_dq_out1_delay(write_group, d);
2663 writel(0, &sdr_scc_mgr->update);
2666 * Stop searching when the read test doesn't pass AND when
2667 * we've seen a passing read on every bit.
2669 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2670 0, PASS_ONE_BIT, &bit_chk, 0);
2671 sticky_bit_chk = sticky_bit_chk | bit_chk;
2672 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2673 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \
2674 == %u && %u [bit_chk= %u ]\n",
2675 d, sticky_bit_chk, param->write_correct_mask,
2681 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2684 * Remember a passing test as the
2690 * If a left edge has not been seen
2691 * yet, then a future passing test will
2692 * mark this edge as the right edge.
2695 IO_IO_OUT1_DELAY_MAX + 1) {
2696 right_edge[i] = -(d + 1);
2699 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d);
2700 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2701 (int)(bit_chk & 1), i, left_edge[i]);
2702 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
/* Move on to the next bit of the test result. */
2704 bit_chk = bit_chk >> 1;
2709 /* Reset DQ delay chains to 0 */
2710 scc_mgr_apply_group_dq_out1_delay(0);
2712 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
2713 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2714 %d right_edge[%u]: %d\n", __func__, __LINE__,
2715 i, left_edge[i], i, right_edge[i]);
2718 * Check for cases where we haven't found the left edge,
2719 * which makes our assignment of the the right edge invalid.
2720 * Reset it to the illegal value.
2722 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
2723 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
2724 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2725 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \
2726 right_edge[%u]: %d\n", __func__, __LINE__,
2731 * Reset sticky bit (except for bits where we have
2732 * seen the left edge).
2734 sticky_bit_chk = sticky_bit_chk << 1;
2735 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1))
2736 sticky_bit_chk = sticky_bit_chk | 1;
2742 /* Search for the right edge of the window for each bit */
2743 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
2744 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2747 writel(0, &sdr_scc_mgr->update);
2750 * Stop searching when the read test doesn't pass AND when
2751 * we've seen a passing read on every bit.
2753 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2754 0, PASS_ONE_BIT, &bit_chk, 0);
2756 sticky_bit_chk = sticky_bit_chk | bit_chk;
2757 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2759 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \
2760 %u && %u\n", d, sticky_bit_chk,
2761 param->write_correct_mask, stop);
2765 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
2767 /* d = 0 failed, but it passed when
2768 testing the left edge, so it must be
2769 marginal, set it to -1 */
2770 if (right_edge[i] ==
2771 IO_IO_OUT1_DELAY_MAX + 1 &&
2773 IO_IO_OUT1_DELAY_MAX + 1) {
2780 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2783 * Remember a passing test as
2790 * If a right edge has not
2791 * been seen yet, then a future
2792 * passing test will mark this
2793 * edge as the left edge.
2795 if (right_edge[i] ==
2796 IO_IO_OUT1_DELAY_MAX + 1)
2797 left_edge[i] = -(d + 1);
2800 * d = 0 failed, but it passed
2801 * when testing the left edge,
2802 * so it must be marginal, set
2805 if (right_edge[i] ==
2806 IO_IO_OUT1_DELAY_MAX + 1 &&
2808 IO_IO_OUT1_DELAY_MAX + 1)
2811 * If a right edge has not been
2812 * seen yet, then a future
2813 * passing test will mark this
2814 * edge as the left edge.
2816 else if (right_edge[i] ==
2817 IO_IO_OUT1_DELAY_MAX +
2819 left_edge[i] = -(d + 1);
2822 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d);
2823 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2824 (int)(bit_chk & 1), i, left_edge[i]);
2825 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2827 bit_chk = bit_chk >> 1;
2832 /* Check that all bits have a window */
2833 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2834 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2835 %d right_edge[%u]: %d", __func__, __LINE__,
2836 i, left_edge[i], i, right_edge[i]);
2837 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
2838 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
2839 set_failing_group_stage(test_bgn + i,
2841 CAL_SUBSTAGE_WRITES_CENTER);
2846 /* Find middle of window for each DQ bit */
2847 mid_min = left_edge[0] - right_edge[0];
2849 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2850 mid = left_edge[i] - right_edge[i];
2851 if (mid < mid_min) {
2858 * -mid_min/2 represents the amount that we need to move DQS.
2859 * If mid_min is odd and positive we'll need to add one to
2860 * make sure the rounding in further calculations is correct
2861 * (always bias to the right), so just add 1 for all positive values.
2865 mid_min = mid_min / 2;
2866 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
2869 /* Determine the amount we can change DQS (which is -mid_min) */
2870 orig_mid_min = mid_min;
2871 new_dqs = start_dqs;
2873 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \
2874 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min);
2875 /* Initialize data for export structures */
2876 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
2877 dq_margin = IO_IO_OUT1_DELAY_MAX + 1;
2879 /* add delay to bring centre of all DQ windows to the same "level" */
2880 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
2881 /* Use values before divide by 2 to reduce round off error */
2882 shift_dq = (left_edge[i] - right_edge[i] -
2883 (left_edge[min_index] - right_edge[min_index]))/2 +
2884 (orig_mid_min - mid_min);
2886 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \
2887 [%u]=%d\n", __func__, __LINE__, i, shift_dq);
/*
 * Clamp shift_dq so that the delay written for this bit stays within
 * 0..IO_IO_OUT1_DELAY_MAX.
 */
2889 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2890 temp_dq_out1_delay = readl(addr + (i << 2));
2891 if (shift_dq + (int32_t)temp_dq_out1_delay >
2892 (int32_t)IO_IO_OUT1_DELAY_MAX) {
2893 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
2894 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
2895 shift_dq = -(int32_t)temp_dq_out1_delay;
2897 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
2899 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
2902 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
2903 left_edge[i] - shift_dq + (-mid_min),
2904 right_edge[i] + shift_dq - (-mid_min));
2905 /* To determine values for export structures */
/* dq_margin and dqs_margin end up as the minimum over all bits. */
2906 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2907 dq_margin = left_edge[i] - shift_dq + (-mid_min);
2909 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2910 dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2914 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
2915 writel(0, &sdr_scc_mgr->update);
2918 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
2921 * set the left and right edge of each bit to an illegal value,
2922 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value,
2924 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
2925 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
/*
 * DM window bookkeeping: bgn_curr and end_curr track the window currently
 * being extended, bgn_best, end_best and win_best the largest window seen so
 * far. IO_IO_OUT1_DELAY_MAX + 1 again serves as the "not set" value.
 */
2926 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2927 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2928 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2929 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1;
2930 int32_t win_best = 0;
2932 /* Search for the/part of the window with DM shift */
/*
 * NOTE(review): the d >= 0 loop condition only terminates if d is a signed
 * variable; its declaration is not visible in this listing - confirm.
 */
2933 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
2934 scc_mgr_apply_group_dm_out1_delay(d);
2935 writel(0, &sdr_scc_mgr->update);
2937 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2938 PASS_ALL_BITS, &bit_chk,
2940 /* USE Set current end of the window */
2943 * If a starting edge of our window has not been seen
2944 * this is our current start of the DM window.
2946 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2950 * If current window is bigger than best seen.
2951 * Set best seen to be current window.
2953 if ((end_curr-bgn_curr+1) > win_best) {
2954 win_best = end_curr-bgn_curr+1;
2955 bgn_best = bgn_curr;
2956 end_best = end_curr;
2959 /* We just saw a failing test. Reset temp edge */
2960 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2961 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2966 /* Reset DM delay chains to 0 */
2967 scc_mgr_apply_group_dm_out1_delay(0);
2970 * Check to see if the current window nudges up aganist 0 delay.
2971 * If so we need to continue the search by shifting DQS otherwise DQS
2972 * search begins as a new search. */
2973 if (end_curr != 0) {
2974 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2975 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2978 /* Search for the/part of the window with DQS shifts */
2979 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
2981 * Note: This only shifts DQS, so are we limiting ourselve to
2982 * width of DQ unnecessarily.
2984 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2987 writel(0, &sdr_scc_mgr->update);
2988 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2989 PASS_ALL_BITS, &bit_chk,
2991 /* USE Set current end of the window */
2994 * If a beginning edge of our window has not been seen
2995 * this is our current begin of the DM window.
2997 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
3001 * If current window is bigger than best seen. Set best
3002 * seen to be current window.
3004 if ((end_curr-bgn_curr+1) > win_best) {
3005 win_best = end_curr-bgn_curr+1;
3006 bgn_best = bgn_curr;
3007 end_best = end_curr;
3010 /* We just saw a failing test. Reset temp edge */
3011 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3012 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3014 /* Early exit optimization: if the remaining delay
3015 chain space is less than already seen largest window
3018 (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) {
3024 /* assign left and right edge for cal and reporting; */
/* The left edge is stored as the negated start of the best window. */
3025 left_edge[0] = -1*bgn_best;
3026 right_edge[0] = end_best;
3028 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3029 __LINE__, left_edge[0], right_edge[0]);
3031 /* Move DQS (back to orig) */
3032 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3036 /* Find middle of window for the DM bit */
3037 mid = (left_edge[0] - right_edge[0]) / 2;
3039 /* only move right, since we are not moving DQS/DQ */
3043 /* dm_margin should fail if we never find a window */
3047 dm_margin = left_edge[0] - mid;
3049 scc_mgr_apply_group_dm_out1_delay(mid);
3050 writel(0, &sdr_scc_mgr->update);
3052 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \
3053 dm_margin=%d\n", __func__, __LINE__, left_edge[0],
3054 right_edge[0], mid, dm_margin);
/* Only the DQ and DQS margins contribute to the output figure of merit. */
3056 gbl->fom_out += dq_margin + dqs_margin;
3058 debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \
3059 dqs_margin=%d dm_margin=%d\n", __func__, __LINE__,
3060 dq_margin, dqs_margin, dm_margin);
3063 * Do not remove this line as it makes sure all of our
3064 * decisions have been applied.
3066 writel(0, &sdr_scc_mgr->update);
/* Success requires every one of the three margins to be non-negative. */
3067 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0);
3070 /* calibrate the write operations */
/*
 * rw_mgr_mem_calibrate_writes() - write calibration for one group
 * @rank_bgn: first rank, forwarded to rw_mgr_mem_calibrate_writes_center()
 * @g:        write group; recorded in the register file and forwarded
 *
 * Records CAL_STAGE_WRITES / CAL_SUBSTAGE_WRITES_CENTER and the group in the
 * register file, runs the write centering step and records the failing
 * group and stage when that step returns zero.
 *
 * NOTE(review): the last parameter of the signature (test_bgn is used in
 * the body) and the return statements are not visible in this listing.
 */
3071 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g,
3074 /* update info for sims */
3075 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn);
3077 reg_file_set_stage(CAL_STAGE_WRITES);
3078 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3080 reg_file_set_group(g);
3082 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) {
3083 set_failing_group_stage(g, CAL_STAGE_WRITES,
3084 CAL_SUBSTAGE_WRITES_CENTER);
3092 * mem_precharge_and_activate() - Precharge all banks and activate
3094 * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
/*
 * For every rank up to RW_MGR_MEM_NUMBER_OF_RANKS that passes the
 * param->skip_ranks[] check, this selects the rank with ODT off, issues
 * RW_MGR_PRECHARGE_ALL, loads counters 0 and 1 with 0x0F together with the
 * ACTIVATE_0_AND_1 wait jump addresses and then issues
 * RW_MGR_ACTIVATE_0_AND_1.
 *
 * NOTE(review): the statement executed for a skipped rank is not visible in
 * this listing; presumably it is a continue.
 */
3096 static void mem_precharge_and_activate(void)
3100 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3101 /* Test if the rank should be skipped. */
3102 if (param->skip_ranks[r])
3106 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3108 /* Precharge all banks. */
3109 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3110 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
/* Counter 0 and counter 1 drive the two wait steps of the activate. */
3112 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3113 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3114 &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3116 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3117 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3118 &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3120 /* Activate rows. */
3121 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3122 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3127 * mem_init_latency() - Configure memory RLAT and WLAT settings
3129 * Configure memory RLAT and WLAT parameters.
/*
 * Reads the write latency as data_mgr->t_wl_add + data_mgr->mem_t_add and
 * the read latency from data_mgr->t_rl_add. It stores wlat - 1 in
 * gbl->rw_wl_nop_cycles, sets gbl->curr_read_lat to rlat + 16 capped at
 * max_latency, and writes the results to phy_mgr_cfg->phy_rlat and
 * phy_mgr_cfg->afi_wlat.
 */
3131 static void mem_init_latency(void)
3134 * For AV/CV, LFIFO is hardened and always runs at full rate
3135 * so max latency in AFI clocks, used here, is correspondingly
/* Largest value representable in MAX_LATENCY_COUNT_WIDTH bits. */
3138 const u32 max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1;
3141 debug("%s:%d\n", __func__, __LINE__);
3144 * Read in write latency.
3145 * WL for Hard PHY does not include additive latency.
3147 wlat = readl(&data_mgr->t_wl_add);
3148 wlat += readl(&data_mgr->mem_t_add);
/*
 * A write latency of 0 produces -1 here, which is one of the values
 * rw_mgr_mem_calibrate_write_test_issue() checks rw_wl_nop_cycles against.
 */
3150 gbl->rw_wl_nop_cycles = wlat - 1;
3152 /* Read in read latency. */
3153 rlat = readl(&data_mgr->t_rl_add);
3155 /* Set a pretty high read latency initially. */
3156 gbl->curr_read_lat = rlat + 16;
3157 if (gbl->curr_read_lat > max_latency)
3158 gbl->curr_read_lat = max_latency;
3160 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3162 /* Advertise write latency. */
3163 writel(wlat, &phy_mgr_cfg->afi_wlat);
3166 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */
/*
 * mem_skip_calibrate() - apply fixed settings instead of calibrating
 *
 * For each shadow register set (r advances by NUM_RANKS_PER_SHADOW_REG) the
 * visible code sets the DQS enable phase of every read DQS group to 0,
 * programs a DQ/DQS output phase, enables DQS, DQS IO, DQ and DM through the
 * SCC manager and sets a DQS bus-in delay of 10 for every read group.
 * Afterwards the hard VFIFO is incremented CALIB_VFIFO_OFFSET times, the
 * FIFOs are reset and gbl->curr_read_lat is set to CALIB_LFIFO_OFFSET and
 * written to phy_mgr_cfg->phy_rlat.
 *
 * NOTE(review): the #else and #endif belonging to the #if below and several
 * closing braces are not visible in this listing, so it cannot be told from
 * here which of the output phase calls are compiled for a given
 * IO_DLL_CHAIN_LENGTH.
 */
3167 static void mem_skip_calibrate(void)
3169 uint32_t vfifo_offset;
3172 debug("%s:%d\n", __func__, __LINE__);
3173 /* Need to update every shadow register set used by the interface */
3174 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3175 r += NUM_RANKS_PER_SHADOW_REG) {
3177 * Set output phase alignment settings appropriate for
3180 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3181 scc_mgr_set_dqs_en_phase(i, 0);
3182 #if IO_DLL_CHAIN_LENGTH == 6
3183 scc_mgr_set_dqdqs_output_phase(i, 6);
3185 scc_mgr_set_dqdqs_output_phase(i, 7);
3190 * Write data arrives to the I/O two cycles before write
3191 * latency is reached (720 deg).
3192 * -> due to bit-slip in a/c bus
3193 * -> to allow board skew where dqs is longer than ck
3194 * -> how often can this happen!?
3195 * -> can claim back some ptaps for high freq
3196 * support if we can relax this, but i digress...
3198 * The write_clk leads mem_ck by 90 deg
3199 * The minimum ptap of the OPA is 180 deg
3200 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay
3201 * The write_clk is always delayed by 2 ptaps
3203 * Hence, to make DQS aligned to CK, we need to delay
3205 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3207 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
3208 * gives us the number of ptaps, which simplies to:
3210 * (1.25 * IO_DLL_CHAIN_LENGTH - 2)
/*
 * NOTE(review): the second argument is a floating-point expression; it is
 * presumably truncated when converted to the integer parameter of the
 * helper - confirm against the prototype.
 */
3212 scc_mgr_set_dqdqs_output_phase(i, (1.25 *
3213 IO_DLL_CHAIN_LENGTH - 2));
3215 writel(0xff, &sdr_scc_mgr->dqs_ena);
3216 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
/* Select each write group in turn before enabling its DQ and DM. */
3218 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3219 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3220 SCC_MGR_GROUP_COUNTER_OFFSET);
3222 writel(0xff, &sdr_scc_mgr->dq_ena);
3223 writel(0xff, &sdr_scc_mgr->dm_ena);
3224 writel(0, &sdr_scc_mgr->update);
3227 /* Compensate for simulation model behaviour */
3228 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3229 scc_mgr_set_dqs_bus_in_delay(i, 10);
3230 scc_mgr_load_dqs(i);
3232 writel(0, &sdr_scc_mgr->update);
3235 * ArriaV has hard FIFOs that can only be initialized by incrementing
/* One increment command per step up to CALIB_VFIFO_OFFSET. */
3238 vfifo_offset = CALIB_VFIFO_OFFSET;
3239 for (j = 0; j < vfifo_offset; j++) {
3240 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3242 writel(0, &phy_mgr_cmd->fifo_reset);
3245 * For ACV with hard lfifo, we get the skip-cal setting from
3246 * generation-time constant.
3248 gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3249 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3252 /* Memory calibration entry point */
/*
 * mem_calibrate() - top-level calibration sequence
 *
 * Flow visible in this listing:
 *  - clear gbl->error_stage and gbl->error_substage and set
 *    gbl->error_group to 0xff,
 *  - precharge and activate the memory, then select every read DQS group
 *    through the SCC group counter and call scc_mgr_set_hhp_extras() and
 *    scc_set_bypass_mode(),
 *  - when all CALIB_SKIP_ALL bits are set in dyn_calib_steps, call
 *    mem_skip_calibrate(),
 *  - in the NUM_CALIB_REPEAT loop, take each write group selected by the
 *    inverted param->skip_groups mask through VFIFO calibration, write
 *    calibration per shadow register set and the VFIFO end stage,
 *  - calibrate the LFIFO only when param->skip_groups is 0,
 *  - finish with a write to the SCC update register.
 *
 * The result is stored by run_mem_calibrate() in a variable named pass.
 *
 * NOTE(review): large parts of this function (else branches, the updates of
 * group_failed and failing_groups, continue/return statements, several
 * argument lines) are not visible in this listing; the summary above is
 * limited to the visible statements and comments.
 */
3253 static uint32_t mem_calibrate(void)
3256 uint32_t rank_bgn, sr;
3257 uint32_t write_group, write_test_bgn;
3258 uint32_t read_group, read_test_bgn;
3259 uint32_t run_groups, current_run;
3260 uint32_t failing_groups = 0;
3261 uint32_t group_failed = 0;
3262 uint32_t sr_failed = 0;
3264 debug("%s:%d\n", __func__, __LINE__);
3266 /* Initialize the data settings */
3267 gbl->error_substage = CAL_SUBSTAGE_NIL;
3268 gbl->error_stage = CAL_STAGE_NIL;
3269 gbl->error_group = 0xff;
3273 /* Initialize WLAT and RLAT. */
3276 /* Initialize bit slips. */
3277 mem_precharge_and_activate();
3279 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3280 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3281 SCC_MGR_GROUP_COUNTER_OFFSET);
3282 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3284 scc_mgr_set_hhp_extras();
3286 scc_set_bypass_mode(i);
/* The skip path is taken only when every CALIB_SKIP_ALL bit is set. */
3289 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3291 * Set VFIFO and LFIFO to instant-on settings in skip
3294 mem_skip_calibrate();
3296 for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3298 * Zero all delay chain/phase settings for all
3299 * groups and all shadow register sets.
/* A set bit in run_groups marks a group that is not skipped. */
3303 run_groups = ~param->skip_groups;
3305 for (write_group = 0, write_test_bgn = 0; write_group
3306 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3307 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
3308 /* Initialize the group failure */
/*
 * Take the lowest RW_MGR_NUM_DQS_PER_WRITE_GROUP bits of run_groups for
 * this write group and shift them out for the next iteration.
 */
3311 current_run = run_groups & ((1 <<
3312 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3313 run_groups = run_groups >>
3314 RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3316 if (current_run == 0)
3319 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3320 SCC_MGR_GROUP_COUNTER_OFFSET);
3321 scc_mgr_zero_group(write_group, 0);
/*
 * Iterate over the read groups that belong to this write group, i.e. the
 * range write_group * READ_WIDTH / WRITE_WIDTH up to (but excluding)
 * (write_group + 1) * READ_WIDTH / WRITE_WIDTH.
 */
3323 for (read_group = write_group *
3324 RW_MGR_MEM_IF_READ_DQS_WIDTH /
3325 RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3327 read_group < (write_group + 1) *
3328 RW_MGR_MEM_IF_READ_DQS_WIDTH /
3329 RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3331 read_group++, read_test_bgn +=
3332 RW_MGR_MEM_DQ_PER_READ_DQS) {
3333 /* Calibrate the VFIFO */
3334 if (!((STATIC_CALIB_STEPS) &
3335 CALIB_SKIP_VFIFO)) {
3336 if (!rw_mgr_mem_calibrate_vfifo
3342 phy_debug_mode_flags &
3343 PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3350 /* Calibrate the output side */
3351 if (group_failed == 0) {
3352 for (rank_bgn = 0, sr = 0; rank_bgn
3353 < RW_MGR_MEM_NUMBER_OF_RANKS;
3355 NUM_RANKS_PER_SHADOW_REG,
3358 if (!((STATIC_CALIB_STEPS) &
3359 CALIB_SKIP_WRITES)) {
3360 if ((STATIC_CALIB_STEPS)
3361 & CALIB_SKIP_DELAY_SWEEPS) {
3362 /* not needed in quick mode! */
3365 * Determine if this set of
3366 * ranks should be skipped
3369 if (!param->skip_shadow_regs[sr]) {
3370 if (!rw_mgr_mem_calibrate_writes
3371 (rank_bgn, write_group,
3375 phy_debug_mode_flags &
3376 PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3388 if (group_failed == 0) {
3389 for (read_group = write_group *
3390 RW_MGR_MEM_IF_READ_DQS_WIDTH /
3391 RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3393 read_group < (write_group + 1)
3394 * RW_MGR_MEM_IF_READ_DQS_WIDTH
3395 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3397 read_group++, read_test_bgn +=
3398 RW_MGR_MEM_DQ_PER_READ_DQS) {
3399 if (!((STATIC_CALIB_STEPS) &
3400 CALIB_SKIP_WRITES)) {
3401 if (!rw_mgr_mem_calibrate_vfifo_end
3402 (read_group, read_test_bgn)) {
3405 if (!(gbl->phy_debug_mode_flags
3406 & PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3414 if (group_failed != 0)
3419 * USER If there are any failing groups then report
3422 if (failing_groups != 0)
3425 /* Calibrate the LFIFO */
3426 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) {
3428 * If we're skipping groups as part of debug,
3429 * don't calibrate LFIFO.
3431 if (param->skip_groups == 0) {
3432 if (!rw_mgr_mem_calibrate_lfifo())
3440 * Do not remove this line as it makes sure all of our decisions
3441 * have been applied.
3443 writel(0, &sdr_scc_mgr->update);
3448 * run_mem_calibrate() - Perform memory calibration
3450 * This function triggers the entire memory calibration procedure.
/*
 * Visible sequence: reset the calibration status register, clear bit 22 of
 * sdr_ctrl->ctrl_cfg, initialise the PHY manager and the memory, run
 * mem_calibrate(), precharge/activate and reset the FIFOs, hand off to the
 * controller, write 0x2 to the mux select register and set bit 22 of
 * sdr_ctrl->ctrl_cfg again.
 *
 * NOTE(review): the declaration of pass, any condition around the handoff
 * and the return statement are not visible in this listing; presumably the
 * function returns pass - confirm.
 */
3452 static int run_mem_calibrate(void)
3456 debug("%s:%d\n", __func__, __LINE__);
3458 /* Reset pass/fail status shown on afi_cal_success/fail */
3459 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3461 /* Stop tracking manager. */
3462 clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3464 phy_mgr_initialize();
3465 rw_mgr_mem_initialize();
3467 /* Perform the actual memory calibration. */
3468 pass = mem_calibrate();
/* Leave the memory precharged/activated and the FIFOs in a reset state. */
3470 mem_precharge_and_activate();
3471 writel(0, &phy_mgr_cmd->fifo_reset);
3474 rw_mgr_mem_handoff();
3476 * In Hard PHY this is a 2-bit control:
3478 * 1: DDIO Mux Select
3480 writel(0x2, &phy_mgr_cfg->mux_sel);
3482 /* Start tracking manager. */
3483 setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3489 * debug_mem_calibrate() - Report result of memory calibration
3490 * @pass: Value indicating whether calibration passed or failed
3492 * This function reports the results of the memory calibration
3493 * and writes debug information into the register file.
/*
 * On the path that prints CALIBRATION PASSED, gbl->fom_in and gbl->fom_out
 * are limited to 0xff, packed as fom_in | fom_out << 8 and written to the
 * register file and to cal_debug_info, and cal_status is set to
 * PHY_MGR_CAL_SUCCESS. On the path that prints CALIBRATION FAILED the word
 * error_stage | error_substage << 8 | error_group << 16 is written to the
 * register file and to cal_debug_info and cal_status is set to
 * PHY_MGR_CAL_FAIL.
 *
 * NOTE(review): the test on @pass, the else keyword and the assignment that
 * clamps fom_in are not visible in this listing.
 */
3495 static void debug_mem_calibrate(int pass)
3497 uint32_t debug_info;
3500 printf("%s: CALIBRATION PASSED\n", __FILE__);
3505 if (gbl->fom_in > 0xff)
3508 if (gbl->fom_out > 0xff)
3509 gbl->fom_out = 0xff;
3511 /* Update the FOM in the register file */
3512 debug_info = gbl->fom_in;
3513 debug_info |= gbl->fom_out << 8;
3514 writel(debug_info, &sdr_reg_file->fom);
3516 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3517 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3519 printf("%s: CALIBRATION FAILED\n", __FILE__);
3521 debug_info = gbl->error_stage;
3522 debug_info |= gbl->error_substage << 8;
3523 debug_info |= gbl->error_group << 16;
3525 writel(debug_info, &sdr_reg_file->failing_stage);
3526 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3527 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
/*
 * NOTE(review): the following lines rebuild the same word from the same
 * three fields and write it to failing_stage a second time; as far as this
 * listing shows the repetition looks redundant - confirm.
 */
3529 /* Update the failing group/stage in the register file */
3530 debug_info = gbl->error_stage;
3531 debug_info |= gbl->error_substage << 8;
3532 debug_info |= gbl->error_group << 16;
3533 writel(debug_info, &sdr_reg_file->failing_stage);
3536 printf("%s: Calibration complete\n", __FILE__);
3540 * hc_initialize_rom_data() - Initialize ROM data
3542 * Initialize ROM data.
/*
 * Copies every entry of inst_rom_init[] to the RW manager instruction ROM
 * write window and every entry of ac_rom_init[] to the AC ROM write window.
 * Entry i is written at byte offset i * 4 from the window base.
 */
3544 static void hc_initialize_rom_data(void)
3548 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3549 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3550 writel(inst_rom_init[i], addr + (i << 2));
3552 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3553 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3554 writel(ac_rom_init[i], addr + (i << 2));
3558 * initialize_reg_file() - Initialize SDR register file
3560 * Initialize SDR register file.
/*
 * Writes REG_FILE_INIT_SEQ_SIGNATURE to the signature register and zeroes
 * the debug_data_addr, cur_stage, fom, failing_stage, debug1 and debug2
 * registers of the register file.
 */
3562 static void initialize_reg_file(void)
3564 /* Initialize the register file with the correct data */
3565 writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3566 writel(0, &sdr_reg_file->debug_data_addr);
3567 writel(0, &sdr_reg_file->cur_stage);
3568 writel(0, &sdr_reg_file->fom);
3569 writel(0, &sdr_reg_file->failing_stage);
3570 writel(0, &sdr_reg_file->debug1);
3571 writel(0, &sdr_reg_file->debug2);
3575 * initialize_hps_phy() - Initialize HPS PHY
3577 * Initialize HPS PHY.
/*
 * Builds the values for sdr_ctrl->phy_ctrl0, phy_ctrl1 and phy_ctrl2 from
 * the *_SET() field macros and writes them. The two tracking sample counts
 * are split across registers: the SAMPLECOUNT_31_20 and
 * LONGIDLESAMPLECOUNT_31_20 fields take the value shifted right by the width
 * of the corresponding 19_0 field.
 *
 * NOTE(review): the declaration of reg and the statements that reset it
 * between the three registers are not visible in this listing.
 */
3579 static void initialize_hps_phy(void)
3583 * Tracking also gets configured here because it's in the
3586 uint32_t trk_sample_count = 7500;
3587 uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3589 * Format is number of outer loops in the 16 MSB, sample
3594 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3595 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3596 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3597 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3598 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3599 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3601 * This field selects the intrinsic latency to RDATA_EN/FULL path.
3602 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3604 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3605 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3607 writel(reg, &sdr_ctrl->phy_ctrl0);
3610 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3612 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3613 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3614 trk_long_idle_sample_count);
3615 writel(reg, &sdr_ctrl->phy_ctrl1);
3618 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3619 trk_long_idle_sample_count >>
3620 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3621 writel(reg, &sdr_ctrl->phy_ctrl2);
3625 * initialize_tracking() - Initialize tracking
3627 * Initialize the register file with usable initial data.
3629 static void initialize_tracking(void)
3632 * Initialize the register file with the correct data.
3633 * Compute usable version of value in case we skip full
3634 * computation later.
3636 writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1,
3637 &sdr_reg_file->dtaps_per_ptap);
3639 /* trk_sample_count */
3640 writel(7500, &sdr_reg_file->trk_sample_count);
3642 /* longidle outer loop [15:0] */
3643 writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3646 * longidle sample count [31:24]
3647 * trfc, worst case of 933Mhz 4Gb [23:16]
3648 * trcd, worst case [15:8]
3651 writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3652 &sdr_reg_file->delays);
3655 writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) |
3656 (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0),
3657 &sdr_reg_file->trk_rw_mgr_addr);
3659 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH,
3660 &sdr_reg_file->trk_read_dqs_width);
3663 writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0),
3664 &sdr_reg_file->trk_rfsh);
3667 int sdram_calibration_full(void)
3669 struct param_type my_param;
3670 struct gbl_type my_gbl;
3673 memset(&my_param, 0, sizeof(my_param));
3674 memset(&my_gbl, 0, sizeof(my_gbl));
3679 /* Set the calibration enabled by default */
3680 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3682 * Only sweep all groups (regardless of fail state) by default
3683 * Set enabled read test by default.
3685 #if DISABLE_GUARANTEED_READ
3686 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3688 /* Initialize the register file */
3689 initialize_reg_file();
3691 /* Initialize any PHY CSR */
3692 initialize_hps_phy();
3694 scc_mgr_initialize();
3696 initialize_tracking();
3698 printf("%s: Preparing to start memory calibration\n", __FILE__);
3700 debug("%s:%d\n", __func__, __LINE__);
3701 debug_cond(DLEVEL == 1,
3702 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3703 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3704 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3705 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3706 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3707 debug_cond(DLEVEL == 1,
3708 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3709 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3710 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3711 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3712 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3713 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3714 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3715 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3716 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3717 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3718 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3719 IO_IO_OUT2_DELAY_MAX);
3720 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3721 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3723 hc_initialize_rom_data();
3725 /* update info for sims */
3726 reg_file_set_stage(CAL_STAGE_NIL);
3727 reg_file_set_group(0);
3730 * Load global needed for those actions that require
3731 * some dynamic calibration support.
3733 dyn_calib_steps = STATIC_CALIB_STEPS;
3735 * Load global to allow dynamic selection of delay loop settings
3736 * based on calibration mode.
3738 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3739 skip_delay_mask = 0xff;
3741 skip_delay_mask = 0x0;
3743 pass = run_mem_calibrate();
3744 debug_mem_calibrate(pass);