spi: microchip-core-qspi: Add regular transfers

Merge series from Conor Dooley <conor@kernel.org>: This is a v2 of a patchset I sent about this time last year, adding the regular transfer_one_message op to the microchip-core-qspi driver. In that v1, Mark expressed his dislike for that op, so v2 uses prepare/unprepare/transfer_one instead. The unprepare implementation still contains the 750 us delay that the driver had back in v1. I've heard a suggestion internally as to why it is needed, but it was unsubstantiated, so I still have no justification for it. I held off on sending a v2 because I could not explain the delay, but I don't want to hold off forever for something I might never understand.
author: Mark Brown <broonie@kernel.org> 2025-06-23 23:30:49 +0100
committer: Mark Brown <broonie@kernel.org> 2025-06-23 23:30:49 +0100
commit: 1256eb42db5d1635f4c6da5b1b58db0b53320883 (patch)
tree: 4a06ef5a4ed8e2f67ead59ad4257a7e6524ed8b1 /drivers/spi
parent: 5fc2c383125c2b4b6037e02ad8796b776b25e6d0 (diff)
parent: 8f9cf02c8852837923f1cdacfcc92e138513325c (diff)
3 files changed, 205 insertions, 37 deletions
diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c
index fa828fcaaef2..d13a9b755c7f 100644
--- a/drivers/spi/spi-microchip-core-qspi.c
+++ b/drivers/spi/spi-microchip-core-qspi.c
@@ -194,7 +194,7 @@ static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi)
 	}
 }
 
-static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi, bool word)
+static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi)
 {
 	u32 control, data;
 
@@ -222,6 +222,87 @@ static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi, bool word)
 	}
 }
 
+/*
+ * Full-duplex PIO transfer for regular (non spi-mem) messages.
+ *
+ * Clocks out qspi->tx_len bytes taken from qspi->txbuf, or the filler
+ * value 0xaa when txbuf is NULL, and stores the same number of received
+ * bytes in qspi->rxbuf (rx_len is forced to tx_len on entry). While at
+ * least four bytes remain, data moves a 32-bit word at a time through the
+ * X4 data registers; the tail (< 4 bytes) goes through the byte-wide
+ * registers.
+ *
+ * All FIFO waits are unbounded busy-polls of REG_STATUS.
+ */
+static inline void mchp_coreqspi_write_read_op(struct mchp_coreqspi *qspi)
+{
+	u32 control, data;
+
+	qspi->rx_len = qspi->tx_len;
+
+	control = readl_relaxed(qspi->regs + REG_CONTROL);
+	control |= CONTROL_FLAGSX4;
+	writel_relaxed(control, qspi->regs + REG_CONTROL);
+
+	while (qspi->tx_len >= 4) {
+		while (readl_relaxed(qspi->regs + REG_STATUS) & STATUS_TXFIFOFULL)
+			;
+
+		if (qspi->txbuf) {
+			data = *((u32 *)qspi->txbuf);
+			qspi->txbuf += 4;
+		} else {
+			data = 0xaa;
+		}
+		qspi->tx_len -= 4;
+		writel_relaxed(data, qspi->regs + REG_X4_TX_DATA);
+
+		/*
+		 * The rx FIFO is twice the size of the tx FIFO, so there is
+		 * no requirement to block transmission if receive data is not
+		 * ready, and it is fine to let the tx FIFO completely fill
+		 * without reading anything from the rx FIFO. Once the tx FIFO
+		 * has been filled and becomes non-full due to a transmission
+		 * occurring there will always be something to receive.
+		 * IOW, this is safe as TX_FIFO_SIZE + 4 < 2 * TX_FIFO_SIZE
+		 */
+		if (qspi->rx_len >= 4) {
+			if (readl_relaxed(qspi->regs + REG_STATUS) & STATUS_RXAVAILABLE) {
+				data = readl_relaxed(qspi->regs + REG_X4_RX_DATA);
+				*(u32 *)qspi->rxbuf = data;
+				qspi->rxbuf += 4;
+				qspi->rx_len -= 4;
+			}
+		}
+	}
+
+	/*
+	 * Since transmission is not being blocked by clearing the rx FIFO,
+	 * loop here until all received data "leaked" by the loop above has
+	 * been dealt with.
+	 */
+	while (qspi->rx_len >= 4) {
+		while (readl_relaxed(qspi->regs + REG_STATUS) & STATUS_RXFIFOEMPTY)
+			;
+		data = readl_relaxed(qspi->regs + REG_X4_RX_DATA);
+		*(u32 *)qspi->rxbuf = data;
+		qspi->rxbuf += 4;
+		qspi->rx_len -= 4;
+	}
+
+	/*
+	 * Since rx_len and tx_len must be < 4 bytes at this point, there's no
+	 * concern about overflowing the rx or tx FIFOs any longer. It's
+	 * therefore safe to loop over the remainder of the transmit data before
+	 * handling the remaining receive data.
+	 */
+	if (!qspi->tx_len)
+		return;
+
+	/* Leave X4 mode: the tail is moved one byte per register access. */
+	control &= ~CONTROL_FLAGSX4;
+	writel_relaxed(control, qspi->regs + REG_CONTROL);
+
+	while (qspi->tx_len--) {
+		while (readl_relaxed(qspi->regs + REG_STATUS) & STATUS_TXFIFOFULL)
+			;
+		/*
+		 * Only advance txbuf when there is one: with a NULL txbuf
+		 * (filler transmit) the pointer must stay NULL rather than
+		 * being incremented.
+		 */
+		if (qspi->txbuf)
+			data = *qspi->txbuf++;
+		else
+			data = 0xaa;
+		writel_relaxed(data, qspi->regs + REG_TX_DATA);
+	}
+
+	while (qspi->rx_len--) {
+		while (readl_relaxed(qspi->regs + REG_STATUS) & STATUS_RXFIFOEMPTY)
+			;
+		data = readl_relaxed(qspi->regs + REG_RX_DATA);
+		*qspi->rxbuf++ = (data & 0xFF);
+	}
+}
+
 static void mchp_coreqspi_enable_ints(struct mchp_coreqspi *qspi)
 {
 	u32 mask = IEN_TXDONE |
@@ -266,7 +347,7 @@ static irqreturn_t mchp_coreqspi_isr(int irq, void *dev_id)
 }
 
 static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_device *spi,
-				     const struct spi_mem_op *op)
+				     u32 max_freq)
 {
 	unsigned long clk_hz;
 	u32 control, baud_rate_val = 0;
@@ -275,11 +356,11 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi
 	if (!clk_hz)
 		return -EINVAL;
 
-	baud_rate_val = DIV_ROUND_UP(clk_hz, 2 * op->max_freq);
+	baud_rate_val = DIV_ROUND_UP(clk_hz, 2 * max_freq);
 	if (baud_rate_val > MAX_DIVIDER || baud_rate_val < MIN_DIVIDER) {
 		dev_err(&spi->dev,
 			"could not configure the clock for spi clock %d Hz & system clock %ld Hz\n",
-			op->max_freq, clk_hz);
+			max_freq, clk_hz);
 		return -EINVAL;
 	}
 
@@ -367,23 +448,13 @@ static inline void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const str
 	writel_relaxed(frames, qspi->regs + REG_FRAMES);
 }
 
-static int mchp_qspi_wait_for_ready(struct spi_mem *mem)
+static int mchp_coreqspi_wait_for_ready(struct mchp_coreqspi *qspi)
 {
-	struct mchp_coreqspi *qspi = spi_controller_get_devdata
-				    (mem->spi->controller);
 	u32 status;
-	int ret;
 
-	ret = readl_poll_timeout(qspi->regs + REG_STATUS, status,
+	return readl_poll_timeout(qspi->regs + REG_STATUS, status,
 				 (status & STATUS_READY), 0,
 				 TIMEOUT_MS);
-	if (ret) {
-		dev_err(&mem->spi->dev,
-			"Timeout waiting on QSPI ready.\n");
-		return -ETIMEDOUT;
-	}
-
-	return ret;
 }
 
 static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
@@ -396,11 +467,13 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o
 	int err, i;
 
 	mutex_lock(&qspi->op_lock);
-	err = mchp_qspi_wait_for_ready(mem);
-	if (err)
+	err = mchp_coreqspi_wait_for_ready(qspi);
+	if (err) {
+		dev_err(&mem->spi->dev, "Timeout waiting on QSPI ready.\n");
 		goto error;
+	}
 
-	err = mchp_coreqspi_setup_clock(qspi, mem->spi, op);
+	err = mchp_coreqspi_setup_clock(qspi, mem->spi, op->max_freq);
 	if (err)
 		goto error;
 
@@ -415,7 +488,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o
 		qspi->rxbuf = NULL;
 		qspi->tx_len = op->cmd.nbytes;
 		qspi->rx_len = 0;
-		mchp_coreqspi_write_op(qspi, false);
+		mchp_coreqspi_write_op(qspi);
 	}
 
 	qspi->txbuf = &opaddr[0];
@@ -426,7 +499,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o
 		qspi->rxbuf = NULL;
 		qspi->tx_len = op->addr.nbytes;
 		qspi->rx_len = 0;
-		mchp_coreqspi_write_op(qspi, false);
+		mchp_coreqspi_write_op(qspi);
 	}
 
 	if (op->data.nbytes) {
@@ -435,7 +508,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o
 			qspi->rxbuf = NULL;
 			qspi->rx_len = 0;
 			qspi->tx_len = op->data.nbytes;
-			mchp_coreqspi_write_op(qspi, true);
+			mchp_coreqspi_write_op(qspi);
 		} else {
 			qspi->txbuf = NULL;
 			qspi->rxbuf = (u8 *)op->data.buf.in;
@@ -515,6 +588,109 @@ static const struct spi_controller_mem_caps mchp_coreqspi_mem_caps = {
 	.per_op_freq = true,
 };
 
+/*
+ * Counterpart of mchp_coreqspi_prepare_message(): waits out a fixed
+ * post-message delay and then releases op_lock, which was taken when the
+ * message was prepared. Always returns 0.
+ */
+static int mchp_coreqspi_unprepare_message(struct spi_controller *ctlr, struct spi_message *m)
+{
+	struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr);
+
+	/*
+	 * This delay is required for the driver to function correctly,
+	 * but no explanation has been determined for why it is required.
+	 *
+	 * A mutex is released right below, so this path is allowed to
+	 * sleep: sleep for at least 750 us instead of busy-waiting.
+	 */
+	usleep_range(750, 1000);
+
+	mutex_unlock(&qspi->op_lock);
+
+	return 0;
+}
+
+/*
+ * Set the controller up for one regular SPI message.
+ *
+ * Takes op_lock, waits for the controller to report ready, programs the
+ * clock divider from the device's max_speed_hz, selects the bus-width
+ * mode from the widest transfer in the message and programs the frame
+ * registers with the total and command byte counts.
+ *
+ * Returns 0 with op_lock still held (it is released by
+ * mchp_coreqspi_unprepare_message()), or a negative errno with op_lock
+ * released if the controller is not ready or the clock cannot be set up.
+ */
+static int mchp_coreqspi_prepare_message(struct spi_controller *ctlr, struct spi_message *m)
+{
+	struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr);
+	struct spi_transfer *t = NULL;
+	u32 control, frames;
+	u32 total_bytes = 0, cmd_bytes = 0;
+	int ret;
+	bool quad = false, dual = false;
+
+	mutex_lock(&qspi->op_lock);
+	ret = mchp_coreqspi_wait_for_ready(qspi);
+	if (ret) {
+		dev_err(&ctlr->dev, "Timeout waiting on QSPI ready.\n");
+		goto err_unlock;
+	}
+
+	ret = mchp_coreqspi_setup_clock(qspi, m->spi, m->spi->max_speed_hz);
+	if (ret)
+		goto err_unlock;
+
+	/* Start from a clean slate: clear all bus-width mode bits. */
+	control = readl_relaxed(qspi->regs + REG_CONTROL);
+	control &= ~(CONTROL_MODE12_MASK | CONTROL_MODE0);
+	writel_relaxed(control, qspi->regs + REG_CONTROL);
+
+	reinit_completion(&qspi->data_completion);
+
+	/*
+	 * Walk the message once to size it and pick the bus width. Every
+	 * transfer without an rx buffer extends the command phase to all
+	 * bytes counted so far; failing that, the first transfer that is not
+	 * full duplex seeds cmd_bytes with its own length.
+	 */
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		total_bytes += t->len;
+		if (!cmd_bytes && !(t->tx_buf && t->rx_buf))
+			cmd_bytes = t->len;
+		if (!t->rx_buf)
+			cmd_bytes = total_bytes;
+		if (t->tx_nbits == SPI_NBITS_QUAD || t->rx_nbits == SPI_NBITS_QUAD)
+			quad = true;
+		else if (t->tx_nbits == SPI_NBITS_DUAL || t->rx_nbits == SPI_NBITS_DUAL)
+			dual = true;
+	}
+
+	/* Quad wins over dual when a message mixes both widths. */
+	control = readl_relaxed(qspi->regs + REG_CONTROL);
+	if (quad) {
+		control |= (CONTROL_MODE0 | CONTROL_MODE12_EX_RW);
+	} else if (dual) {
+		control &= ~CONTROL_MODE0;
+		control |= CONTROL_MODE12_FULL;
+	} else {
+		control &= ~(CONTROL_MODE12_MASK | CONTROL_MODE0);
+	}
+	writel_relaxed(control, qspi->regs + REG_CONTROL);
+
+	/*
+	 * The total byte count is split across REG_FRAMESUP and REG_FRAMES.
+	 * The idle-cycle field of REG_FRAMES is left at zero.
+	 */
+	frames = total_bytes & BYTESUPPER_MASK;
+	writel_relaxed(frames, qspi->regs + REG_FRAMESUP);
+	frames = total_bytes & BYTESLOWER_MASK;
+	frames |= cmd_bytes << FRAMES_CMDBYTES_SHIFT;
+	control = readl_relaxed(qspi->regs + REG_CONTROL);
+	if (control & CONTROL_MODE12_MASK)
+		frames |= (1 << FRAMES_SHIFT);
+
+	frames |= FRAMES_FLAGWORD;
+	writel_relaxed(frames, qspi->regs + REG_FRAMES);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&qspi->op_lock);
+	return ret;
+}
+
+/*
+ * Run a single transfer of an already prepared message using PIO.
+ *
+ * Transfers without an rx buffer go through the write-only path; any
+ * transfer with an rx buffer goes through the full-duplex path, which
+ * sends filler bytes when there is no tx buffer. The work is done
+ * synchronously, so this always returns 0 (transfer complete).
+ */
+static int mchp_coreqspi_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+				      struct spi_transfer *t)
+{
+	struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr);
+
+	/*
+	 * Always reload txbuf, NULL included. Leaving the pointer of a
+	 * previous transfer in place would make an rx-only transfer send,
+	 * and read past the end of, that stale buffer instead of filler.
+	 * NOTE(review): relies on the SPI core only calling transfer_one
+	 * for transfers that have at least one buffer - confirm.
+	 */
+	qspi->txbuf = (u8 *)t->tx_buf;
+	qspi->tx_len = t->len;
+
+	if (!t->rx_buf) {
+		/* Same rx state as the write-only phases in exec_op. */
+		qspi->rxbuf = NULL;
+		qspi->rx_len = 0;
+		mchp_coreqspi_write_op(qspi);
+		return 0;
+	}
+
+	qspi->rxbuf = (u8 *)t->rx_buf;
+	qspi->rx_len = t->len;
+	mchp_coreqspi_write_read_op(qspi);
+
+	return 0;
+}
+
 static int mchp_coreqspi_probe(struct platform_device *pdev)
 {
 	struct spi_controller *ctlr;
@@ -562,6 +738,12 @@ static int mchp_coreqspi_probe(struct platform_device *pdev)
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
 			  SPI_TX_DUAL | SPI_TX_QUAD;
 	ctlr->dev.of_node = np;
+	ctlr->min_speed_hz = clk_get_rate(qspi->clk) / 30;
+	ctlr->prepare_message = mchp_coreqspi_prepare_message;
+	ctlr->unprepare_message = mchp_coreqspi_unprepare_message;
+	ctlr->transfer_one = mchp_coreqspi_transfer_one;
+	ctlr->num_chipselect = 2;
+	ctlr->use_gpio_descriptors = true;
 
 	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret)
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 209a074a18a6..a6c8bf228288 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -765,7 +765,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
 				return -EINVAL;
 
 			num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt,
-							   PCI_IRQ_ALL_TYPES);
+							   PCI_IRQ_INTX | PCI_IRQ_MSI);
 			if (num_vector < 0) {
 				dev_err(&pdev->dev, "Error allocating MSI vectors\n");
 				return num_vector;
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 3581757a269b..3be7499db21e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -407,9 +407,6 @@ tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_tra
 static void
 tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
 {
-	dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys,
-				tqspi->dma_buf_size, DMA_TO_DEVICE);
-
 	/*
 	 * In packed mode, each word in FIFO may contain multiple packets
 	 * based on bits per word. So all bytes in each FIFO word are valid.
@@ -442,17 +439,11 @@ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_
 
 		tqspi->cur_tx_pos += write_bytes;
 	}
-
-	dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys,
-				   tqspi->dma_buf_size, DMA_TO_DEVICE);
 }
 
 static void
 tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
 {
-	dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys,
-				tqspi->dma_buf_size, DMA_FROM_DEVICE);
-
 	if (tqspi->is_packed) {
 		tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word;
 	} else {
@@ -478,9 +469,6 @@ tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_
 
 		tqspi->cur_rx_pos += read_bytes;
 	}
-
-	dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
-				   tqspi->dma_buf_size, DMA_FROM_DEVICE);
 }
 
 static void tegra_qspi_dma_complete(void *args)
@@ -701,8 +689,6 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct
 				return ret;
 			}
 
-			dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
-						   tqspi->dma_buf_size, DMA_FROM_DEVICE);
 			ret = tegra_qspi_start_rx_dma(tqspi, t, len);
 			if (ret < 0) {
 				dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
author	Mark Brown <broonie@kernel.org>	2025-06-23 23:30:49 +0100
committer	Mark Brown <broonie@kernel.org>	2025-06-23 23:30:49 +0100
commit	1256eb42db5d1635f4c6da5b1b58db0b53320883 (patch)
tree	4a06ef5a4ed8e2f67ead59ad4257a7e6524ed8b1 /drivers/spi
parent	5fc2c383125c2b4b6037e02ad8796b776b25e6d0 (diff)
parent	8f9cf02c8852837923f1cdacfcc92e138513325c (diff)