Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update rknpu driver from airockchip/rknn-llm #9

Open
wants to merge 4 commits into
base: nanopi6-v6.1.y
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions arch/arm64/mm/cache.S
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
ret
SYM_FUNC_END(__pi_dcache_inval_poc)
SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
EXPORT_SYMBOL(dcache_inval_poc)

/*
* dcache_clean_poc(start, end)
Expand All @@ -176,6 +177,7 @@ SYM_FUNC_START(__pi_dcache_clean_poc)
ret
SYM_FUNC_END(__pi_dcache_clean_poc)
SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
EXPORT_SYMBOL(dcache_clean_poc)

/*
* dcache_clean_pop(start, end)
Expand Down
1 change: 1 addition & 0 deletions drivers/iommu/dma-iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
mutex_init(&domain->iova_cookie->mutex);
return 0;
}
EXPORT_SYMBOL(iommu_get_dma_cookie);

/**
* iommu_get_msi_cookie - Acquire just MSI remapping resources
Expand Down
40 changes: 30 additions & 10 deletions drivers/rknpu/include/rknpu_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/kref.h>
#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/regulator/consumer.h>
Expand All @@ -28,10 +29,10 @@

#define DRIVER_NAME "rknpu"
#define DRIVER_DESC "RKNPU driver"
#define DRIVER_DATE "20231121"
#define DRIVER_DATE "20240828"
#define DRIVER_MAJOR 0
#define DRIVER_MINOR 9
#define DRIVER_PATCHLEVEL 3
#define DRIVER_PATCHLEVEL 8

#define LOG_TAG "RKNPU"

Expand All @@ -52,9 +53,19 @@
#define LOG_DEV_DEBUG(dev, fmt, args...) dev_dbg(dev, LOG_TAG ": " fmt, ##args)
#define LOG_DEV_ERROR(dev, fmt, args...) dev_err(dev, LOG_TAG ": " fmt, ##args)

struct rknpu_reset_data {
const char *srst_a_name;
const char *srst_h_name;
#define RKNPU_MAX_IOMMU_DOMAIN_NUM 16
#define RKNPU_CACHE_SG_TABLE_NUM 2

/*
 * struct rknpu_irqs_data - one interrupt entry: a name plus its handler.
 *
 * struct rknpu_config points at an array of these (its "irqs" member,
 * counted by "num_irqs").
 * NOTE(review): presumably "name" is the interrupt name used to look the
 * IRQ up on the platform device -- confirm against the probe code.
 */
struct rknpu_irqs_data {
/* Name string for this interrupt entry. */
const char *name;
/* Handler callback: takes the IRQ number and an opaque context pointer. */
irqreturn_t (*irq_hdl)(int irq, void *ctx);
};

/*
 * struct rknpu_amount_data - group of four 16-bit "amount" offsets.
 *
 * struct rknpu_config references two instances of this struct
 * ("amount_top" and "amount_core").
 * NOTE(review): presumably these are register offsets of the read/write
 * amount counters, relative to some register base -- confirm against the
 * code that consumes them.
 */
struct rknpu_amount_data {
/* Offset used to clear all amount counters (presumably; confirm). */
uint16_t offset_clr_all;
/* Offset of the "dt" write amount (meaning of "dt" not visible here). */
uint16_t offset_dt_wr;
/* Offset of the "dt" read amount. */
uint16_t offset_dt_rd;
/* Offset of the "wt" read amount (meaning of "wt" not visible here). */
uint16_t offset_wt_rd;
};

struct rknpu_config {
Expand All @@ -66,15 +77,16 @@ struct rknpu_config {
__u32 pc_task_number_mask;
__u32 pc_task_status_offset;
__u32 pc_dma_ctrl;
__u32 bw_enable;
const struct rknpu_irqs_data *irqs;
const struct rknpu_reset_data *resets;
int num_irqs;
int num_resets;
__u64 nbuf_phyaddr;
__u64 nbuf_size;
__u64 max_submit_number;
__u32 core_mask;
const struct rknpu_amount_data *amount_top;
const struct rknpu_amount_data *amount_core;
void (*state_init)(struct rknpu_device *rknpu_dev);
int (*cache_sgt_init)(struct rknpu_device *rknpu_dev);
};

struct rknpu_timer {
Expand Down Expand Up @@ -113,13 +125,14 @@ struct rknpu_device {
spinlock_t irq_lock;
struct mutex power_lock;
struct mutex reset_lock;
struct mutex domain_lock;
struct rknpu_subcore_data subcore_datas[RKNPU_MAX_CORES];
const struct rknpu_config *config;
void __iomem *bw_priority_base;
struct rknpu_fence_context *fence_ctx;
bool iommu_en;
struct reset_control *srst_a[RKNPU_MAX_CORES];
struct reset_control *srst_h[RKNPU_MAX_CORES];
struct reset_control **srsts;
int num_srsts;
struct clk_bulk_data *clks;
int num_clks;
struct regulator *vdd;
Expand Down Expand Up @@ -156,6 +169,12 @@ struct rknpu_device {
void __iomem *nbuf_base_io;
struct rknpu_mm *sram_mm;
unsigned long power_put_delay;
struct iommu_group *iommu_group;
int iommu_domain_num;
int iommu_domain_id;
struct iommu_domain *iommu_domains[RKNPU_MAX_IOMMU_DOMAIN_NUM];
struct sg_table *cache_sgt[RKNPU_CACHE_SG_TABLE_NUM];
atomic_t iommu_domain_refcount;
};

struct rknpu_session {
Expand All @@ -165,5 +184,6 @@ struct rknpu_session {

int rknpu_power_get(struct rknpu_device *rknpu_dev);
int rknpu_power_put(struct rknpu_device *rknpu_dev);
int rknpu_power_put_delay(struct rknpu_device *rknpu_dev);

#endif /* __LINUX_RKNPU_DRV_H_ */
11 changes: 7 additions & 4 deletions drivers/rknpu/include/rknpu_gem.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ struct rknpu_gem_object {
struct page **pages;
struct sg_table *sgt;
struct drm_mm_node mm_node;
int iommu_domain_id;
unsigned int core_mask;
unsigned int cache_with_sgt;
};

enum rknpu_cache_type {
Expand All @@ -68,10 +71,10 @@ enum rknpu_cache_type {
};

/* create a new buffer with gem object */
struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev,
unsigned int flags,
unsigned long size,
unsigned long sram_size);
struct rknpu_gem_object *
rknpu_gem_object_create(struct drm_device *dev, unsigned int flags,
unsigned long size, unsigned long sram_size,
int iommu_domain_id, unsigned int core_mask);

/* destroy a buffer with gem object */
void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj);
Expand Down
50 changes: 28 additions & 22 deletions drivers/rknpu/include/rknpu_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@
#define RKNPU_OFFSET_INT_STATUS 0x28
#define RKNPU_OFFSET_INT_RAW_STATUS 0x2c

#define RKNPU_OFFSET_CLR_ALL_RW_AMOUNT 0x8010
#define RKNPU_OFFSET_DT_WR_AMOUNT 0x8034
#define RKNPU_OFFSET_DT_RD_AMOUNT 0x8038
#define RKNPU_OFFSET_WT_RD_AMOUNT 0x803c

#define RKNPU_OFFSET_ENABLE_MASK 0xf008

#define RKNPU_INT_CLEAR 0x1ffff
Expand All @@ -44,10 +39,10 @@

#define RKNPU_STR_HELPER(x) #x

#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \
RKNPU_STR_HELPER(MAJOR) \
#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \
RKNPU_STR_HELPER(MAJOR) \
"." RKNPU_STR_HELPER(MINOR) "." RKNPU_STR_HELPER(PATCHLEVEL)
#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \
#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \
(MAJOR * 10000 + MINOR * 100 + PATCHLEVEL)
#define RKNPU_GET_DRV_VERSION_MAJOR(CODE) (CODE / 10000)
#define RKNPU_GET_DRV_VERSION_MINOR(CODE) ((CODE % 10000) / 100)
Expand All @@ -67,7 +62,7 @@ enum e_rknpu_mem_type {
RKNPU_MEM_WRITE_COMBINE = 1 << 2,
/* dma attr kernel mapping */
RKNPU_MEM_KERNEL_MAPPING = 1 << 3,
/* iommu mapping */
/* IOMMU mapping */
RKNPU_MEM_IOMMU = 1 << 4,
/* zero mapping */
RKNPU_MEM_ZEROING = 1 << 5,
Expand All @@ -79,19 +74,22 @@ enum e_rknpu_mem_type {
RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8,
/* request NBUF */
RKNPU_MEM_TRY_ALLOC_NBUF = 1 << 9,
/* IOMMU limiting IOVA alignment */
RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT = 1 << 10,
RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE |
RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING |
RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING |
RKNPU_MEM_SECURE | RKNPU_MEM_DMA32 |
RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF
RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF |
RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT
};

/* sync mode definitions. */
enum e_rknpu_mem_sync_mode {
RKNPU_MEM_SYNC_TO_DEVICE = 1 << 0,
RKNPU_MEM_SYNC_FROM_DEVICE = 1 << 1,
RKNPU_MEM_SYNC_MASK =
RKNPU_MEM_SYNC_TO_DEVICE | RKNPU_MEM_SYNC_FROM_DEVICE
RKNPU_MEM_SYNC_MASK = RKNPU_MEM_SYNC_TO_DEVICE |
RKNPU_MEM_SYNC_FROM_DEVICE
};

/* job mode definitions. */
Expand Down Expand Up @@ -134,6 +132,8 @@ enum e_rknpu_action {
RKNPU_POWER_OFF = 21,
RKNPU_GET_TOTAL_SRAM_SIZE = 22,
RKNPU_GET_FREE_SRAM_SIZE = 23,
RKNPU_GET_IOMMU_DOMAIN_ID = 24,
RKNPU_SET_IOMMU_DOMAIN_ID = 25,
};

/**
Expand All @@ -147,6 +147,8 @@ enum e_rknpu_action {
* @dma_addr: dma address that access by rknpu.
* @sram_size: user-desired sram memory allocation size.
* - this size value would be page-aligned internally.
* @iommu_domain_id: iommu domain id
* @core_mask: core mask of rknpu.
*/
struct rknpu_mem_create {
__u32 handle;
Expand All @@ -155,6 +157,8 @@ struct rknpu_mem_create {
__u64 obj_addr;
__u64 dma_addr;
__u64 sram_size;
__s32 iommu_domain_id;
__u32 core_mask;
};

/**
Expand Down Expand Up @@ -249,7 +253,8 @@ struct rknpu_subcore_task {
* @task_counter: task counter
* @priority: submit priority
* @task_obj_addr: address of task object
* @regcfg_obj_addr: address of register config object
* @iommu_domain_id: iommu domain id
* @reserved: just padding to be 64-bit aligned.
* @task_base_addr: task base address
* @hw_elapse_time: hardware elapse time
* @core_mask: core mask of rknpu
Expand All @@ -265,7 +270,8 @@ struct rknpu_submit {
__u32 task_counter;
__s32 priority;
__u64 task_obj_addr;
__u64 regcfg_obj_addr;
__u32 iommu_domain_id;
__u32 reserved;
__u64 task_base_addr;
__s64 hw_elapse_time;
__u32 core_mask;
Expand Down Expand Up @@ -299,25 +305,25 @@ struct rknpu_action {

#include <drm/drm.h>

#define DRM_IOCTL_RKNPU_ACTION \
#define DRM_IOCTL_RKNPU_ACTION \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_ACTION, struct rknpu_action)
#define DRM_IOCTL_RKNPU_SUBMIT \
#define DRM_IOCTL_RKNPU_SUBMIT \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_SUBMIT, struct rknpu_submit)
#define DRM_IOCTL_RKNPU_MEM_CREATE \
#define DRM_IOCTL_RKNPU_MEM_CREATE \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_CREATE, struct rknpu_mem_create)
#define DRM_IOCTL_RKNPU_MEM_MAP \
#define DRM_IOCTL_RKNPU_MEM_MAP \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_MAP, struct rknpu_mem_map)
#define DRM_IOCTL_RKNPU_MEM_DESTROY \
#define DRM_IOCTL_RKNPU_MEM_DESTROY \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_DESTROY, struct rknpu_mem_destroy)
#define DRM_IOCTL_RKNPU_MEM_SYNC \
#define DRM_IOCTL_RKNPU_MEM_SYNC \
DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync)

#define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action)
#define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit)
#define IOCTL_RKNPU_MEM_CREATE \
#define IOCTL_RKNPU_MEM_CREATE \
RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create)
#define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map)
#define IOCTL_RKNPU_MEM_DESTROY \
#define IOCTL_RKNPU_MEM_DESTROY \
RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy)
#define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync)

Expand Down
24 changes: 22 additions & 2 deletions drivers/rknpu/include/rknpu_iommu.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,29 @@ struct rknpu_iommu_dma_cookie {
};

dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size,
u64 dma_limit, struct device *dev);
u64 dma_limit, struct device *dev,
bool size_aligned);

void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie,
dma_addr_t iova, size_t size);
dma_addr_t iova, size_t size, bool size_aligned);

int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
bool iova_aligned);

void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
bool iova_aligned);

int rknpu_iommu_init_domain(struct rknpu_device *rknpu_dev);
int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id);
void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev);
int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev,
int domain_id);
int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev);

#if KERNEL_VERSION(5, 10, 0) < LINUX_VERSION_CODE
int iommu_get_dma_cookie(struct iommu_domain *domain);
#endif

#endif
1 change: 1 addition & 0 deletions drivers/rknpu/include/rknpu_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ struct rknpu_job {
ktime_t hw_recoder_time;
ktime_t hw_elapse_time;
atomic_t submit_count[RKNPU_MAX_CORES];
int iommu_domain_id;
};

irqreturn_t rknpu_core0_irq_handler(int irq, void *data);
Expand Down
8 changes: 4 additions & 4 deletions drivers/rknpu/include/rknpu_mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ struct rknpu_mem_object {
unsigned int owner;
};

int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data,
struct file *file);
int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data,
struct file *file);
int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, struct file *file,
unsigned int cmd, unsigned long data);
int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, struct file *file,
unsigned long data);
int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data);

#endif
15 changes: 13 additions & 2 deletions drivers/rknpu/rknpu_devfreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ static int rk3588_npu_set_read_margin(struct device *dev,
struct rockchip_opp_info *opp_info,
u32 rm)
{
struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
u32 offset = 0, val = 0;
int i, ret = 0;

Expand All @@ -174,7 +173,7 @@ static int rk3588_npu_set_read_margin(struct device *dev,

LOG_DEV_DEBUG(dev, "set rm to %d\n", rm);

for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
for (i = 0; i < 3; i++) {
ret = regmap_read(opp_info->grf, offset, &val);
if (ret < 0) {
LOG_DEV_ERROR(dev, "failed to get rm from 0x%x\n",
Expand Down Expand Up @@ -365,7 +364,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev)
err_remove_governor:
devfreq_remove_governor(&devfreq_rknpu_ondemand);
err_uinit_table:
#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
rockchip_uninit_opp_table(dev, info);
#else
dev_pm_opp_of_remove_table(dev);
#endif

return ret;
}
Expand Down Expand Up @@ -699,7 +702,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev)
err_remove_governor:
devfreq_remove_governor(&devfreq_rknpu_ondemand);
err_remove_table:
#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
rockchip_uninit_opp_table(dev, &rknpu_dev->opp_info);
#else
dev_pm_opp_of_remove_table(dev);
#endif

rknpu_dev->devfreq = NULL;

Expand Down Expand Up @@ -760,6 +767,10 @@ void rknpu_devfreq_remove(struct rknpu_device *rknpu_dev)
}
if (rknpu_dev->devfreq)
devfreq_remove_governor(&devfreq_rknpu_ondemand);
#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
rockchip_uninit_opp_table(rknpu_dev->dev, &rknpu_dev->opp_info);
#else
dev_pm_opp_of_remove_table(rknpu_dev->dev);
#endif
}
EXPORT_SYMBOL(rknpu_devfreq_remove);
Loading