diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 081058d4e4366..05850c173c54d 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -161,6 +161,7 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
 	ret
 SYM_FUNC_END(__pi_dcache_inval_poc)
 SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
+EXPORT_SYMBOL(dcache_inval_poc)
 
 /*
  *	dcache_clean_poc(start, end)
@@ -176,6 +177,7 @@ SYM_FUNC_START(__pi_dcache_clean_poc)
 	ret
 SYM_FUNC_END(__pi_dcache_clean_poc)
 SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
+EXPORT_SYMBOL(dcache_clean_poc)
 
 /*
  *	dcache_clean_pop(start, end)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index cb0dc831834c9..61e9312c5b37d 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -318,6 +318,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
 	mutex_init(&domain->iova_cookie->mutex);
 	return 0;
 }
+EXPORT_SYMBOL(iommu_get_dma_cookie);
 
 /**
  * iommu_get_msi_cookie - Acquire just MSI remapping resources
diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h
index 98fba97c4af2e..90e66d68b727d 100644
--- a/drivers/rknpu/include/rknpu_drv.h
+++ b/drivers/rknpu/include/rknpu_drv.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -28,10 +29,10 @@
 
 #define DRIVER_NAME "rknpu"
 #define DRIVER_DESC "RKNPU driver"
-#define DRIVER_DATE "20231121"
+#define DRIVER_DATE "20240828"
 #define DRIVER_MAJOR 0
 #define DRIVER_MINOR 9
-#define DRIVER_PATCHLEVEL 3
+#define DRIVER_PATCHLEVEL 8
 
 #define LOG_TAG "RKNPU"
@@ -52,9 +53,19 @@
 #define LOG_DEV_DEBUG(dev, fmt, args...) dev_dbg(dev, LOG_TAG ": " fmt, ##args)
 #define LOG_DEV_ERROR(dev, fmt, args...) dev_err(dev, LOG_TAG ": " fmt, ##args)
 
-struct rknpu_reset_data {
-	const char *srst_a_name;
-	const char *srst_h_name;
+#define RKNPU_MAX_IOMMU_DOMAIN_NUM 16
+#define RKNPU_CACHE_SG_TABLE_NUM 2
+
+struct rknpu_irqs_data {
+	const char *name;
+	irqreturn_t (*irq_hdl)(int irq, void *ctx);
+};
+
+struct rknpu_amount_data {
+	uint16_t offset_clr_all;
+	uint16_t offset_dt_wr;
+	uint16_t offset_dt_rd;
+	uint16_t offset_wt_rd;
 };
 
 struct rknpu_config {
@@ -66,15 +77,16 @@ struct rknpu_config {
 	__u32 pc_task_number_mask;
 	__u32 pc_task_status_offset;
 	__u32 pc_dma_ctrl;
-	__u32 bw_enable;
 	const struct rknpu_irqs_data *irqs;
-	const struct rknpu_reset_data *resets;
 	int num_irqs;
-	int num_resets;
 	__u64 nbuf_phyaddr;
 	__u64 nbuf_size;
 	__u64 max_submit_number;
 	__u32 core_mask;
+	const struct rknpu_amount_data *amount_top;
+	const struct rknpu_amount_data *amount_core;
+	void (*state_init)(struct rknpu_device *rknpu_dev);
+	int (*cache_sgt_init)(struct rknpu_device *rknpu_dev);
 };
 
 struct rknpu_timer {
@@ -113,13 +125,14 @@ struct rknpu_device {
 	spinlock_t irq_lock;
 	struct mutex power_lock;
 	struct mutex reset_lock;
+	struct mutex domain_lock;
 	struct rknpu_subcore_data subcore_datas[RKNPU_MAX_CORES];
 	const struct rknpu_config *config;
 	void __iomem *bw_priority_base;
 	struct rknpu_fence_context *fence_ctx;
 	bool iommu_en;
-	struct reset_control *srst_a[RKNPU_MAX_CORES];
-	struct reset_control *srst_h[RKNPU_MAX_CORES];
+	struct reset_control **srsts;
+	int num_srsts;
 	struct clk_bulk_data *clks;
 	int num_clks;
 	struct regulator *vdd;
@@ -156,6 +169,12 @@ struct rknpu_device {
 	void __iomem *nbuf_base_io;
 	struct rknpu_mm *sram_mm;
 	unsigned long power_put_delay;
+	struct iommu_group *iommu_group;
+	int iommu_domain_num;
+	int iommu_domain_id;
+	struct iommu_domain *iommu_domains[RKNPU_MAX_IOMMU_DOMAIN_NUM];
+	struct sg_table *cache_sgt[RKNPU_CACHE_SG_TABLE_NUM];
+	atomic_t iommu_domain_refcount;
 };
 
 struct rknpu_session {
@@ -165,5 +184,6 @@ struct rknpu_session {
 
 int rknpu_power_get(struct rknpu_device *rknpu_dev);
 int rknpu_power_put(struct rknpu_device *rknpu_dev);
+int rknpu_power_put_delay(struct rknpu_device *rknpu_dev);
 
 #endif /* __LINUX_RKNPU_DRV_H_ */
diff --git a/drivers/rknpu/include/rknpu_gem.h b/drivers/rknpu/include/rknpu_gem.h
index aedcab89d2419..17f922105786f 100644
--- a/drivers/rknpu/include/rknpu_gem.h
+++ b/drivers/rknpu/include/rknpu_gem.h
@@ -60,6 +60,9 @@ struct rknpu_gem_object {
 	struct page **pages;
 	struct sg_table *sgt;
 	struct drm_mm_node mm_node;
+	int iommu_domain_id;
+	unsigned int core_mask;
+	unsigned int cache_with_sgt;
 };
 
 enum rknpu_cache_type {
@@ -68,10 +71,10 @@ enum rknpu_cache_type {
 };
 
 /* create a new buffer with gem object */
-struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev,
-						 unsigned int flags,
-						 unsigned long size,
-						 unsigned long sram_size);
+struct rknpu_gem_object *
+rknpu_gem_object_create(struct drm_device *dev, unsigned int flags,
+			unsigned long size, unsigned long sram_size,
+			int iommu_domain_id, unsigned int core_mask);
 
 /* destroy a buffer with gem object */
 void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj);
diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h
index 35b46701c7895..98c544464d719 100644
--- a/drivers/rknpu/include/rknpu_ioctl.h
+++ b/drivers/rknpu/include/rknpu_ioctl.h
@@ -31,11 +31,6 @@
 #define RKNPU_OFFSET_INT_STATUS 0x28
 #define RKNPU_OFFSET_INT_RAW_STATUS 0x2c
 
-#define RKNPU_OFFSET_CLR_ALL_RW_AMOUNT 0x8010
-#define RKNPU_OFFSET_DT_WR_AMOUNT 0x8034
-#define RKNPU_OFFSET_DT_RD_AMOUNT 0x8038
-#define RKNPU_OFFSET_WT_RD_AMOUNT 0x803c
-
 #define RKNPU_OFFSET_ENABLE_MASK 0xf008
 
 #define RKNPU_INT_CLEAR 0x1ffff
@@ -44,10 +39,10 @@
 
 #define RKNPU_STR_HELPER(x) #x
 
-#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL)                \
-	RKNPU_STR_HELPER(MAJOR)                                               \
+#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL)         \
+	RKNPU_STR_HELPER(MAJOR)                                         \
 	"." RKNPU_STR_HELPER(MINOR) "." RKNPU_STR_HELPER(PATCHLEVEL)
-#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL)                  \
+#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL)           \
 	(MAJOR * 10000 + MINOR * 100 + PATCHLEVEL)
 #define RKNPU_GET_DRV_VERSION_MAJOR(CODE) (CODE / 10000)
 #define RKNPU_GET_DRV_VERSION_MINOR(CODE) ((CODE % 10000) / 100)
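[Annotation - not part of the patch] The version macros above are plain decimal packing; a minimal standalone sketch of how the 0.9.8 version set in rknpu_drv.h round-trips through them:

/* Illustrative only: 0*10000 + 9*100 + 8 == 908, and 908 decodes back to 0.9. */
#include <stdio.h>

#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \
	(MAJOR * 10000 + MINOR * 100 + PATCHLEVEL)
#define RKNPU_GET_DRV_VERSION_MAJOR(CODE) (CODE / 10000)
#define RKNPU_GET_DRV_VERSION_MINOR(CODE) ((CODE % 10000) / 100)

int main(void)
{
	int code = RKNPU_GET_DRV_VERSION_CODE(0, 9, 8); /* 908 */

	printf("%d -> %d.%d\n", code, RKNPU_GET_DRV_VERSION_MAJOR(code),
	       RKNPU_GET_DRV_VERSION_MINOR(code)); /* prints "908 -> 0.9" */
	return 0;
}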
@@ -67,7 +62,7 @@ enum e_rknpu_mem_type {
 	RKNPU_MEM_WRITE_COMBINE = 1 << 2,
 	/* dma attr kernel mapping */
 	RKNPU_MEM_KERNEL_MAPPING = 1 << 3,
-	/* iommu mapping */
+	/* IOMMU mapping */
 	RKNPU_MEM_IOMMU = 1 << 4,
 	/* zero mapping */
 	RKNPU_MEM_ZEROING = 1 << 5,
@@ -79,19 +74,22 @@ enum e_rknpu_mem_type {
 	RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8,
 	/* request NBUF */
 	RKNPU_MEM_TRY_ALLOC_NBUF = 1 << 9,
+	/* IOMMU limiting IOVA alignment */
+	RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT = 1 << 10,
 	RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE |
 			 RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING |
 			 RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING |
 			 RKNPU_MEM_SECURE | RKNPU_MEM_DMA32 |
-			 RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF
+			 RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF |
+			 RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT
 };
 
 /* sync mode definitions. */
 enum e_rknpu_mem_sync_mode {
 	RKNPU_MEM_SYNC_TO_DEVICE = 1 << 0,
 	RKNPU_MEM_SYNC_FROM_DEVICE = 1 << 1,
-	RKNPU_MEM_SYNC_MASK =
-		RKNPU_MEM_SYNC_TO_DEVICE | RKNPU_MEM_SYNC_FROM_DEVICE
+	RKNPU_MEM_SYNC_MASK = RKNPU_MEM_SYNC_TO_DEVICE |
+			      RKNPU_MEM_SYNC_FROM_DEVICE
 };
 
 /* job mode definitions. */
@@ -134,6 +132,8 @@ enum e_rknpu_action {
 	RKNPU_POWER_OFF = 21,
 	RKNPU_GET_TOTAL_SRAM_SIZE = 22,
 	RKNPU_GET_FREE_SRAM_SIZE = 23,
+	RKNPU_GET_IOMMU_DOMAIN_ID = 24,
+	RKNPU_SET_IOMMU_DOMAIN_ID = 25,
 };
 
 /**
@@ -147,6 +147,8 @@ enum e_rknpu_action {
 * @dma_addr: dma address that access by rknpu.
 * @sram_size: user-desired sram memory allocation size.
 *	- this size value would be page-aligned internally.
+ * @iommu_domain_id: iommu domain id
+ * @core_mask: core mask of rknpu
 */
 struct rknpu_mem_create {
 	__u32 handle;
@@ -155,6 +157,8 @@ struct rknpu_mem_create {
 	__u64 obj_addr;
 	__u64 dma_addr;
 	__u64 sram_size;
+	__s32 iommu_domain_id;
+	__u32 core_mask;
 };
 
 /**
@@ -249,7 +253,8 @@ struct rknpu_subcore_task {
 * @task_counter: task counter
 * @priority: submit priority
 * @task_obj_addr: address of task object
- * @regcfg_obj_addr: address of register config object
+ * @iommu_domain_id: iommu domain id
+ * @reserved: just padding to be 64-bit aligned.
 * @task_base_addr: task base address
 * @hw_elapse_time: hardware elapse time
 * @core_mask: core mask of rknpu
@@ -265,7 +270,8 @@ struct rknpu_submit {
 	__u32 task_counter;
 	__s32 priority;
 	__u64 task_obj_addr;
-	__u64 regcfg_obj_addr;
+	__u32 iommu_domain_id;
+	__u32 reserved;
 	__u64 task_base_addr;
 	__s64 hw_elapse_time;
 	__u32 core_mask;
@@ -299,25 +305,25 @@ struct rknpu_action {
 
 #include
 
-#define DRM_IOCTL_RKNPU_ACTION                                          \
+#define DRM_IOCTL_RKNPU_ACTION                                        \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_ACTION, struct rknpu_action)
-#define DRM_IOCTL_RKNPU_SUBMIT                                          \
+#define DRM_IOCTL_RKNPU_SUBMIT                                        \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_SUBMIT, struct rknpu_submit)
-#define DRM_IOCTL_RKNPU_MEM_CREATE                                      \
+#define DRM_IOCTL_RKNPU_MEM_CREATE                                    \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_CREATE, struct rknpu_mem_create)
-#define DRM_IOCTL_RKNPU_MEM_MAP                                         \
+#define DRM_IOCTL_RKNPU_MEM_MAP                                       \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_MAP, struct rknpu_mem_map)
-#define DRM_IOCTL_RKNPU_MEM_DESTROY                                     \
+#define DRM_IOCTL_RKNPU_MEM_DESTROY                                   \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_DESTROY, struct rknpu_mem_destroy)
-#define DRM_IOCTL_RKNPU_MEM_SYNC                                        \
+#define DRM_IOCTL_RKNPU_MEM_SYNC                                      \
 	DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync)
 
 #define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action)
 #define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit)
-#define IOCTL_RKNPU_MEM_CREATE                                          \
+#define IOCTL_RKNPU_MEM_CREATE                                        \
 	RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create)
 #define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map)
-#define IOCTL_RKNPU_MEM_DESTROY                                         \
+#define IOCTL_RKNPU_MEM_DESTROY                                       \
 	RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy)
#define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync)
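[Annotation - not part of the patch] A hypothetical userspace sketch of the extended rknpu_mem_create layout; drm_fd stands for an already-open RKNPU DRM node and is assumed:

/* Illustrative only: allocate a cacheable, IOMMU-backed buffer in domain 1. */
struct rknpu_mem_create create = { 0 };

create.size = 0x100000;
create.flags = RKNPU_MEM_IOMMU | RKNPU_MEM_CACHEABLE |
	       RKNPU_MEM_NON_CONTIGUOUS;
create.iommu_domain_id = 1;	/* new field: target IOMMU domain */
create.core_mask = 0;		/* new field: let the driver choose a core */

if (ioctl(drm_fd, DRM_IOCTL_RKNPU_MEM_CREATE, &create) < 0)
	perror("RKNPU_MEM_CREATE");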
diff --git a/drivers/rknpu/include/rknpu_iommu.h b/drivers/rknpu/include/rknpu_iommu.h
index aa680c9976147..40ea58e282d88 100644
--- a/drivers/rknpu/include/rknpu_iommu.h
+++ b/drivers/rknpu/include/rknpu_iommu.h
@@ -32,9 +32,29 @@ struct rknpu_iommu_dma_cookie {
 };
 
 dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size,
-				      u64 dma_limit, struct device *dev);
+				      u64 dma_limit, struct device *dev,
+				      bool size_aligned);
 
 void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie,
-			       dma_addr_t iova, size_t size);
+			       dma_addr_t iova, size_t size, bool size_aligned);
+
+int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nents, enum dma_data_direction dir,
+			   bool iova_aligned);
+
+void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			      int nents, enum dma_data_direction dir,
+			      bool iova_aligned);
+
+int rknpu_iommu_init_domain(struct rknpu_device *rknpu_dev);
+int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id);
+void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev);
+int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev,
+				      int domain_id);
+int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev);
+
+#if KERNEL_VERSION(5, 10, 0) < LINUX_VERSION_CODE
+int iommu_get_dma_cookie(struct iommu_domain *domain);
+#endif
 
 #endif
diff --git a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h
index cd0d1dfb8363c..46eb304ac3187 100644
--- a/drivers/rknpu/include/rknpu_job.h
+++ b/drivers/rknpu/include/rknpu_job.h
@@ -48,6 +48,7 @@ struct rknpu_job {
 	ktime_t hw_recoder_time;
 	ktime_t hw_elapse_time;
 	atomic_t submit_count[RKNPU_MAX_CORES];
+	int iommu_domain_id;
 };
 
 irqreturn_t rknpu_core0_irq_handler(int irq, void *data);
diff --git a/drivers/rknpu/include/rknpu_mem.h b/drivers/rknpu/include/rknpu_mem.h
index 69975408f4341..9cc5d1d425218 100644
--- a/drivers/rknpu/include/rknpu_mem.h
+++ b/drivers/rknpu/include/rknpu_mem.h
@@ -37,10 +37,10 @@ struct rknpu_mem_object {
 	unsigned int owner;
 };
 
-int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data,
-			   struct file *file);
-int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data,
-			    struct file *file);
+int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, struct file *file,
+			   unsigned int cmd, unsigned long data);
+int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, struct file *file,
+			    unsigned long data);
 int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data);
 
 #endif
diff --git a/drivers/rknpu/rknpu_devfreq.c b/drivers/rknpu/rknpu_devfreq.c
index a2a83a9ccd9e3..7a50390e90277 100644
--- a/drivers/rknpu/rknpu_devfreq.c
+++ b/drivers/rknpu/rknpu_devfreq.c
@@ -162,7 +162,6 @@ static int rk3588_npu_set_read_margin(struct device *dev,
 				      struct rockchip_opp_info *opp_info,
 				      u32 rm)
 {
-	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
 	u32 offset = 0, val = 0;
 	int i, ret = 0;
 
@@ -174,7 +173,7 @@ static int rk3588_npu_set_read_margin(struct device *dev,
 
 	LOG_DEV_DEBUG(dev, "set rm to %d\n", rm);
 
-	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
+	for (i = 0; i < 3; i++) {
 		ret = regmap_read(opp_info->grf, offset, &val);
 		if (ret < 0) {
 			LOG_DEV_ERROR(dev, "failed to get rm from 0x%x\n",
@@ -365,7 +364,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev)
 err_remove_governor:
 	devfreq_remove_governor(&devfreq_rknpu_ondemand);
 err_uinit_table:
+#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
 	rockchip_uninit_opp_table(dev, info);
+#else
+	dev_pm_opp_of_remove_table(dev);
+#endif
 	return ret;
 }
 
@@ -699,7 +702,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev)
 err_remove_governor:
 	devfreq_remove_governor(&devfreq_rknpu_ondemand);
 err_remove_table:
+#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
 	rockchip_uninit_opp_table(dev, &rknpu_dev->opp_info);
+#else
+	dev_pm_opp_of_remove_table(dev);
+#endif
 
 	rknpu_dev->devfreq = NULL;
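[Annotation - not part of the patch] The same KERNEL_VERSION guard now appears at three OPP-cleanup sites in this file; a hypothetical consolidation of the pattern, assuming the same call sites:

/* Illustrative only: one wrapper instead of three repeated #if blocks. */
static inline void rknpu_uninit_opp(struct device *dev,
				    struct rockchip_opp_info *info)
{
#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
	rockchip_uninit_opp_table(dev, info);	/* newer 5.10.y helper */
#else
	dev_pm_opp_of_remove_table(dev);	/* generic OPP fallback */
#endif
}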
@@ -760,6 +767,10 @@ void rknpu_devfreq_remove(struct rknpu_device *rknpu_dev)
 	}
 	if (rknpu_dev->devfreq)
 		devfreq_remove_governor(&devfreq_rknpu_ondemand);
+#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE
 	rockchip_uninit_opp_table(rknpu_dev->dev, &rknpu_dev->opp_info);
+#else
+	dev_pm_opp_of_remove_table(rknpu_dev->dev);
+#endif
 }
 EXPORT_SYMBOL(rknpu_devfreq_remove);
 
diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c
index e778d98964ad3..100b271af07e5 100644
--- a/drivers/rknpu/rknpu_drv.c
+++ b/drivers/rknpu/rknpu_drv.c
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -44,6 +43,7 @@
 #include "rknpu_drv.h"
 #include "rknpu_gem.h"
 #include "rknpu_devfreq.h"
+#include "rknpu_iommu.h"
 
 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
 #include
@@ -72,11 +72,6 @@ module_param(bypass_soft_reset, int, 0644);
 MODULE_PARM_DESC(bypass_soft_reset,
		 "bypass RKNPU soft reset if set it to 1, disabled by default");
 
-struct rknpu_irqs_data {
-	const char *name;
-	irqreturn_t (*irq_hdl)(int irq, void *ctx);
-};
-
 static const struct rknpu_irqs_data rknpu_irqs[] = {
 	{ "npu_irq", rknpu_core0_irq_handler }
 };
@@ -87,13 +82,25 @@ static const struct rknpu_irqs_data rk3588_npu_irqs[] = {
 	{ "npu2_irq", rknpu_core2_irq_handler }
 };
 
-static const struct rknpu_reset_data rknpu_resets[] = { { "srst_a",
-							  "srst_h" } };
+static const struct rknpu_amount_data rknpu_old_top_amount = {
+	.offset_clr_all = 0x8010,
+	.offset_dt_wr = 0x8034,
+	.offset_dt_rd = 0x8038,
+	.offset_wt_rd = 0x803c,
+};
+
+static const struct rknpu_amount_data rknpu_top_amount = {
+	.offset_clr_all = 0x2210,
+	.offset_dt_wr = 0x2234,
+	.offset_dt_rd = 0x2238,
+	.offset_wt_rd = 0x223c
+};
 
-static const struct rknpu_reset_data rk3588_npu_resets[] = {
-	{ "srst_a0", "srst_h0" },
-	{ "srst_a1", "srst_h1" },
-	{ "srst_a2", "srst_h2" }
+static const struct rknpu_amount_data rknpu_core_amount = {
+	.offset_clr_all = 0x2410,
+	.offset_dt_wr = 0x2434,
+	.offset_dt_rd = 0x2438,
+	.offset_wt_rd = 0x243c,
 };
 
 static const struct rknpu_config rk356x_rknpu_config = {
@@ -105,15 +112,16 @@ static const struct rknpu_config rk356x_rknpu_config = {
 	.pc_task_number_mask = 0xfff,
 	.pc_task_status_offset = 0x3c,
 	.pc_dma_ctrl = 0,
-	.bw_enable = 1,
 	.irqs = rknpu_irqs,
-	.resets = rknpu_resets,
 	.num_irqs = ARRAY_SIZE(rknpu_irqs),
-	.num_resets = ARRAY_SIZE(rknpu_resets),
 	.nbuf_phyaddr = 0,
 	.nbuf_size = 0,
 	.max_submit_number = (1 << 12) - 1,
 	.core_mask = 0x1,
+	.amount_top = &rknpu_old_top_amount,
+	.amount_core = NULL,
+	.state_init = NULL,
+	.cache_sgt_init = NULL,
 };
 
 static const struct rknpu_config rk3588_rknpu_config = {
@@ -125,15 +133,16 @@ static const struct rknpu_config rk3588_rknpu_config = {
 	.pc_task_number_mask = 0xfff,
 	.pc_task_status_offset = 0x3c,
 	.pc_dma_ctrl = 0,
-	.bw_enable = 0,
 	.irqs = rk3588_npu_irqs,
-	.resets = rk3588_npu_resets,
 	.num_irqs = ARRAY_SIZE(rk3588_npu_irqs),
-	.num_resets = ARRAY_SIZE(rk3588_npu_resets),
 	.nbuf_phyaddr = 0,
 	.nbuf_size = 0,
 	.max_submit_number = (1 << 12) - 1,
 	.core_mask = 0x7,
+	.amount_top = NULL,
+	.amount_core = NULL,
+	.state_init = NULL,
+	.cache_sgt_init = NULL,
 };
 
 static const struct rknpu_config rk3583_rknpu_config = {
@@ -145,15 +154,16 @@ static const struct rknpu_config rk3583_rknpu_config = {
 	.pc_task_number_mask = 0xfff,
 	.pc_task_status_offset = 0x3c,
 	.pc_dma_ctrl = 0,
-	.bw_enable = 0,
 	.irqs = rk3588_npu_irqs,
-	.resets = rk3588_npu_resets,
 	.num_irqs = 2,
-	.num_resets = 2,
 	.nbuf_phyaddr = 0,
 	.nbuf_size = 0,
 	.max_submit_number = (1 << 12) - 1,
 	.core_mask = 0x3,
+	.amount_top = NULL,
+	.amount_core = NULL,
+	.state_init = NULL,
+	.cache_sgt_init = NULL,
 };
 static const struct rknpu_config rv1106_rknpu_config = {
@@ -165,15 +175,16 @@ static const struct rknpu_config rv1106_rknpu_config = {
 	.pc_task_number_mask = 0xffff,
 	.pc_task_status_offset = 0x3c,
 	.pc_dma_ctrl = 0,
-	.bw_enable = 1,
 	.irqs = rknpu_irqs,
-	.resets = rknpu_resets,
 	.num_irqs = ARRAY_SIZE(rknpu_irqs),
-	.num_resets = ARRAY_SIZE(rknpu_resets),
 	.nbuf_phyaddr = 0,
 	.nbuf_size = 0,
 	.max_submit_number = (1 << 16) - 1,
 	.core_mask = 0x1,
+	.amount_top = &rknpu_old_top_amount,
+	.amount_core = NULL,
+	.state_init = NULL,
+	.cache_sgt_init = NULL,
 };
 
 static const struct rknpu_config rk3562_rknpu_config = {
@@ -185,15 +196,16 @@ static const struct rknpu_config rk3562_rknpu_config = {
 	.pc_task_number_mask = 0xffff,
 	.pc_task_status_offset = 0x48,
 	.pc_dma_ctrl = 1,
-	.bw_enable = 1,
 	.irqs = rknpu_irqs,
-	.resets = rknpu_resets,
 	.num_irqs = ARRAY_SIZE(rknpu_irqs),
-	.num_resets = ARRAY_SIZE(rknpu_resets),
 	.nbuf_phyaddr = 0xfe400000,
 	.nbuf_size = 256 * 1024,
 	.max_submit_number = (1 << 16) - 1,
 	.core_mask = 0x1,
+	.amount_top = &rknpu_old_top_amount,
+	.amount_core = NULL,
+	.state_init = NULL,
+	.cache_sgt_init = NULL,
 };
 
 /* driver probe and init */
@@ -220,7 +232,6 @@ static const struct of_device_id rknpu_of_match[] = {
 	},
 	{},
 };
-MODULE_DEVICE_TABLE(of, rknpu_of_match);
 
 static int rknpu_get_drv_version(uint32_t *version)
 {
@@ -234,13 +245,20 @@ static int rknpu_power_off(struct rknpu_device *rknpu_dev);
 
 static void rknpu_power_off_delay_work(struct work_struct *power_off_work)
 {
+	int ret = 0;
 	struct rknpu_device *rknpu_dev =
 		container_of(to_delayed_work(power_off_work),
			     struct rknpu_device, power_off_work);
 
 	mutex_lock(&rknpu_dev->power_lock);
-	if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0)
-		rknpu_power_off(rknpu_dev);
+	if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) {
+		ret = rknpu_power_off(rknpu_dev);
+		if (ret)
+			atomic_inc(&rknpu_dev->power_refcount);
+	}
 	mutex_unlock(&rknpu_dev->power_lock);
+
+	if (ret)
+		rknpu_power_put_delay(rknpu_dev);
 }
 
 int rknpu_power_get(struct rknpu_device *rknpu_dev)
@@ -260,14 +278,20 @@ int rknpu_power_put(struct rknpu_device *rknpu_dev)
 	int ret = 0;
 
 	mutex_lock(&rknpu_dev->power_lock);
-	if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0)
+	if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) {
 		ret = rknpu_power_off(rknpu_dev);
+		if (ret)
+			atomic_inc(&rknpu_dev->power_refcount);
+	}
 	mutex_unlock(&rknpu_dev->power_lock);
 
+	if (ret)
+		rknpu_power_put_delay(rknpu_dev);
+
 	return ret;
 }
 
-static int rknpu_power_put_delay(struct rknpu_device *rknpu_dev)
+int rknpu_power_put_delay(struct rknpu_device *rknpu_dev)
 {
 	if (rknpu_dev->power_put_delay == 0)
 		return rknpu_power_put(rknpu_dev);
@@ -375,6 +399,18 @@ static int rknpu_action(struct rknpu_device *rknpu_dev,
 		args->value = 0;
 		ret = 0;
 		break;
+	case RKNPU_GET_IOMMU_DOMAIN_ID:
+		args->value = rknpu_dev->iommu_domain_id;
+		ret = 0;
+		break;
+	case RKNPU_SET_IOMMU_DOMAIN_ID: {
+		ret = rknpu_iommu_domain_get_and_switch(
+			rknpu_dev, *(int32_t *)&args->value);
+		if (ret)
+			break;
+		rknpu_iommu_domain_put(rknpu_dev);
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		break;
@@ -464,7 +500,7 @@ static int rknpu_action_ioctl(struct rknpu_device *rknpu_dev,
 	return ret;
 }
 
-static long rknpu_ioctl(struct file *file, uint32_t cmd, unsigned long arg)
+static long rknpu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	long ret = -EINVAL;
 	struct rknpu_device *rknpu_dev = NULL;
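[Annotation - not part of the patch] Context for the dispatch change in the next hunk: RKNPU_IOWR() is assumed to wrap _IOWR() with the command ordinal in the ioctl number field, so _IOC_NR() recovers the bare RKNPU_* value while discarding the direction/size/type bits. That lets the case labels use the plain enum ordinals, and lets rknpu_mem_create_ioctl() receive the full cmd to validate the user-passed struct size itself:

/* Illustrative only, assuming RKNPU_IOWR() is built on _IOWR(): */
unsigned int nr = _IOC_NR(IOCTL_RKNPU_MEM_CREATE);	/* == RKNPU_MEM_CREATE */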
@@ -476,22 +512,22 @@
 
 	rknpu_power_get(rknpu_dev);
 
-	switch (cmd) {
-	case IOCTL_RKNPU_ACTION:
+	switch (_IOC_NR(cmd)) {
+	case RKNPU_ACTION:
 		ret = rknpu_action_ioctl(rknpu_dev, arg);
 		break;
-	case IOCTL_RKNPU_SUBMIT:
+	case RKNPU_SUBMIT:
 		ret = rknpu_submit_ioctl(rknpu_dev, arg);
 		break;
-	case IOCTL_RKNPU_MEM_CREATE:
-		ret = rknpu_mem_create_ioctl(rknpu_dev, arg, file);
+	case RKNPU_MEM_CREATE:
+		ret = rknpu_mem_create_ioctl(rknpu_dev, file, cmd, arg);
 		break;
 	case RKNPU_MEM_MAP:
 		break;
-	case IOCTL_RKNPU_MEM_DESTROY:
-		ret = rknpu_mem_destroy_ioctl(rknpu_dev, arg, file);
+	case RKNPU_MEM_DESTROY:
+		ret = rknpu_mem_destroy_ioctl(rknpu_dev, file, arg);
 		break;
-	case IOCTL_RKNPU_MEM_SYNC:
+	case RKNPU_MEM_SYNC:
 		ret = rknpu_mem_sync_ioctl(rknpu_dev, arg);
 		break;
 	default:
@@ -530,16 +566,16 @@ static int rknpu_action_ioctl(struct drm_device *dev, void *data,
 	return rknpu_action(rknpu_dev, (struct rknpu_action *)data);
 }
 
-#define RKNPU_IOCTL(func)                                                     \
-	static int __##func(struct drm_device *dev, void *data,              \
-			    struct drm_file *file_priv)                       \
-	{                                                                     \
-		struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);   \
-		int ret = -EINVAL;                                            \
-		rknpu_power_get(rknpu_dev);                                   \
-		ret = func(dev, data, file_priv);                             \
-		rknpu_power_put_delay(rknpu_dev);                             \
-		return ret;                                                   \
+#define RKNPU_IOCTL(func)                                               \
+	static int __##func(struct drm_device *dev, void *data,         \
+			    struct drm_file *file_priv)                 \
+	{                                                               \
+		struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); \
+		int ret = -EINVAL;                                      \
+		rknpu_power_get(rknpu_dev);                             \
+		ret = func(dev, data, file_priv);                       \
+		rknpu_power_put_delay(rknpu_dev);                       \
+		return ret;                                             \
 	}
 
 RKNPU_IOCTL(rknpu_action_ioctl);
@@ -878,6 +914,9 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev)
				      ret);
 	}
 
+	if (rknpu_dev->config->state_init != NULL)
+		rknpu_dev->config->state_init(rknpu_dev);
+
 out:
 #ifndef FPGA_PLATFORM
 	rknpu_devfreq_unlock(rknpu_dev);
@@ -953,17 +992,24 @@ static int rknpu_register_irq(struct platform_device *pdev,
 {
 	const struct rknpu_config *config = rknpu_dev->config;
 	struct device *dev = &pdev->dev;
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+	struct resource *res;
+#endif
 	int i, ret, irq;
 
-	irq = platform_get_irq_byname_optional(pdev, config->irqs[0].name);
-	if (irq > 0) {
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+	res = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
+					   config->irqs[0].name);
+	if (res) {
 		/* there are irq names in dts */
 		for (i = 0; i < config->num_irqs; i++) {
 			irq = platform_get_irq_byname(pdev,
						      config->irqs[i].name);
-			if (irq < 0)
+			if (irq < 0) {
+				LOG_DEV_ERROR(dev, "no npu %s in dts\n",
					      config->irqs[i].name);
 				return irq;
+			}
 
 			ret = devm_request_irq(dev, irq,
					       config->irqs[i].irq_hdl,
@@ -990,6 +1036,28 @@ static int rknpu_register_irq(struct platform_device *pdev,
 			return ret;
 		}
 	}
+#else
+	/* there are irq names in dts */
+	for (i = 0; i < config->num_irqs; i++) {
+		irq = platform_get_irq_byname(pdev, config->irqs[i].name);
+		if (irq < 0) {
+			irq = platform_get_irq(pdev, i);
+			if (irq < 0) {
+				LOG_DEV_ERROR(dev, "no npu %s in dts\n",
+					      config->irqs[i].name);
+				return irq;
+			}
+		}
+
+		ret = devm_request_irq(dev, irq, config->irqs[i].irq_hdl,
+				       IRQF_SHARED, dev_name(dev), rknpu_dev);
+		if (ret < 0) {
+			LOG_DEV_ERROR(dev, "request %s failed: %d\n",
+				      config->irqs[i].name, ret);
+			return ret;
+		}
+	}
+#endif
 
 	return 0;
 }
@@ -1140,9 +1208,14 @@ static int rknpu_probe(struct platform_device *pdev)
 
 	rknpu_dev->config = config;
 	rknpu_dev->dev = dev;
+	dev_set_drvdata(dev, rknpu_dev);
 
 	rknpu_dev->iommu_en = rknpu_is_iommu_enable(dev);
-	if (!rknpu_dev->iommu_en) {
+	if (rknpu_dev->iommu_en) {
+		rknpu_dev->iommu_group = iommu_group_get(dev);
+		if (!rknpu_dev->iommu_group)
+			return -EINVAL;
+	} else {
 		/* Initialize reserved memory resources */
 		ret = of_reserved_mem_device_init(dev);
 		if (!ret) {
@@ -1197,6 +1270,7 @@ static int rknpu_probe(struct platform_device *pdev)
 	spin_lock_init(&rknpu_dev->irq_lock);
 	mutex_init(&rknpu_dev->power_lock);
 	mutex_init(&rknpu_dev->reset_lock);
+	mutex_init(&rknpu_dev->domain_lock);
 	for (i = 0; i < config->num_irqs; i++) {
 		INIT_LIST_HEAD(&rknpu_dev->subcore_datas[i].todo_list);
 		init_waitqueue_head(&rknpu_dev->subcore_datas[i].job_done_wq);
@@ -1333,12 +1407,19 @@ static int rknpu_probe(struct platform_device *pdev)
 	}
 
 	if (IS_ENABLED(CONFIG_NO_GKI) && rknpu_dev->iommu_en &&
-	    rknpu_dev->config->nbuf_size > 0)
+	    rknpu_dev->config->nbuf_size > 0) {
 		rknpu_find_nbuf_resource(rknpu_dev);
+		if (rknpu_dev->config->cache_sgt_init != NULL)
+			rknpu_dev->config->cache_sgt_init(rknpu_dev);
+	}
+
+	if (rknpu_dev->iommu_en)
+		rknpu_iommu_init_domain(rknpu_dev);
 
 	rknpu_power_off(rknpu_dev);
 	atomic_set(&rknpu_dev->power_refcount, 0);
 	atomic_set(&rknpu_dev->cmdline_power_refcount, 0);
+	atomic_set(&rknpu_dev->iommu_domain_refcount, 0);
 
 	rknpu_debugger_init(rknpu_dev);
 	rknpu_init_timer(rknpu_dev);
@@ -1372,17 +1453,32 @@ static int rknpu_remove(struct platform_device *pdev)
 	cancel_delayed_work_sync(&rknpu_dev->power_off_work);
 	destroy_workqueue(rknpu_dev->power_off_wq);
 
-	if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->sram_mm)
-		rknpu_mm_destroy(rknpu_dev->sram_mm);
-
 	rknpu_debugger_remove(rknpu_dev);
 	rknpu_cancel_timer(rknpu_dev);
 
+	if (rknpu_dev->config->cache_sgt_init != NULL) {
+		for (i = 0; i < RKNPU_CACHE_SG_TABLE_NUM; i++) {
+			if (rknpu_dev->cache_sgt[i]) {
+				sg_free_table(rknpu_dev->cache_sgt[i]);
+				kfree(rknpu_dev->cache_sgt[i]);
+				rknpu_dev->cache_sgt[i] = NULL;
+			}
+		}
+	}
+
 	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
 		WARN_ON(rknpu_dev->subcore_datas[i].job);
 		WARN_ON(!list_empty(&rknpu_dev->subcore_datas[i].todo_list));
 	}
 
+	if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->sram_mm)
+		rknpu_mm_destroy(rknpu_dev->sram_mm);
+
+	if (rknpu_dev->iommu_en) {
+		rknpu_iommu_free_domains(rknpu_dev);
+		iommu_group_put(rknpu_dev->iommu_group);
+	}
+
 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
 	rknpu_drm_remove(rknpu_dev);
 #endif
@@ -1414,6 +1510,26 @@ static int rknpu_remove(struct platform_device *pdev)
 }
 
 #ifndef FPGA_PLATFORM
+#ifdef CONFIG_PM_SLEEP
+static int rknpu_suspend(struct device *dev)
+{
+	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
+
+	rknpu_power_get(rknpu_dev);
+
+	return pm_runtime_force_suspend(dev);
+}
+
+static int rknpu_resume(struct device *dev)
+{
+	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
+
+	rknpu_power_put_delay(rknpu_dev);
+
+	return pm_runtime_force_resume(dev);
+}
+#endif
+
 static int rknpu_runtime_suspend(struct device *dev)
 {
 	return rknpu_devfreq_runtime_suspend(dev);
@@ -1425,10 +1541,8 @@ static int rknpu_runtime_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops rknpu_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				pm_runtime_force_resume)
-	SET_RUNTIME_PM_OPS(rknpu_runtime_suspend, rknpu_runtime_resume,
-			   NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(rknpu_suspend, rknpu_resume) SET_RUNTIME_PM_OPS(
+		rknpu_runtime_suspend, rknpu_runtime_resume, NULL)
 };
 #endif
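[Annotation - not part of the patch] The new sleep hooks above pin an RKNPU power reference before forcing runtime suspend, so the delayed power-off worker cannot race a system-sleep transition. A sketch of the resulting ordering, illustrative only:

/*
 *   rknpu_suspend()                      rknpu_resume()
 *     rknpu_power_get()        // +1       pm_runtime_force_resume()
 *     pm_runtime_force_suspend()           rknpu_power_put_delay() // -1
 *
 * The reference keeps power_refcount > 0 for the whole suspend/resume
 * window, so rknpu_power_off_delay_work() stays a no-op until resume.
 */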
diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c
index 6f790a6368f31..a5c5354ed85ce 100644
--- a/drivers/rknpu/rknpu_gem.c
+++ b/drivers/rknpu/rknpu_gem.c
@@ -10,11 +10,13 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 
 #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
@@ -36,6 +38,8 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj)
 	dma_addr_t dma_addr = 0;
 	dma_addr_t phys = 0;
 	int ret = -EINVAL, i = 0;
+	bool iova_aligned =
+		!(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT);
 
 	rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base);
 	if (IS_ERR(rknpu_obj->pages)) {
@@ -59,8 +63,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj)
 		goto put_pages;
 	}
 
-	ret = dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents,
-			 DMA_BIDIRECTIONAL);
+	ret = rknpu_iommu_dma_map_sg(drm->dev, rknpu_obj->sgt->sgl,
+				     rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL,
+				     iova_aligned);
 	if (ret == 0) {
 		ret = -EFAULT;
 		LOG_DEV_ERROR(drm->dev, "%s: dma map %zu fail\n", __func__,
@@ -94,8 +99,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj)
 	return 0;
 
 unmap_sg:
-	dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents,
-		     DMA_BIDIRECTIONAL);
+	rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl,
+				 rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL,
+				 iova_aligned);
 free_sgt:
 	sg_free_table(rknpu_obj->sgt);
@@ -110,6 +116,8 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj)
 static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj)
 {
 	struct drm_device *drm = rknpu_obj->base.dev;
+	bool iova_aligned =
+		!(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT);
 
 	if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) {
 		vunmap(rknpu_obj->kv_addr);
@@ -117,8 +125,9 @@ static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj)
 	}
 
 	if (rknpu_obj->sgt != NULL) {
-		dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl,
-			     rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL);
+		rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl,
+					 rknpu_obj->sgt->nents,
+					 DMA_BIDIRECTIONAL, iova_aligned);
 		sg_free_table(rknpu_obj->sgt);
 		kfree(rknpu_obj->sgt);
 	}
@@ -197,9 +206,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj)
 		return -ENOMEM;
 	}
 
-	rknpu_obj->cookie =
-		dma_alloc_attrs(drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr,
-				gfp_mask, rknpu_obj->dma_attrs);
+	rknpu_obj->cookie = dma_alloc_attrs(drm->dev, rknpu_obj->size,
+					    &rknpu_obj->dma_addr, gfp_mask,
+					    rknpu_obj->dma_attrs);
 	if (!rknpu_obj->cookie) {
 		/*
 		 * when RKNPU_MEM_CONTIGUOUS and IOMMU is available
@@ -213,10 +222,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj)
 			rknpu_obj->size);
 		rknpu_obj->dma_attrs &= ~DMA_ATTR_FORCE_CONTIGUOUS;
 		rknpu_obj->flags |= RKNPU_MEM_NON_CONTIGUOUS;
-		rknpu_obj->cookie =
-			dma_alloc_attrs(drm->dev, rknpu_obj->size,
-					&rknpu_obj->dma_addr, gfp_mask,
-					rknpu_obj->dma_attrs);
+		rknpu_obj->cookie = dma_alloc_attrs(
+			drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr,
+			gfp_mask, rknpu_obj->dma_attrs);
 		if (!rknpu_obj->cookie) {
 			LOG_DEV_ERROR(
				drm->dev,
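[Annotation - not part of the patch] The double negative threaded through this file is easy to misread; spelled out, illustrative only:

/*
 * flags contains RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT
 *   -> iova_aligned == false
 *   -> rknpu_iommu_dma_map_sg() takes the driver's own alloc_iova() path,
 *      trading size-aligned IOVAs for tighter packing of the IOVA space.
 *
 * flag absent
 *   -> iova_aligned == true
 *   -> the call degenerates to plain dma_map_sg()/dma_unmap_sg().
 */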
@@ -411,6 +419,50 @@ static void rknpu_gem_release(struct rknpu_gem_object *rknpu_obj)
 	kfree(rknpu_obj);
 }
 
+static int rknpu_iommu_map_with_cache_sgt(struct iommu_domain *domain,
+					  struct rknpu_device *rknpu_dev,
+					  struct rknpu_gem_object *rknpu_obj,
+					  unsigned long cache_size)
+{
+	phys_addr_t cache_start = 0;
+	unsigned long iova_start = rknpu_obj->iova_start;
+	struct scatterlist *s = NULL;
+	unsigned long length = cache_size;
+	unsigned long size = 0;
+	int i = 0;
+	int ret = 0;
+	int index = 0;
+
+	switch (rknpu_obj->core_mask) {
+	case RKNPU_CORE0_MASK:
+		index = 0;
+		break;
+	case RKNPU_CORE1_MASK:
+		index = 1;
+		break;
+	default:
+		break;
+	}
+
+	for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) {
+		cache_start = rknpu_dev->nbuf_start + s->offset;
+		size = length < s->length ? length : s->length;
+		ret = iommu_map(domain, iova_start, cache_start, size,
+				IOMMU_READ | IOMMU_WRITE);
+		if (ret) {
+			LOG_ERROR("cache iommu_map error: %d\n", ret);
+			return ret;
+		}
+		length -= size;
+		iova_start += size;
+
+		if (length == 0)
+			break;
+	}
+
+	return ret;
+}
+
 static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
					  enum rknpu_cache_type cache_type)
 {
@@ -428,6 +480,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
 	phys_addr_t cache_start = 0;
 	unsigned long cache_offset = 0;
 	unsigned long cache_size = 0;
+	bool iova_aligned =
+		!(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT);
 
 	switch (cache_type) {
 	case RKNPU_CACHE_SRAM:
@@ -457,7 +511,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
 	iovad = &cookie->iovad;
 	rknpu_obj->iova_size = iova_align(iovad, cache_size + rknpu_obj->size);
 	rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova(
-		domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev);
+		domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev,
+		iova_aligned);
 	if (!rknpu_obj->iova_start) {
 		LOG_ERROR("iommu_dma_alloc_iova failed\n");
 		return -ENOMEM;
@@ -490,9 +545,14 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
	 * |<- - - - - - - iova_size - - - - - - ->|
	 *
	 */
-	ret = iommu_map(domain, rknpu_obj->iova_start,
-			cache_start + cache_offset, cache_size,
-			IOMMU_READ | IOMMU_WRITE);
+	if (!rknpu_obj->cache_with_sgt)
+		ret = iommu_map(domain, rknpu_obj->iova_start,
+				cache_start + cache_offset, cache_size,
+				IOMMU_READ | IOMMU_WRITE);
+	else
+		ret = rknpu_iommu_map_with_cache_sgt(domain, rknpu_dev,
+						     rknpu_obj, cache_size);
+
 	if (ret) {
 		LOG_ERROR("cache iommu_map error: %d\n", ret);
 		goto free_iova;
@@ -566,7 +626,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
 
 free_iova:
 	rknpu_iommu_dma_free_iova((void *)domain->iova_cookie,
-				  rknpu_obj->iova_start, rknpu_obj->iova_size);
+				  rknpu_obj->iova_start, rknpu_obj->iova_size,
+				  iova_aligned);
 
 	return ret;
 }
@@ -578,6 +639,8 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
 	struct rknpu_device *rknpu_dev = drm->dev_private;
 	struct iommu_domain *domain = NULL;
 	unsigned long cache_size = 0;
+	bool iova_aligned =
+		!(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT);
 
 	switch (cache_type) {
 	case RKNPU_CACHE_SRAM:
@@ -599,7 +662,7 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
				    rknpu_obj->size);
 		rknpu_iommu_dma_free_iova((void *)domain->iova_cookie,
					  rknpu_obj->iova_start,
-					  rknpu_obj->iova_size);
+					  rknpu_obj->iova_size, iova_aligned);
 	}
 
 	if (rknpu_obj->pages)
@@ -612,10 +675,10 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj,
 	}
 }
 
-struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
-						 unsigned int flags,
-						 unsigned long size,
-						 unsigned long sram_size)
+struct rknpu_gem_object *
+rknpu_gem_object_create(struct drm_device *drm, unsigned int flags,
+			unsigned long size, unsigned long sram_size,
+			int iommu_domain_id, unsigned int core_mask)
 {
 	struct rknpu_device *rknpu_dev = drm->dev_private;
 	struct rknpu_gem_object *rknpu_obj = NULL;
@@ -629,6 +692,18 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 
 	remain_ddr_size = round_up(size, PAGE_SIZE);
 
+	rknpu_obj = rknpu_gem_init(drm, remain_ddr_size);
+	if (IS_ERR(rknpu_obj))
+		return rknpu_obj;
+
+	if (rknpu_iommu_domain_get_and_switch(rknpu_dev, iommu_domain_id)) {
+		LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__);
+		rknpu_gem_release(rknpu_obj);
+		return ERR_PTR(-EINVAL);
+	}
+
+	rknpu_obj->iommu_domain_id = iommu_domain_id;
+
 	if (!rknpu_dev->iommu_en && (flags & RKNPU_MEM_NON_CONTIGUOUS)) {
 		/*
 		 * when no IOMMU is available, all allocated buffers are
@@ -639,6 +714,9 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 			"non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n");
 	}
 
+	/* set memory type and cache attribute from user side. */
+	rknpu_obj->flags = flags;
+
 	if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) &&
	    (flags & RKNPU_MEM_TRY_ALLOC_SRAM) && rknpu_dev->sram_size > 0) {
 		size_t sram_free_size = 0;
@@ -647,12 +725,7 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 		if (sram_size != 0)
 			sram_size = round_up(sram_size, PAGE_SIZE);
 
-		rknpu_obj = rknpu_gem_init(drm, remain_ddr_size);
-		if (IS_ERR(rknpu_obj))
-			return rknpu_obj;
-
-		/* set memory type and cache attribute from user side. */
-		rknpu_obj->flags = flags;
+		rknpu_obj->cache_with_sgt = 0;
 
 		sram_free_size = rknpu_dev->sram_mm->free_chunks *
				 rknpu_dev->sram_mm->chunk_size;
@@ -687,18 +760,22 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 	} else if (IS_ENABLED(CONFIG_NO_GKI) &&
		   (flags & RKNPU_MEM_TRY_ALLOC_NBUF) &&
		   rknpu_dev->nbuf_size > 0) {
-		size_t nbuf_size = 0;
+		size_t nbuf_size = rknpu_dev->nbuf_size;
 
-		rknpu_obj = rknpu_gem_init(drm, remain_ddr_size);
-		if (IS_ERR(rknpu_obj))
-			return rknpu_obj;
+		rknpu_obj->cache_with_sgt = 0;
 
-		nbuf_size = remain_ddr_size <= rknpu_dev->nbuf_size ?
-				    remain_ddr_size :
-				    rknpu_dev->nbuf_size;
+		if (core_mask == RKNPU_CORE_AUTO_MASK ||
+		    core_mask == RKNPU_CORE0_MASK ||
+		    core_mask == RKNPU_CORE1_MASK) {
+			if (rknpu_dev->cache_sgt[0])
+				rknpu_obj->cache_with_sgt = 1;
+			nbuf_size = rknpu_dev->nbuf_size /
+				    rknpu_dev->config->num_irqs;
+		}
 
-		/* set memory type and cache attribute from user side. */
-		rknpu_obj->flags = flags;
+		rknpu_obj->core_mask = core_mask;
+		nbuf_size = remain_ddr_size <= nbuf_size ? remain_ddr_size :
+							   nbuf_size;
 
 		if (nbuf_size > 0) {
 			rknpu_obj->nbuf_size = nbuf_size;
@@ -712,25 +789,19 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 	}
 
 	if (remain_ddr_size > 0) {
-		rknpu_obj = rknpu_gem_init(drm, remain_ddr_size);
-		if (IS_ERR(rknpu_obj))
-			return rknpu_obj;
-
-		/* set memory type and cache attribute from user side. */
-		rknpu_obj->flags = flags;
-
 		ret = rknpu_gem_alloc_buf(rknpu_obj);
 		if (ret < 0)
 			goto gem_release;
 	}
 
-	if (rknpu_obj)
-		LOG_DEBUG(
-			"created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, nbuf size: %lu, attrs: %#lx, flags: %#x\n",
-			&rknpu_obj->dma_addr, rknpu_obj->cookie,
-			rknpu_obj->size, rknpu_obj->sram_size,
-			rknpu_obj->nbuf_size, rknpu_obj->dma_attrs,
-			rknpu_obj->flags);
+	rknpu_iommu_domain_put(rknpu_dev);
+
+	LOG_DEBUG(
+		"created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, nbuf size: %lu, attrs: %#lx, flags: %#x, iommu domain id: %d\n",
+		&rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size,
+		rknpu_obj->sram_size, rknpu_obj->nbuf_size,
+		rknpu_obj->dma_attrs, rknpu_obj->flags,
+		rknpu_obj->iommu_domain_id);
 
 	return rknpu_obj;
 
@@ -742,18 +813,36 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm,
 
 gem_release:
 	rknpu_gem_release(rknpu_obj);
 
+	rknpu_iommu_domain_put(rknpu_dev);
+
 	return ERR_PTR(ret);
 }
 
 void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj)
 {
 	struct drm_gem_object *obj = &rknpu_obj->base;
+	struct rknpu_device *rknpu_dev = obj->dev->dev_private;
+	int wait_count = 0;
+	int ret = -EINVAL;
 
 	LOG_DEBUG(
 		"destroy dma addr: %pad, cookie: %p, size: %lu, attrs: %#lx, flags: %#x, handle count: %d\n",
 		&rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size,
 		rknpu_obj->dma_attrs, rknpu_obj->flags, obj->handle_count);
 
+	do {
+		ret = rknpu_iommu_domain_get_and_switch(
+			rknpu_dev, rknpu_obj->iommu_domain_id);
+
+		if (ret && ++wait_count >= 3) {
+			LOG_DEV_ERROR(
+				rknpu_dev->dev,
+				"failed to destroy dma addr: %pad, size: %lu\n",
+				&rknpu_obj->dma_addr, rknpu_obj->size);
+			return;
+		}
+	} while (ret);
+
 	/*
 	 * do not release memory region from exporter.
 	 *
@@ -766,8 +855,6 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj)
 	} else {
 		if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) &&
		    rknpu_obj->sram_size > 0) {
-			struct rknpu_device *rknpu_dev = obj->dev->dev_private;
-
 			if (rknpu_obj->sram_obj != NULL)
 				rknpu_mm_free(rknpu_dev->sram_mm,
					      rknpu_obj->sram_obj);
@@ -783,9 +870,10 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj)
 	}
 
 	rknpu_gem_release(rknpu_obj);
+	rknpu_iommu_domain_put(rknpu_dev);
 }
 
-int rknpu_gem_create_ioctl(struct drm_device *dev, void *data,
+int rknpu_gem_create_ioctl(struct drm_device *drm, void *data,
			   struct drm_file *file_priv)
 {
 	struct rknpu_mem_create *args = data;
@@ -794,8 +882,10 @@ int rknpu_gem_create_ioctl(struct drm_device *dev, void *data,
 
 	rknpu_obj = rknpu_gem_object_find(file_priv, args->handle);
 	if (!rknpu_obj) {
-		rknpu_obj = rknpu_gem_object_create(
-			dev, args->flags, args->size, args->sram_size);
+		rknpu_obj = rknpu_gem_object_create(drm, args->flags,
						    args->size, args->sram_size,
						    args->iommu_domain_id,
						    args->core_mask);
 		if (IS_ERR(rknpu_obj))
 			return PTR_ERR(rknpu_obj);
 
@@ -831,19 +921,35 @@ int rknpu_gem_map_ioctl(struct drm_device *dev, void *data,
 #endif
 }
 
-int rknpu_gem_destroy_ioctl(struct drm_device *dev, void *data,
+int rknpu_gem_destroy_ioctl(struct drm_device *drm, void *data,
			    struct drm_file *file_priv)
 {
+	struct rknpu_device *rknpu_dev = drm->dev_private;
 	struct rknpu_gem_object *rknpu_obj = NULL;
 	struct rknpu_mem_destroy *args = data;
+	int ret = 0;
+	int wait_count = 0;
 
 	rknpu_obj = rknpu_gem_object_find(file_priv, args->handle);
 	if (!rknpu_obj)
 		return -EINVAL;
 
-	// rknpu_gem_object_put(&rknpu_obj->base);
+	do {
+		ret = rknpu_iommu_domain_get_and_switch(
+			rknpu_dev, rknpu_obj->iommu_domain_id);
+
+		if (ret && ++wait_count >= 3) {
+			LOG_DEV_ERROR(rknpu_dev->dev,
+				      "failed to destroy memory\n");
+			return ret;
+		}
+	} while (ret);
 
-	return rknpu_gem_handle_destroy(file_priv, args->handle);
+	ret = rknpu_gem_handle_destroy(file_priv, args->handle);
+
+	rknpu_iommu_domain_put(rknpu_dev);
+
+	return ret;
 }
 
 #if RKNPU_GEM_ALLOC_FROM_PAGES
@@ -901,6 +1007,53 @@ static int rknpu_gem_mmap_pages(struct rknpu_gem_object *rknpu_obj,
 }
 #endif
 
+static int rknpu_remap_pfn_with_cache_sgt(struct rknpu_device *rknpu_dev,
+					  struct rknpu_gem_object *rknpu_obj,
+					  struct vm_area_struct *vma,
+					  unsigned long cache_size)
+{
+	phys_addr_t cache_start = 0;
+	unsigned long vm_start = vma->vm_start;
+	struct scatterlist *s = NULL;
+	unsigned long length = cache_size;
+	unsigned long size = 0;
+	int i = 0;
+	int ret = 0;
+	int index = 0;
+
+	switch (rknpu_obj->core_mask) {
+	case RKNPU_CORE0_MASK:
+		index = 0;
+		break;
+	case RKNPU_CORE1_MASK:
+		index = 1;
+		break;
+	default:
+		break;
+	}
+
+	for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) {
+		cache_start = rknpu_dev->nbuf_start + s->offset;
+		size = length < s->length ? length : s->length;
+
+		vma->vm_pgoff = __phys_to_pfn(cache_start);
+		ret = remap_pfn_range(vma, vm_start, vma->vm_pgoff, size,
+				      vma->vm_page_prot);
+
+		if (ret) {
+			LOG_ERROR("cache remap_pfn_range error: %d\n", ret);
+			return ret;
+		}
+		length -= size;
+		vm_start += size;
+
+		if (length == 0)
+			break;
+	}
+
+	return ret;
+}
+
 static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj,
				struct vm_area_struct *vma,
				enum rknpu_cache_type cache_type)
@@ -946,10 +1099,16 @@ static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj,
	 * NOTE: This conversion carries a risk because the resulting PFN is not a true
	 * page frame number and may not be valid or usable in all contexts.
	 */
-	vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset);
-	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, cache_size,
-			      vma->vm_page_prot);
+	if (!rknpu_obj->cache_with_sgt) {
+		vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset);
+
+		ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+				      cache_size, vma->vm_page_prot);
+	} else
+		ret = rknpu_remap_pfn_with_cache_sgt(rknpu_dev, rknpu_obj, vma,
+						     cache_size);
+
 	if (ret)
 		return -EAGAIN;
 
@@ -1020,7 +1179,11 @@ static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj,
 
 void rknpu_gem_free_object(struct drm_gem_object *obj)
 {
+	struct rknpu_device *rknpu_dev = obj->dev->dev_private;
+
+	rknpu_power_get(rknpu_dev);
 	rknpu_gem_object_destroy(to_rknpu_obj(obj));
+	rknpu_power_put_delay(rknpu_dev);
 }
 
 int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm,
@@ -1044,7 +1207,7 @@ int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm,
 	else
 		flags = RKNPU_MEM_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE;
 
-	rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0);
+	rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0, 0, 0);
 	if (IS_ERR(rknpu_obj)) {
 		LOG_DEV_ERROR(drm->dev, "gem object allocate failed.\n");
 		return PTR_ERR(rknpu_obj);
@@ -1336,7 +1499,7 @@ int rknpu_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 		return -EINVAL;
 
 	vaddr = vmap(rknpu_obj->pages, rknpu_obj->num_pages, VM_MAP,
-			PAGE_KERNEL);
+		     PAGE_KERNEL);
 	if (!vaddr)
 		return -ENOMEM;
@@ -1367,16 +1530,78 @@ int rknpu_gem_prime_mmap(struct drm_gem_object *obj,
			 struct vm_area_struct *vma)
 	return rknpu_gem_mmap_obj(obj, vma);
 }
 
+static int rknpu_cache_sync_with_sg(struct rknpu_device *rknpu_dev,
+				    struct rknpu_gem_object *rknpu_obj,
+				    unsigned long *length,
+				    unsigned long *offset, uint32_t dir)
+{
+	struct scatterlist *s = NULL;
+	int i = 0;
+	int index = 0;
+	void __iomem *cache_start = 0;
+	unsigned long cache_length = 0;
+
+	switch (rknpu_obj->core_mask) {
+	case RKNPU_CORE0_MASK:
+		index = 0;
+		break;
+	case RKNPU_CORE1_MASK:
+		index = 1;
+		break;
+	default:
+		break;
+	}
+
+	for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) {
+		cache_start = rknpu_dev->nbuf_base_io + s->offset;
+		cache_length = (*offset + *length) <= s->length ?
+				       *length :
+				       s->length - *offset;
+		if (dir & RKNPU_MEM_SYNC_TO_DEVICE) {
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+			__dma_map_area(cache_start, cache_length,
+				       DMA_TO_DEVICE);
+#else
+			dcache_clean_poc((unsigned long)cache_start,
+					 (unsigned long)cache_start +
+						 cache_length);
+#endif
+		}
+
+		if (dir & RKNPU_MEM_SYNC_FROM_DEVICE) {
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+			__dma_unmap_area(cache_start, cache_length,
+					 DMA_FROM_DEVICE);
+#else
+			dcache_inval_poc((unsigned long)cache_start,
+					 (unsigned long)cache_start +
						 cache_length);
+#endif
+		}
+
+		*length = (*offset + *length) <= s->length ?
+				  0 :
+				  *length - cache_length;
+		*offset = 0;
+
+		if (*length == 0)
+			break;
+	}
+
+	return 0;
+}
+
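[Annotation - not part of the patch] On kernels >= 6.1 the sync paths above call the helpers exported from arch/arm64/mm/cache.S at the top of this patch instead of __dma_map_area()/__dma_unmap_area(); both take a virtual [start, end) range and operate to the Point of Coherency. A hypothetical caller flushing a 4 KiB window of an already-mapped region (va assumed):

/* Illustrative only: clean before the NPU reads, invalidate after it writes. */
void __iomem *va = rknpu_dev->nbuf_base_io;	/* assumed mapped */

dcache_clean_poc((unsigned long)va, (unsigned long)va + SZ_4K);	/* CPU -> device */
dcache_inval_poc((unsigned long)va, (unsigned long)va + SZ_4K);	/* device -> CPU */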
 static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj,
			    unsigned long *length, unsigned long *offset,
-			    enum rknpu_cache_type cache_type)
+			    enum rknpu_cache_type cache_type, uint32_t dir)
 {
-#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
 	struct drm_gem_object *obj = &rknpu_obj->base;
 	struct rknpu_device *rknpu_dev = obj->dev->dev_private;
 	void __iomem *cache_base_io = NULL;
 	unsigned long cache_offset = 0;
 	unsigned long cache_size = 0;
+	void __iomem *cache_start = 0;
+	unsigned long cache_length = 0;
 
 	switch (cache_type) {
 	case RKNPU_CACHE_SRAM:
@@ -1395,26 +1620,46 @@ static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj,
 		return -EINVAL;
 	}
 
-	if ((*offset + *length) <= cache_size) {
-		__dma_map_area(cache_base_io + *offset + cache_offset, *length,
-			       DMA_TO_DEVICE);
-		__dma_unmap_area(cache_base_io + *offset + cache_offset,
-				 *length, DMA_FROM_DEVICE);
-		*length = 0;
-		*offset = 0;
-	} else if (*offset >= cache_size) {
+	if (*offset >= cache_size) {
 		*offset -= cache_size;
+		return 0;
+	}
-	} else {
-		unsigned long cache_length = cache_size - *offset;
 
-		__dma_map_area(cache_base_io + *offset + cache_offset,
-			       cache_length, DMA_TO_DEVICE);
-		__dma_unmap_area(cache_base_io + *offset + cache_offset,
-				 cache_length, DMA_FROM_DEVICE);
-		*length -= cache_length;
+	if (!rknpu_obj->cache_with_sgt) {
+		cache_start = cache_base_io + cache_offset;
+		cache_length = (*offset + *length) <= cache_size ?
+				       *length :
+				       cache_size - *offset;
+		if (dir & RKNPU_MEM_SYNC_TO_DEVICE) {
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+			__dma_map_area(cache_start, cache_length,
+				       DMA_TO_DEVICE);
+#else
+			dcache_clean_poc((unsigned long)cache_start,
+					 (unsigned long)cache_start +
						 cache_length);
+#endif
+		}
+
+		if (dir & RKNPU_MEM_SYNC_FROM_DEVICE) {
+#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE
+			__dma_unmap_area(cache_start, cache_length,
+					 DMA_FROM_DEVICE);
+#else
+			dcache_inval_poc((unsigned long)cache_start,
+					 (unsigned long)cache_start +
						 cache_length);
+#endif
+		}
+		*length = (*offset + *length) <= cache_size ?
+				  0 :
+				  *length - cache_length;
 		*offset = 0;
+	} else {
+		rknpu_cache_sync_with_sg(rknpu_dev, rknpu_obj, length, offset,
+					 dir);
 	}
-#endif
 
 	return 0;
 }
@@ -1439,6 +1684,12 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data,
 	if (!(rknpu_obj->flags & RKNPU_MEM_CACHEABLE))
 		return -EINVAL;
 
+	if (rknpu_iommu_domain_get_and_switch(rknpu_dev,
+					      rknpu_obj->iommu_domain_id)) {
+		LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__);
+		return -EINVAL;
+	}
+
 	if (!(rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS)) {
 		if (args->flags & RKNPU_MEM_SYNC_TO_DEVICE) {
 			dma_sync_single_range_for_device(
@@ -1461,15 +1712,14 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data,
		    IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) &&
		    rknpu_obj->sram_size > 0) {
 			rknpu_cache_sync(rknpu_obj, &length, &offset,
-					 RKNPU_CACHE_SRAM);
+					 RKNPU_CACHE_SRAM, args->flags);
 		} else if (IS_ENABLED(CONFIG_NO_GKI) &&
			   rknpu_obj->nbuf_size > 0) {
 			rknpu_cache_sync(rknpu_obj, &length, &offset,
-					 RKNPU_CACHE_NBUF);
+					 RKNPU_CACHE_NBUF, args->flags);
 		}
 
-		for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents,
-			    i) {
+		for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, i) {
 			if (length == 0)
 				break;
 
@@ -1501,5 +1751,7 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
+	rknpu_iommu_domain_put(rknpu_dev);
+
 	return 0;
 }
diff --git a/drivers/rknpu/rknpu_iommu.c b/drivers/rknpu/rknpu_iommu.c
index 01620d9c30560..efa97f39c8cc2 100644
--- a/drivers/rknpu/rknpu_iommu.c
+++ b/drivers/rknpu/rknpu_iommu.c
@@ -4,17 +4,24 @@
 * Author: Felix Zeng
 */
 
+#include
+#include
+#include
+
 #include "rknpu_iommu.h"
 
+#define RKNPU_SWITCH_DOMAIN_WAIT_TIME_MS 6000
+
 dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size,
-				      u64 dma_limit, struct device *dev)
+				      u64 dma_limit, struct device *dev,
+				      bool size_aligned)
 {
 	struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	unsigned long shift, iova_len, iova = 0;
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-	dma_addr_t limit;
-#endif
+	unsigned long limit_pfn;
+	struct iova *new_iova = NULL;
+	bool alloc_fast = size_aligned;
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
@@ -42,20 +49,571 @@ dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size,
 		min_t(u64, dma_limit, domain->geometry.aperture_end);
 
 #if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE)
-	iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
+	limit_pfn = dma_limit >> shift;
 #else
-	limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn);
-
-	iova = alloc_iova_fast(iovad, iova_len, limit, true);
+	limit_pfn = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn);
 #endif
 
+	if (alloc_fast) {
+		iova = alloc_iova_fast(iovad, iova_len, limit_pfn, true);
+	} else {
+		new_iova = alloc_iova(iovad, iova_len, limit_pfn, size_aligned);
+		if (!new_iova)
+			return 0;
+		iova = new_iova->pfn_lo;
+	}
+
 	return (dma_addr_t)iova << shift;
 }
 
 void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie,
-			       dma_addr_t iova, size_t size)
+			       dma_addr_t iova, size_t size, bool size_aligned)
+{
+	struct iova_domain *iovad = &cookie->iovad;
+	bool alloc_fast = size_aligned;
+
+	if (alloc_fast)
+		free_iova_fast(iovad, iova_pfn(iovad, iova),
+			       size >> iova_shift(iovad));
+	else
+		free_iova(iovad, iova_pfn(iovad, iova));
+}
+
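[Annotation - not part of the patch] A hypothetical caller of the two IOVA paths above, with domain and dev assumed in scope: size_aligned=true keeps the historical alloc_iova_fast() behaviour (size-aligned result, per-CPU rcache), while size_aligned=false takes the uncached alloc_iova() path and packs the IOVA space more tightly:

/* Illustrative only. */
dma_addr_t iova = rknpu_iommu_dma_alloc_iova(domain, SZ_64K,
					     dma_get_mask(dev), dev, false);
if (iova)
	rknpu_iommu_dma_free_iova((void *)domain->iova_cookie, iova, SZ_64K,
				  false);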
+static int rknpu_dma_info_to_prot(enum dma_data_direction dir, bool coherent)
+{
+	int prot = coherent ? IOMMU_CACHE : 0;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		return prot | IOMMU_READ | IOMMU_WRITE;
+	case DMA_TO_DEVICE:
+		return prot | IOMMU_READ;
+	case DMA_FROM_DEVICE:
+		return prot | IOMMU_WRITE;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Prepare a successfully-mapped scatterlist to give back to the caller.
+ *
+ * At this point the segments are already laid out by iommu_dma_map_sg() to
+ * avoid individually crossing any boundaries, so we merely need to check a
+ * segment's start address to avoid concatenating across one.
+ */
+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+			 dma_addr_t dma_addr)
+{
+	struct scatterlist *s, *cur = sg;
+	unsigned long seg_mask = dma_get_seg_boundary(dev);
+	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
+	int i, count = 0;
+
+	for_each_sg(sg, s, nents, i) {
+		/* Restore this segment's original unaligned fields first */
+#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
+		dma_addr_t s_dma_addr = sg_dma_address(s);
+#endif
+		unsigned int s_iova_off = sg_dma_address(s);
+		unsigned int s_length = sg_dma_len(s);
+		unsigned int s_iova_len = s->length;
+
+		sg_dma_address(s) = DMA_MAPPING_ERROR;
+		sg_dma_len(s) = 0;
+
+#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
+		if (sg_is_dma_bus_address(s)) {
+			if (i > 0)
+				cur = sg_next(cur);
+
+			sg_dma_unmark_bus_address(s);
+			sg_dma_address(cur) = s_dma_addr;
+			sg_dma_len(cur) = s_length;
+			sg_dma_mark_bus_address(cur);
+			count++;
+			cur_len = 0;
+			continue;
+		}
+#endif
+
+		s->offset += s_iova_off;
+		s->length = s_length;
+
+		/*
+		 * Now fill in the real DMA data. If...
+		 * - there is a valid output segment to append to
+		 * - and this segment starts on an IOVA page boundary
+		 * - but doesn't fall at a segment boundary
+		 * - and wouldn't make the resulting output segment too long
+		 */
+		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
+		    (max_len - cur_len >= s_length)) {
+			/* ...then concatenate it with the previous one */
+			cur_len += s_length;
+		} else {
+			/* Otherwise start the next output segment */
+			if (i > 0)
+				cur = sg_next(cur);
+			cur_len = s_length;
+			count++;
+
+			sg_dma_address(cur) = dma_addr + s_iova_off;
+		}
+
+		sg_dma_len(cur) = cur_len;
+		dma_addr += s_iova_len;
+
+		if (s_length + s_iova_off < s_iova_len)
+			cur_len = 0;
+	}
+	return count;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ */
+static void __invalidate_sg(struct scatterlist *sg, int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
+	for_each_sg(sg, s, nents, i) {
+		if (sg_is_dma_bus_address(s)) {
+			sg_dma_unmark_bus_address(s);
+		} else {
+			if (sg_dma_address(s) != DMA_MAPPING_ERROR)
+				s->offset += sg_dma_address(s);
+			if (sg_dma_len(s))
+				s->length = sg_dma_len(s);
+		}
+		sg_dma_address(s) = DMA_MAPPING_ERROR;
+		sg_dma_len(s) = 0;
+	}
+#else
+	for_each_sg(sg, s, nents, i) {
+		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
+			s->offset += sg_dma_address(s);
+		if (sg_dma_len(s))
+			s->length = sg_dma_len(s);
+		sg_dma_address(s) = DMA_MAPPING_ERROR;
+		sg_dma_len(s) = 0;
+	}
+#endif
+}
+
+int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nents, enum dma_data_direction dir,
+			   bool iova_aligned)
 {
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
+	struct scatterlist *s = NULL, *prev = NULL;
+	int prot = rknpu_dma_info_to_prot(dir, dev_is_dma_coherent(dev));
+	dma_addr_t iova;
+	unsigned long iova_len = 0;
+	unsigned long mask = dma_get_seg_boundary(dev);
+	ssize_t ret = -EINVAL;
+	int i = 0;
+
+	if (iova_aligned)
+		return dma_map_sg(dev, sg, nents, dir);
+
+	/*
+	 * Work out how much IOVA space we need, and align the segments to
+	 * IOVA granules for the IOMMU driver to handle. With some clever
+	 * trickery we can modify the list in-place, but reversibly, by
+	 * stashing the unaligned parts in the as-yet-unused DMA fields.
+	 */
+	for_each_sg(sg, s, nents, i) {
+		size_t s_iova_off = iova_offset(iovad, s->offset);
+		size_t s_length = s->length;
+		size_t pad_len = (mask - iova_len + 1) & mask;
+
+		sg_dma_address(s) = s_iova_off;
+		sg_dma_len(s) = s_length;
+		s->offset -= s_iova_off;
+		s_length = iova_align(iovad, s_length + s_iova_off);
+		s->length = s_length;
+
+		/*
+		 * Due to the alignment of our single IOVA allocation, we can
+		 * depend on these assumptions about the segment boundary mask:
+		 * - If mask size >= IOVA size, then the IOVA range cannot
+		 *   possibly fall across a boundary, so we don't care.
+		 * - If mask size < IOVA size, then the IOVA range must start
+		 *   exactly on a boundary, therefore we can lay things out
+		 *   based purely on segment lengths without needing to know
+		 *   the actual addresses beforehand.
+		 * - The mask must be a power of 2, so pad_len == 0 if
+		 *   iova_len == 0, thus we cannot dereference prev the first
+		 *   time through here (i.e. before it has a meaningful value).
+		 */
+		if (pad_len && pad_len < s_length - 1) {
+			prev->length += pad_len;
+			iova_len += pad_len;
+		}
+
+		iova_len += s_length;
+		prev = s;
+	}
+
+	if (!iova_len) {
+		ret = __finalise_sg(dev, sg, nents, 0);
+		goto out;
+	}
+
+	iova = rknpu_iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev),
+					  dev, iova_aligned);
+	if (!iova) {
+		ret = -ENOMEM;
+		LOG_ERROR("failed to allocate IOVA: %zd\n", ret);
+		goto out_restore_sg;
+	}
+
+	ret = iommu_map_sg(domain, iova, sg, nents, prot);
+	if (ret < 0 || ret < iova_len) {
+		LOG_ERROR("failed to map SG: %zd\n", ret);
+		goto out_free_iova;
+	}
+
+	return __finalise_sg(dev, sg, nents, iova);
+
+out_free_iova:
+	rknpu_iommu_dma_free_iova(cookie, iova, iova_len, iova_aligned);
+out_restore_sg:
+	__invalidate_sg(sg, nents);
+out:
-	free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad));
+	if (ret < 0)
+		ret = 0;
+
+	return ret;
 }
+
+void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			      int nents, enum dma_data_direction dir,
+			      bool iova_aligned)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	size_t iova_off = 0;
+	dma_addr_t end = 0, start = 0;
+	struct scatterlist *tmp = NULL;
+	dma_addr_t dma_addr = 0;
+	size_t size = 0;
+	int i = 0;
+
+	if (iova_aligned)
+		return dma_unmap_sg(dev, sg, nents, dir);
+
+#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
+	/*
+	 * The scatterlist segments are mapped into a single
+	 * contiguous IOVA allocation, the start and end points
+	 * just have to be determined.
+	 */
+	for_each_sg(sg, tmp, nents, i) {
+		if (sg_is_dma_bus_address(tmp)) {
+			sg_dma_unmark_bus_address(tmp);
+			continue;
+		}
+
+		if (sg_dma_len(tmp) == 0)
+			break;
+
+		start = sg_dma_address(tmp);
+		break;
+	}
+
+	nents -= i;
+	for_each_sg(tmp, tmp, nents, i) {
+		if (sg_is_dma_bus_address(tmp)) {
+			sg_dma_unmark_bus_address(tmp);
+			continue;
+		}
+
+		if (sg_dma_len(tmp) == 0)
+			break;
+
+		end = sg_dma_address(tmp) + sg_dma_len(tmp);
+	}
+#else
+	start = sg_dma_address(sg);
+	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
+		if (sg_dma_len(tmp) == 0)
+			break;
+		sg = tmp;
+	}
+	end = sg_dma_address(sg) + sg_dma_len(sg);
+#endif
+
+	dma_addr = start;
+	size = end - start;
+	iova_off = iova_offset(iovad, start);
+
+	if (end) {
+		dma_addr -= iova_off;
+		size = iova_align(iovad, size + iova_off);
+		iommu_unmap(domain, dma_addr, size);
+		rknpu_iommu_dma_free_iova(cookie, dma_addr, size, iova_aligned);
+	}
+}
+
+#if defined(CONFIG_IOMMU_API) && defined(CONFIG_NO_GKI)
+
+#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
+struct iommu_group {
+	struct kobject kobj;
+	struct kobject *devices_kobj;
+	struct list_head devices;
+#ifdef __ANDROID_COMMON_KERNEL__
+	struct xarray pasid_array;
+#endif
+	struct mutex mutex;
+	void *iommu_data;
+	void (*iommu_data_release)(void *iommu_data);
+	char *name;
+	int id;
+	struct iommu_domain *default_domain;
+	struct iommu_domain *blocking_domain;
+	struct iommu_domain *domain;
+	struct list_head entry;
+	unsigned int owner_cnt;
+	void *owner;
+};
+#else
+struct iommu_group {
+	struct kobject kobj;
+	struct kobject *devices_kobj;
+	struct list_head devices;
+	struct mutex mutex;
+	struct blocking_notifier_head notifier;
+	void *iommu_data;
+	void (*iommu_data_release)(void *iommu_data);
+	char *name;
+	int id;
+	struct iommu_domain *default_domain;
+	struct iommu_domain *domain;
+	struct list_head entry;
+};
+#endif
+
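[Annotation - not part of the patch] The struct iommu_group definitions above mirror layouts that are private to drivers/iommu/iommu.c so the driver can reassign group->default_domain; any upstream field reordering silently corrupts that write. A hypothetical compile-time tripwire, with the version bound purely illustrative:

/* Illustrative only: force a re-audit of the mirrored layout on newer trees. */
#if KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE
#error "re-check the mirrored struct iommu_group layout before building"
#endif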
rknpu_device *rknpu_dev) +{ + // init domain 0 + if (!rknpu_dev->iommu_domains[0]) { + rknpu_dev->iommu_domain_id = 0; + rknpu_dev->iommu_domains[rknpu_dev->iommu_domain_id] = + iommu_get_domain_for_dev(rknpu_dev->dev); + rknpu_dev->iommu_domain_num = 1; + } + return 0; +} + +int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) +{ + struct iommu_domain *src_domain = NULL; + struct iommu_domain *dst_domain = NULL; + struct bus_type *bus = NULL; + int src_domain_id = 0; + int ret = -EINVAL; + + if (!rknpu_dev->iommu_en) + return -EINVAL; + + if (domain_id < 0 || domain_id > (RKNPU_MAX_IOMMU_DOMAIN_NUM - 1)) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "invalid iommu domain id: %d, reuse domain id: %d\n", + domain_id, rknpu_dev->iommu_domain_id); + return -EINVAL; + } + + bus = rknpu_dev->dev->bus; + if (!bus) + return -EFAULT; + + src_domain_id = rknpu_dev->iommu_domain_id; + if (domain_id == src_domain_id) { + return 0; + } + + src_domain = iommu_get_domain_for_dev(rknpu_dev->dev); + if (src_domain != rknpu_dev->iommu_domains[src_domain_id]) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "mismatch domain get from iommu_get_domain_for_dev\n"); + return -EINVAL; + } + + dst_domain = rknpu_dev->iommu_domains[domain_id]; + if (dst_domain != NULL) { + iommu_detach_device(src_domain, rknpu_dev->dev); + ret = iommu_attach_device(dst_domain, rknpu_dev->dev); + if (ret) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to attach dst iommu domain, id: %d, ret: %d\n", + domain_id, ret); + if (iommu_attach_device(src_domain, rknpu_dev->dev)) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to reattach src iommu domain, id: %d\n", + src_domain_id); + } + return ret; + } + rknpu_dev->iommu_domain_id = domain_id; + } else { + uint64_t dma_limit = 1ULL << 32; + + dst_domain = iommu_domain_alloc(bus); + if (!dst_domain) { + LOG_DEV_ERROR(rknpu_dev->dev, + "failed to allocate iommu domain\n"); + return -EIO; + } + // init domain iova_cookie + iommu_get_dma_cookie(dst_domain); + + iommu_detach_device(src_domain, rknpu_dev->dev); + ret = iommu_attach_device(dst_domain, rknpu_dev->dev); + if (ret) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to attach iommu domain, id: %d, ret: %d\n", + domain_id, ret); + iommu_domain_free(dst_domain); + return ret; + } + + // set domain type to dma domain + dst_domain->type |= __IOMMU_DOMAIN_DMA_API; + // iommu dma init domain + iommu_setup_dma_ops(rknpu_dev->dev, 0, dma_limit); + + rknpu_dev->iommu_domain_id = domain_id; + rknpu_dev->iommu_domains[domain_id] = dst_domain; + rknpu_dev->iommu_domain_num++; + } + + // reset default iommu domain + rknpu_dev->iommu_group->default_domain = dst_domain; + + LOG_INFO("switch iommu domain from %d to %d\n", src_domain_id, + domain_id); + + return ret; +} + +int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev, + int domain_id) +{ + unsigned long timeout_jiffies = + msecs_to_jiffies(RKNPU_SWITCH_DOMAIN_WAIT_TIME_MS); + unsigned long start = jiffies; + int ret = -EINVAL; + + while (true) { + mutex_lock(&rknpu_dev->domain_lock); + + if (domain_id == rknpu_dev->iommu_domain_id) { + atomic_inc(&rknpu_dev->iommu_domain_refcount); + mutex_unlock(&rknpu_dev->domain_lock); + break; + } + + if (atomic_read(&rknpu_dev->iommu_domain_refcount) == 0) { + ret = rknpu_iommu_switch_domain(rknpu_dev, domain_id); + if (ret) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to switch iommu domain, id: %d, ret: %d\n", + domain_id, ret); + mutex_unlock(&rknpu_dev->domain_lock); + return ret; + } + 
atomic_inc(&rknpu_dev->iommu_domain_refcount); + mutex_unlock(&rknpu_dev->domain_lock); + break; + } + + mutex_unlock(&rknpu_dev->domain_lock); + + usleep_range(10, 100); + if (time_after(jiffies, start + timeout_jiffies)) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "switch iommu domain time out, failed to switch iommu domain, id: %d\n", + domain_id); + return -EINVAL; + } + } + + return 0; +} + +int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev) +{ + atomic_dec(&rknpu_dev->iommu_domain_refcount); + + return 0; +} + +void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev) +{ + int i = 0; + + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, 0)) { + LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__); + return; + } + + for (i = 1; i < RKNPU_MAX_IOMMU_DOMAIN_NUM; i++) { + struct iommu_domain *domain = rknpu_dev->iommu_domains[i]; + + if (domain == NULL) + continue; + + iommu_detach_device(domain, rknpu_dev->dev); + iommu_domain_free(domain); + + rknpu_dev->iommu_domains[i] = NULL; + } + + rknpu_iommu_domain_put(rknpu_dev); +} + +#else + +int rknpu_iommu_init_domain(struct rknpu_device *rknpu_dev) +{ + return 0; +} + +int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) +{ + return 0; +} + +int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev, + int domain_id) +{ + return 0; +} + +int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev) +{ + return 0; +} + +void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev) +{ +} + +#endif diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 6dc94b59b3ddb..23ed8e5cf8419 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -14,8 +14,9 @@ #include "rknpu_reset.h" #include "rknpu_gem.h" #include "rknpu_fence.h" -#include "rknpu_job.h" #include "rknpu_mem.h" +#include "rknpu_iommu.h" +#include "rknpu_job.h" #define _REG_READ(base, offset) readl(base + (offset)) #define _REG_WRITE(base, value, offset) writel(value, base + (offset)) @@ -128,6 +129,7 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, struct rknpu_submit *args) { struct rknpu_job *job = NULL; + int i = 0; #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = NULL; #endif @@ -143,6 +145,9 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, ((args->core_mask & RKNPU_CORE2_MASK) >> 2); atomic_set(&job->run_count, job->use_core_num); atomic_set(&job->interrupt_count, job->use_core_num); + job->iommu_domain_id = args->iommu_domain_id; + for (i = 0; i < rknpu_dev->config->num_irqs; i++) + atomic_set(&job->submit_count[i], 0); #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; if (task_obj) @@ -205,8 +210,10 @@ static inline int rknpu_job_wait(struct rknpu_job *job) (elapse_time_us < args->timeout * 1000); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); LOG_ERROR( - "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", - job, wait_count, continue_wait, + "job: %p, mask: %#x, job iommu domain id: %d, dev iommu domain id: %d, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", + job, args->core_mask, job->iommu_domain_id, + rknpu_dev->iommu_domain_id, wait_count, + continue_wait, (job->hw_commit_time == 0 ? 
0 : elapse_time_us), ktime_us_delta(ktime_get(), job->timestamp), args->timeout * 1000); @@ -446,9 +453,8 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) job->hw_recoder_time = job->hw_commit_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); - if (atomic_dec_and_test(&job->run_count)) { + if (atomic_dec_and_test(&job->run_count)) rknpu_job_commit(job); - } } static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) @@ -479,6 +485,8 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) if (atomic_dec_and_test(&job->interrupt_count)) { int use_core_num = job->use_core_num; + rknpu_iommu_domain_put(rknpu_dev); + job->flags |= RKNPU_JOB_DONE; job->ret = ret; @@ -529,6 +537,11 @@ static void rknpu_job_schedule(struct rknpu_job *job) atomic_set(&job->interrupt_count, job->use_core_num); } + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, job->iommu_domain_id)) { + job->ret = -EINVAL; + return; + } + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { @@ -552,6 +565,8 @@ static void rknpu_job_abort(struct rknpu_job *job) unsigned long flags; int i = 0; + rknpu_iommu_domain_put(rknpu_dev); + msleep(100); spin_lock_irqsave(&rknpu_dev->irq_lock, flags); @@ -885,11 +900,6 @@ int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority, { void __iomem *base = rknpu_dev->bw_priority_base; - if (!rknpu_dev->config->bw_enable) { - LOG_WARN("Get bw_priority is not supported on this device!\n"); - return 0; - } - if (!base) return -EINVAL; @@ -914,11 +924,6 @@ int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority, { void __iomem *base = rknpu_dev->bw_priority_base; - if (!rknpu_dev->config->bw_enable) { - LOG_WARN("Set bw_priority is not supported on this device!\n"); - return 0; - } - if (!base) return -EINVAL; @@ -941,28 +946,41 @@ int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority, int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev) { void __iomem *rknpu_core_base = rknpu_dev->base[0]; + const struct rknpu_config *config = rknpu_dev->config; unsigned long flags; - if (!rknpu_dev->config->bw_enable) { + if (config->amount_top == NULL) { LOG_WARN("Clear rw_amount is not supported on this device!\n"); return 0; } - if (rknpu_dev->config->pc_dma_ctrl) { + if (config->pc_dma_ctrl) { uint32_t pc_data_addr = 0; spin_lock_irqsave(&rknpu_dev->irq_lock, flags); pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); - REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); - REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); + REG_WRITE(0x80000101, config->amount_top->offset_clr_all); + REG_WRITE(0x00000101, config->amount_top->offset_clr_all); + if (config->amount_core) { + REG_WRITE(0x80000101, + config->amount_core->offset_clr_all); + REG_WRITE(0x00000101, + config->amount_core->offset_clr_all); + } REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } else { spin_lock(&rknpu_dev->lock); - REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); - REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); + REG_WRITE(0x80000101, config->amount_top->offset_clr_all); + REG_WRITE(0x00000101, config->amount_top->offset_clr_all); + if (config->amount_core) { + REG_WRITE(0x80000101, + config->amount_core->offset_clr_all); + REG_WRITE(0x00000101, + config->amount_core->offset_clr_all); + 
} spin_unlock(&rknpu_dev->lock); } @@ -973,23 +991,42 @@ int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, uint32_t *dt_rd, uint32_t *wd_rd) { void __iomem *rknpu_core_base = rknpu_dev->base[0]; - int amount_scale = rknpu_dev->config->pc_data_amount_scale; + const struct rknpu_config *config = rknpu_dev->config; + int amount_scale = config->pc_data_amount_scale; - if (!rknpu_dev->config->bw_enable) { + if (config->amount_top == NULL) { LOG_WARN("Get rw_amount is not supported on this device!\n"); return 0; } spin_lock(&rknpu_dev->lock); - if (dt_wr != NULL) - *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale; + if (dt_wr != NULL) { + *dt_wr = REG_READ(config->amount_top->offset_dt_wr) * + amount_scale; + if (config->amount_core) { + *dt_wr += REG_READ(config->amount_core->offset_dt_wr) * + amount_scale; + } + } - if (dt_rd != NULL) - *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale; + if (dt_rd != NULL) { + *dt_rd = REG_READ(config->amount_top->offset_dt_rd) * + amount_scale; + if (config->amount_core) { + *dt_rd += REG_READ(config->amount_core->offset_dt_rd) * + amount_scale; + } + } - if (wd_rd != NULL) - *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale; + if (wd_rd != NULL) { + *wd_rd = REG_READ(config->amount_top->offset_wt_rd) * + amount_scale; + if (config->amount_core) { + *wd_rd += REG_READ(config->amount_core->offset_wt_rd) * + amount_scale; + } + } spin_unlock(&rknpu_dev->lock); @@ -998,12 +1035,13 @@ int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount) { + const struct rknpu_config *config = rknpu_dev->config; uint32_t dt_wr = 0; uint32_t dt_rd = 0; uint32_t wd_rd = 0; int ret = -EINVAL; - if (!rknpu_dev->config->bw_enable) { + if (config->amount_top == NULL) { LOG_WARN( "Get total_rw_amount is not supported on this device!\n"); return 0; diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c index 858c21f484f5e..31aede9515959 100644 --- a/drivers/rknpu/rknpu_mem.c +++ b/drivers/rknpu/rknpu_mem.c @@ -17,8 +17,8 @@ #ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP -int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, - struct file *file) +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, struct file *file, + unsigned int cmd, unsigned long data) { struct rknpu_mem_create args; int ret = -EINVAL; @@ -33,14 +33,20 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, struct rknpu_session *session = NULL; int i, fd; unsigned int length, page_count; + unsigned int in_size = _IOC_SIZE(cmd); + unsigned int k_size = sizeof(struct rknpu_mem_create); + char *k_data = (char *)&args; if (unlikely(copy_from_user(&args, (struct rknpu_mem_create *)data, - sizeof(struct rknpu_mem_create)))) { + in_size))) { LOG_ERROR("%s: copy_from_user failed\n", __func__); ret = -EFAULT; return ret; } + if (k_size > in_size) + memset(k_data + in_size, 0, k_size - in_size); + if (args.flags & RKNPU_MEM_NON_CONTIGUOUS) { LOG_ERROR("%s: malloc iommu memory unsupported in current!\n", __func__); @@ -147,7 +153,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, (__u64)rknpu_obj->dma_addr); if (unlikely(copy_to_user((struct rknpu_mem_create *)data, &args, - sizeof(struct rknpu_mem_create)))) { + in_size))) { LOG_ERROR("%s: copy_to_user failed\n", __func__); ret = -EFAULT; goto err_unmap_kv_addr; @@ -194,8 +200,8 @@ int rknpu_mem_create_ioctl(struct rknpu_device 
*rknpu_dev, unsigned long data, return ret; } -int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, - struct file *file) +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, struct file *file, + unsigned long data) { struct rknpu_mem_object *rknpu_obj, *entry, *q; struct rknpu_session *session = NULL; diff --git a/drivers/rknpu/rknpu_reset.c b/drivers/rknpu/rknpu_reset.c index 91c9b75d68e77..7bcf75b028602 100644 --- a/drivers/rknpu/rknpu_reset.c +++ b/drivers/rknpu/rknpu_reset.c @@ -28,27 +28,34 @@ static inline struct reset_control *rknpu_reset_control_get(struct device *dev, int rknpu_reset_get(struct rknpu_device *rknpu_dev) { #ifndef FPGA_PLATFORM - struct reset_control *srst_a = NULL; - struct reset_control *srst_h = NULL; int i = 0; + int num_srsts = 0; - for (i = 0; i < rknpu_dev->config->num_resets; i++) { - srst_a = rknpu_reset_control_get( - rknpu_dev->dev, - rknpu_dev->config->resets[i].srst_a_name); - if (IS_ERR(srst_a)) - return PTR_ERR(srst_a); + num_srsts = of_count_phandle_with_args(rknpu_dev->dev->of_node, + "resets", "#reset-cells"); + if (num_srsts <= 0) { + LOG_DEV_ERROR(rknpu_dev->dev, + "failed to get rknpu resets from dtb\n"); + return num_srsts; + } - rknpu_dev->srst_a[i] = srst_a; + rknpu_dev->srsts = devm_kcalloc(rknpu_dev->dev, num_srsts, + sizeof(*rknpu_dev->srsts), GFP_KERNEL); + if (!rknpu_dev->srsts) + return -ENOMEM; + + for (i = 0; i < num_srsts; ++i) { + rknpu_dev->srsts[i] = devm_reset_control_get_exclusive_by_index( + rknpu_dev->dev, i); + if (IS_ERR(rknpu_dev->srsts[i])) { + rknpu_dev->num_srsts = i; + return PTR_ERR(rknpu_dev->srsts[i]); + } + } - srst_h = rknpu_reset_control_get( - rknpu_dev->dev, - rknpu_dev->config->resets[i].srst_h_name); - if (IS_ERR(srst_h)) - return PTR_ERR(srst_h); + rknpu_dev->num_srsts = num_srsts; - rknpu_dev->srst_h[i] = srst_h; - } + return num_srsts; #endif return 0; @@ -93,7 +100,7 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) #ifndef FPGA_PLATFORM struct iommu_domain *domain = NULL; struct rknpu_subcore_data *subcore_data = NULL; - int ret = -EINVAL, i = 0; + int ret = 0, i = 0; if (rknpu_dev->bypass_soft_reset) { LOG_WARN("bypass soft reset\n"); @@ -112,17 +119,17 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) wake_up(&subcore_data->job_done_wq); } - LOG_INFO("soft reset\n"); + LOG_INFO("soft reset, num: %d\n", rknpu_dev->num_srsts); - for (i = 0; i < rknpu_dev->config->num_resets; i++) { - ret = rknpu_reset_assert(rknpu_dev->srst_a[i]); - ret |= rknpu_reset_assert(rknpu_dev->srst_h[i]); + for (i = 0; i < rknpu_dev->num_srsts; ++i) + ret |= rknpu_reset_assert(rknpu_dev->srsts[i]); - udelay(10); + udelay(10); - ret |= rknpu_reset_deassert(rknpu_dev->srst_a[i]); - ret |= rknpu_reset_deassert(rknpu_dev->srst_h[i]); - } + for (i = 0; i < rknpu_dev->num_srsts; ++i) + ret |= rknpu_reset_deassert(rknpu_dev->srsts[i]); + + udelay(10); if (ret) { LOG_DEV_ERROR(rknpu_dev->dev, @@ -141,6 +148,9 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) rknpu_dev->soft_reseting = false; + if (rknpu_dev->config->state_init != NULL) + rknpu_dev->config->state_init(rknpu_dev); + mutex_unlock(&rknpu_dev->reset_lock); #endif
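For reference, the reversible scatterlist adjustment performed by rknpu_iommu_dma_map_sg() and undone by __invalidate_sg() can be demonstrated outside the kernel. Below is a minimal userspace sketch assuming a 4 KiB IOVA granule; mock_sg, stash_and_align() and restore() are local stand-ins for this sketch, not driver or kernel API.

#include <stdio.h>

#define GRANULE 4096UL /* assumed IOVA granule (one page) */

struct mock_sg {
	unsigned long offset;   /* offset into the backing page */
	unsigned long length;   /* segment length */
	unsigned long dma_addr; /* stash: sub-granule offset */
	unsigned long dma_len;  /* stash: original length */
};

/* Forward pass: align a segment to the granule, remembering the
 * originals in the as-yet-unused DMA fields, as the map path does. */
static void stash_and_align(struct mock_sg *s)
{
	unsigned long off = s->offset & (GRANULE - 1);

	s->dma_addr = off;
	s->dma_len = s->length;
	s->offset -= off;
	s->length = (s->length + off + GRANULE - 1) & ~(GRANULE - 1);
}

/* Reverse pass: mirrors what __invalidate_sg() does per segment. */
static void restore(struct mock_sg *s)
{
	s->offset += s->dma_addr;
	s->length = s->dma_len;
	s->dma_addr = 0;
	s->dma_len = 0;
}

int main(void)
{
	struct mock_sg s = { .offset = 0x123, .length = 0x1000 };

	stash_and_align(&s);
	printf("aligned:  offset=%#lx length=%#lx\n", s.offset, s.length);
	restore(&s);
	printf("restored: offset=%#lx length=%#lx\n", s.offset, s.length);
	return 0;
}

The key property is that no information is lost: as long as nothing else touches the DMA fields, a failed iommu_map_sg() can fully rewind the list, which is exactly what the out_restore_sg path does.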
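Similarly, rknpu_iommu_dma_unmap_sg() only has to recover the bounds of the single contiguous IOVA allocation before unmapping it. A rough sketch of that start/end scan, over a flat array instead of a real scatterlist (the names and values are illustrative):

#include <stdio.h>

struct seg { unsigned long dma_addr, dma_len; };

int main(void)
{
	/* assumed example: two mapped segments, then a terminator */
	struct seg sgl[] = {
		{ 0x10000000, 0x1000 }, { 0x10001000, 0x800 }, { 0, 0 },
	};
	unsigned long start = sgl[0].dma_addr, end = 0;

	/* walk until the first zero-length entry, as the driver does */
	for (int i = 0; i < 3 && sgl[i].dma_len; i++)
		end = sgl[i].dma_addr + sgl[i].dma_len;

	/* the real code then aligns [start, end) to the IOVA granule
	 * before handing the window to iommu_unmap() */
	printf("unmap window: [%#lx, %#lx)\n", start, end);
	return 0;
}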
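The domain_lock/refcount dance in rknpu_iommu_domain_get_and_switch() is a gate: any job may pin the currently attached domain at any time, but a switch is only attempted once the reference count has drained to zero, with a bounded poll. A minimal pthread sketch of that pattern, assuming local names (mock_dev, do_switch(), the retry bound) that are not driver API:

#include <pthread.h>
#include <stdatomic.h>
#include <unistd.h>

struct mock_dev {
	pthread_mutex_t lock;
	atomic_int refcount;
	int domain_id;
};

static int do_switch(struct mock_dev *dev, int id)
{
	dev->domain_id = id; /* stand-in for detach/attach of domains */
	return 0;
}

/* Returns 0 with the reference held, or -1 on (simulated) timeout. */
static int get_and_switch(struct mock_dev *dev, int id, int max_tries)
{
	while (max_tries--) {
		pthread_mutex_lock(&dev->lock);

		if (id == dev->domain_id) {
			atomic_fetch_add(&dev->refcount, 1);
			pthread_mutex_unlock(&dev->lock);
			return 0;
		}
		if (atomic_load(&dev->refcount) == 0 &&
		    do_switch(dev, id) == 0) {
			atomic_fetch_add(&dev->refcount, 1);
			pthread_mutex_unlock(&dev->lock);
			return 0;
		}

		pthread_mutex_unlock(&dev->lock);
		usleep(10); /* matches the driver's short backoff */
	}
	return -1;
}

static void put_domain(struct mock_dev *dev)
{
	atomic_fetch_sub(&dev->refcount, 1);
}

int main(void)
{
	struct mock_dev dev = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	if (get_and_switch(&dev, 1, 1000) == 0)
		put_domain(&dev);
	return 0;
}

In the patch itself the same pairing shows up in the job path: rknpu_job_schedule() takes the reference before committing, and rknpu_job_done()/rknpu_job_abort() drop it.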
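rknpu_mem_create_ioctl() now copies only _IOC_SIZE(cmd) bytes in and zeroes any newer tail fields, so an older userspace struct without the new members keeps working. A userspace sketch of that technique, with hypothetical v1/v2 structs and memcpy standing in for copy_from_user():

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* hypothetical layouts: v2 appends a field to v1 */
struct mem_create_v1 { uint32_t flags; uint64_t size; };
struct mem_create_v2 { uint32_t flags; uint64_t size; int32_t iommu_domain_id; };

int main(void)
{
	struct mem_create_v1 user = { .flags = 0x10, .size = 4096 };
	struct mem_create_v2 args;
	size_t in_size = sizeof(user); /* what _IOC_SIZE(cmd) reports */
	size_t k_size = sizeof(args);  /* what the kernel expects now */

	memcpy(&args, &user, in_size);        /* copy_from_user() */
	if (k_size > in_size)                 /* zero the new fields */
		memset((char *)&args + in_size, 0, k_size - in_size);

	printf("domain id defaults to %d\n", (int)args.iommu_domain_id);
	return 0;
}

The copy_to_user() side is bounded the same way, so an old caller never has memory past the end of its struct written.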
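Finally, the reworked rknpu_soft_reset() asserts every reset line, waits once, then deasserts them all, rather than cycling each assert/deassert pair per core. A sketch of that ordering with stand-in functions (assert_line()/deassert_line()/settle() are not kernel API):

#include <stdio.h>

static void assert_line(int i)   { printf("assert   %d\n", i); }
static void deassert_line(int i) { printf("deassert %d\n", i); }
static void settle(void)         { /* stands in for udelay(10) */ }

int main(void)
{
	int num_srsts = 4; /* assumed: counted from the DT "resets" list */

	/* all lines held in reset together... */
	for (int i = 0; i < num_srsts; i++)
		assert_line(i);
	settle();

	/* ...then released together, so the blocks leave reset as a group */
	for (int i = 0; i < num_srsts; i++)
		deassert_line(i);
	settle();
	return 0;
}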