From f632fb297de1784cfa4cb319aae9d3587ab51470 Mon Sep 17 00:00:00 2001 From: medcl Date: Mon, 20 Jan 2025 21:01:39 +0800 Subject: [PATCH] refactor: refactoring yuque connector --- assets/connector/yuque/directory.png | Bin 0 -> 1493 bytes assets/connector/yuque/folder.png | Bin 0 -> 943 bytes assets/connector/yuque/icon.png | Bin 1077 -> 3635 bytes config/generated.go | 1 + plugins/connectors/yuque/book.go | 18 ++++ plugins/connectors/yuque/collect.go | 137 +++++++++++++++++++++++++-- plugins/connectors/yuque/plugin.go | 15 +-- 7 files changed, 157 insertions(+), 14 deletions(-) create mode 100644 assets/connector/yuque/directory.png create mode 100644 assets/connector/yuque/folder.png diff --git a/assets/connector/yuque/directory.png b/assets/connector/yuque/directory.png new file mode 100644 index 0000000000000000000000000000000000000000..9e7ce5b1de3ade222ba0b8f4e29e7a60ef3210c1 GIT binary patch literal 1493 zcmV;`1uFW9P)y>R~*Mb!|uYe!9=%;WKB>}cA?RY*kcq}~k&TU)J%_8@4D4T)ko*lh@bghZl{ZDLlLx;s0s zhiM7yyxpDI8FobXlgwe?%mPREe}Ent6PT<;+}Z(xhITRE#;+%;%|W<@GTGr6q{Gb zD+KTz@CEQsp#zHv5C(n*j=B|DnfV>)0{+f-Kp_D_z#X8?{kY1?1aJ~qmhCL`7P!R5 zPacdMF6G;quLpDhV{Dvzz8C`N1n%bU$S1&WK>IoeRc`JBALTa61o!~>qYMFg!H2-q z>IT^};PbMC;ti(UWCEDF$y>z7xeYP_TFMZJSHyA~WY2)tcsI6RZ_+@`>IPX4kS2fO z!Ugv4-;bgwwzeZjj_~Bk6O%t(*BKZXu{-`w1sP!y$PpR#S+wry65r8W6lo)4(4t+jfniA|d}ncC{=>Zq@;FWIMz&6_vd zwy``PaQX6OZr!@Y%*+gazaO8^$BP#)c>MS=s;UCe*w{#GYb%PPpzAuCrV))siO1t6 ze_OY1<;s;ST)TFSWHO20U&_>kK{lJEsi}#co*vt_vWH;a>L`=RpsFg0q7VoKn4FxX zudk1}xj6vh@i-SRUZkq33SHOHb)8TsL?95z8-S{+q|<4ts+>(u1%ttY=UK&?;k_-{ z1A)MrlhoDKtqHukx?0-Z3sGBJ>*)4!iX+*x+3cF%x~@A4#tYv0NzsY`UI*Yc6#?9I z=Hy-O6|M^K?AbFn-IWDbFWK1G7`JcVroFwLU@%BJoh~{qP*oKR-zJI@3WZ9tb5($) zr6tauJ?v5G`Tc%z{rYv2Z8Dh@4Gj&hRf3A5hyw=>tY0-)Dw?KAyLc*#6`XH8nIhH`}r~eE6`NA=t*%XMm<@Zn`TAm7%cL0eDSC0Ivh^8dnAI`Fw7= zD+`VVSUt*1rBY7al^s`mz{JEvNyAYuaWoK$L?SdaG%!0m3qT@~pufMLGiT1QWy=;a znGCU5jJ7uUQlt6#dB(@b9e)$ZX0t>h5e^Up3?Uvk4sHE;fRXqB1+M)6SozabE-gS0@F~!W@wd6shjGrxU|it@ v@C)!0@RuD2r3mm=QJE9;IsmVE=LPr|0d+ayVzVOm00000NkvXXu0mjfg9OtI literal 0 HcmV?d00001 diff --git a/assets/connector/yuque/folder.png b/assets/connector/yuque/folder.png new file mode 100644 index 0000000000000000000000000000000000000000..948a6b69036b96fd1faa12238b61813ae172d088 GIT binary patch literal 943 zcmeAS@N?(olHy`uVBq!ia0vp^4j|0I1|(Ny7TyC=oCO|{#S9F3${@^GvDChdfq^;E z)5S5QV$R#yr@f^EMUL&aS=F{IHAp~crHgdXhJ^x(0cjIkc}gSSoJxHhf1zx)Z{g&% zU-Vb#?dFb+YHHMRa_x|gUh&nnD{zY6g$pZ8Z@J3(tUHc7dK=X!gA>4Hs< zsav08Sfd}r^xOP3l>O+3Hg_uTbH zg>$F13NqUnBV%um>)cmdB5l6$G{rDzblTKt>*3f_ggmqJV)!JiDo=Z zGnfq)o41=TNN@Qgx8~2S!)|_Sw^p{#JCwHdqgUtV`sFuH{&o5IC4Z}?lDqKcs{NcX z?^)urrUkrQ)N=Tb-1(9hyRY}F_s`#WHn^&Ihtz{&?+=wRTYm(cU04$MXr>U*GQZ=_ zxs#We&-=ZZp<3*X+?x-6H^uJoe%Ptn@@HPu8adgF>l&8JYNz>cG`^0-kAo}=5W_LZ;olh%{yDqu5>6VP6<*becf93=n zE@56}@#_uHA$6R0gn;pJ=IHc=Vt%XE<{y+OSbd+<>erh$q8caqc3x#;Xn5V|y#4<* z=?h$m46x)O$QHo8z)iwIw?T?zL7zQqdCk6kTE_XXwJPSy+x5l^f&yo*KNkOb^}c+W zqnnGj71^Hb%M4NYDEL(0b;HD9L-BY$ar^B*X8+u^P)>Z~jGLFl+!=UJeUqMg+FRrL zWfse9li4X&%Y0r;JI(B{^jIC+fmNED&M0y|C|%sQOx~9fn1=T-e_%8Jp5wt3aiMPe llHW&9#Jb#JhxuUE|LtrnuTFB5Q~+}zgQu&X%Q~loCIE*(sg?i$ literal 0 HcmV?d00001 diff --git a/assets/connector/yuque/icon.png b/assets/connector/yuque/icon.png index db0620cad55e25aa7d3a555a70afc1bd9a065a0c..1cf3d5d78c20e72c4e524e54cf4bcaf3cf059393 100644 GIT binary patch literal 3635 zcmV-34$Se1P)P&5>hH83aV73f)t5L5eOtc@X{cp7DY%2qJmUynzn|7G?WlZ z!A+bx!S=>>{Jt;u?m1^3KAbb>&Y3%R?|N;?ADXdN_xQ}rndkXGZ~vKduc#_N$j#hn z0d91A6M!2nz>RKccw_(5pFn`bKmsIE{u#gsdkx$r!XX0(MA%{g0Ys|L>e}5mvHQ|T zzR~af-$>Q(J=-5^C@-pY&KTht;0fUCz$>9%F4O(t+y88J0EVe^S-SyF@h4k%}F#q)+hj^KH>i|{AF2Y^ol&#$K3 zs2p%e;qNTmV=E;rrLanA5v$d`>VBi2!G6V?>y2^u8~w4l;j@R_S9l-rC~)_B0k|7@ z#42y~1r3Fc-H-O`;8-0pF9vGk&sRL0+mvq)@YldkT`d4_L3!B1t^NYFK-yf`T@SNj z=sCkY1pgT^57FUXRD^5v*WUwt3fQ<-01R-yg?HOfARmZ8(9ja0AWq?!pzhg42;>lD z!?`|qb$@frA>dc(r;FhL6v}ReheF{8EEBVkCs54YLqUhd`M|SR@f<&toZ_*NRpsMW z**+2gh5ZF=3*CFQI3IBLh~lH-+<8VU&TVO!tItC|IIs_R?^*yj6wC6lY#mETg=ekU zykSD)tr?pOgG%`xLd+n65fpwt)OUafSml6USFK-&Ak&=8 zW({VT8bcydGs)B>q!;9wtPk$ZTRBzvC0AO=NVD+e|1`2x&PaQ4QCTnw) z)`8;KqgCa$jXSwxW>2k5Du%PYb1Y<6NK67KuP>kGO!qX)MVC&}#)#DH&;e)N`qi^* zxvTYEs2SKX8h}brb%;XkgVs}0eblQluHT>QQ(;@(HwuLo+_h;R@7Zz?TiHnvGR-J# zQS;r*W~|s1{^#;hzB2z+F7(dRPSVDjj!|0r%rsoT0w);>fQuiWikd*l3e6hnMnCJQGO=5H@eVnRm?G&KF;OjQ>3UBJCu> zAgFIJnjYZfY8WhNu_QQ9{#K);VFhps#uA>Ne)csS%s;&PDwG>)V zXu(vvfy1|cl)ck?Sg{4UTC8fHL&W5w$uC9%5TmV5;ks1${n}J`7{v`UvsK8|^6a_) zpl4Siq7Ak}EtAOv_ieqG>DDwtf?%3eB(jcQ)xs7_g0*`)#ogA`#k?Qq&c*9kbh#kx zr@u4wbH!4z!l|Xx{K(`FQ)mHIwx@B`Z@K1dop=i$+V)H2Hshr$$2fBKUl>o?16y*a zT)%$aU>oiM+DELuRt7u-@7kxc19M=|bF{1msYy#Ww-#${`B}NYt-``@naw!3X+PU0 zxA8{r93m2bHi67*@c`#WHDc8yS5Dj^ge!w?QkZbuSO+Fx#GHQ+t}s7JaT< zPZNwvmBkGr?yhib@fH63rN83T@;npic$q{aiO_j(EdHYaxUx|b9L!yJfQAS5*03HmpFR)7>2X~ zyf0tOV6m<5AOKEqZPs_EjPd*FFX$&XM}K3a1c9u`*fKWFN4C8WBMG_Ygvn2;Pem{y zrA1URiQ!VVz=|#Ea_27fTEgM(^)^OO3=?B5h?_dAQpYg^lZ*p(&pEH z^bj|9X31<;DN@#S0TMBY7(^8#rkT-g&X&$*I>{K_q6hS4HJ*Gz-ypR|M-oAl2>IL6 z*v>1u752sq+Y0w=POi$ww!feKvwOKI9cQ++q5Mmym`cV;B_-2>+~yt$vJ{Rap(J9s zd(%G38qusSs)MroTkVk^P#;|MjaKi%!Z@yXUaqfyyL?8KC`G_(fhru{_H*1ncMmh^ zWFk19Gh9nWF*FfY3OFVt{yrBXq_7!b)Zf-gWaG{PvIiG84%d8AU`C zXMoZ#HYVe2Zg1@G2~e!9z@QXp=Y|~>SsEgrGn8kvkpR@_(Vyf$*o|qN4tXH>%up3p ztmPduJNfM12boGIR#m$raS5XZKhm-Hy;NQ;Md{Q@7j<<7cN}^n- z<=DdiftZ0KJpbA|5bf_?y%uMJ%0PVk$6m3zn^)!T{;dc3jqN{6TT+U3#C=m1rDwD1 znb@Qngo6NIP`^NqcSMDS{~SXkWcUaAc-Vbo$;dpsM0M~ zNJJ2k(hCp_hGPpS_}?q90;ZX?gNoHi*pW2`JwW@h6Y}bT3|sa0Y{7v|yZD1WA7L`- zkY5Mo0TykK97`hQixFdlR1yj;c=Y&}Y8L3rz2{_`j4d)$fFi2rWrznUqJ5{}aa+P65(*Ywf%JFs~g6KrKKR|G|S%Sj;na zZn%X%`tgr4*Pg!SnC}Z^xR5WB*{pm`Cc;EAj)?HY$s>H_^z-DZsLBA-i^cwGdx2HI zBNUzoI4+PVEZ7ycw5NFF_QUMkxc$0W-e6d=T^90X42E{nVk#MDzI&d>PCU&&p88i* zEn`Vqd#EehAy;uH&n|JUi3o+w zZBou>S9omxTRe68+x++CkC4HA*0~)uRg`+U;7%l<`i0x z*_>|CBeOXjla@!{l~Ru8HmLXzD(v1`8xN3m0Z_)9<$Q1E{I!TKan{y@aQm7?Y z%cX3I#k|Y$r8hWo?kG>4{SL2pFVaa`w8Ye4HO}okWkwyvyYP0djyJ}CF=AxUWn%!G z1vU>16vM@AiQoUu--BAF$7k5wo?_d?EU8Ioi7?lhX0|=aLcYw&?gesNuw1NgeCZ8N zcQ3G5beTxnj3+I|ld*cUB9$`rhAW73W$-9I9OJg+u^dsqqEo)V766_Belc>tjLFsn z09W!Z7qewvx-#EK+!nPaVk-J7QkE`BnQ2dzR`M+4@8JCRhy74$7~FkpyMtxs1od5c7*H|5{G<5D*~NDi z9u~M9lX11*3p@ur1RT2_AY4_P`~ALa69KFl=i#%z3p@z?+c5b?{bJ;C;KRVv>(bt9 zqYf?_fM^r6+KYnx&`{{xbH>S4jR#A5&e002ovPDHLk FV1h!p8(07U literal 1077 zcmeAS@N?(olHy`uVBq!ia0vp^4j|0I3?%1nZ+yeRz!($Y6XFWwre94DyOC~qrV@nC zg2^+LMyKlHZe;;QCO?Y-lE&w%fQ-z$=@72rnF_GVipkHT4WL?p3SxmOfm{Qi_Oq28 zPvX1p$EMxQfM`hr>U|C}3MiX(FKzO(=qb-)fNY4Y5!m*OyXgjJfM&)Rov8pafQo?# zsM7Fs1<-IHGCEsfbh;w>R_dguv3jQ~GjC=3-Ae(hi2<6HaVIVEdXC}g%Jf?xcV^uI zkwBDoE7Ryq70}I7z%IrUFF=TYf5 z)2BR%(>VduRhe)*6KF`%oeZG!fWEN3P?~Zl-R*Lb@0EgptNAfEbAYDh-%FkFEH?LE zTG9Q~uW#cW>Xkcy#yn#iK{|ZCk%$`GN%#`YRG6!vfr$Y^@FSl(^sBym9f|>Ek=M zZe2BPYEMsNLuqz;a(s-Zi?gGdzM6RJ!j?r03``oHE{-7)hu>aJuNHC?VE^#+orRMq zhbl{}OR~>Z5nn^8>i2tJ=X`6J(6ONI|8v3nJIZhFoNHS-`~L6$hYdCtAJ&)_UBGa_ zQ}oTvGfpSx-dM9aV~whX(qxI`z!?2Qfn{@CH*&8k6+2oPuy!NIriL3S&yvg?O0B%q zk24ldoEgowre)IaC7%v%>9(kzG;?S7bK~Vc@{61Q|MN`CU=Nt?Ce1Xbcjhy7#!$mL zH%SLc{SeWD>AvCz8z*W>%x0CyLyW89aOwyi#*|Kakx&$g;-w>f@ytU42@ z`@y^A`F*ZFZkv^z7yeA=~%U9?U&=R;!}@xqd+}^RLBU zyfQvDF7=5#!lr2TU~|jU*>cx2C)(OH9{o`3leqk&%=8uQJKT5}9{=S2vFus=DaH%@ zJ6AokKgoDOH>N}LXPf&I*XU=tTbm6n8LIfXtdB5s@ZE|u{oinE`xAK$CHceRR~Vke z7G9gU_}jcQ|Dz9ov?_D9Ib5@Gmv-<8h8BUj*Csr#yx;!oZ{7Oq(mZT?t>2yUQWacp z9P{(|g}5DGLYIB5^3UJ6@O=H5%*Q*W3Xkpg%l`b?<>#8pU~&I^1E-uo;rZ3chF+E? zbL+R9 300 { - panic(res.Body) + panic(errors.Errorf("%v,%v", res.StatusCode, string(res.Body))) } } @@ -93,12 +95,23 @@ func (this *Plugin) collect(connector *common.Connector, datasource *common.Data log.Infof("finished collecting for %v", currentUser.Group.Login) } +// Define a temporary struct for sorting that includes the Level +type FolderInfo struct { + // Temporary struct with RichLabel and Level + RichLabel common.RichLabel + Level int +} + func (this *Plugin) collectBooks(connector *common.Connector, datasource *common.DataSource, login, token string, cfg *YuqueConfig) { const limit = 100 offset := 0 for { + if global.ShuttingDown() { + break + } + res := get(fmt.Sprintf("/api/v2/groups/%s/repos?offse=%v&limit=%v", login, offset, limit), token) books := struct { Books []Book `json:"data"` @@ -124,6 +137,83 @@ func (this *Plugin) collectBooks(connector *common.Connector, datasource *common } bookID := bookDetail.Book.ID + bookSlug := bookDetail.Book.Slug + + //index toc + // Create a map to store folder info by doc's slug, now using RichLabel + bookTocMap := make(map[string][]common.RichLabel) + if !cfg.SkipIndexingBookToc { + res = get(fmt.Sprintf("/api/v2/repos/%v/toc", bookID), token) + bookToc := struct { + BookToc []BookToc `json:"data"` + }{} + err = util.FromJSONBytes(res.Body, &bookToc) + if err != nil { + panic(err) + } + + log.Debug("book:", bookSlug, ",", bookID, ",toc:", len(bookToc.BookToc)) + + // Create a map for quick lookup by UUID to find parent-child relationships + lookup := make(map[string]BookToc) + + // Populate lookup map + for _, doc := range bookToc.BookToc { + lookup[doc.UUID] = doc + } + + // Iterate over documents to build the folder info for docs + for _, doc := range bookToc.BookToc { + if doc.Type == "DOC" { + // Create a slice to store the folder path for the document + folderPath := []FolderInfo{} + currentDoc := doc + // Traverse upwards to construct the folder path + for currentDoc.ParentUUID != "" { + folderPath = append([]FolderInfo{ + { + RichLabel: common.RichLabel{ + Key: currentDoc.Slug, + Label: currentDoc.Title, + Icon: "folder", + }, + Level: currentDoc.Level, + }, + }, folderPath...) // Prepend to the path + currentDoc = lookup[currentDoc.ParentUUID] + } + // Add the current document itself to the path + folderPath = append([]FolderInfo{ + { + RichLabel: common.RichLabel{ + Key: currentDoc.Slug, + Label: currentDoc.Title, + Icon: "folder", + }, + Level: currentDoc.Level, + }, + }, folderPath...) + + // Sort the folderPath array by the Level field + sort.SliceStable(folderPath, func(i, j int) bool { + return folderPath[i].Level < folderPath[j].Level + }) + + // Extract the RichLabel part of the sorted folder path and store in bookTocMap + var sortedLabels []common.RichLabel + sortedLabels = append(sortedLabels, common.RichLabel{ + Key: bookDetail.Book.Slug, + Label: bookDetail.Book.Name, + Icon: "folder", + }) + for _, folderInfo := range folderPath { + sortedLabels = append(sortedLabels, folderInfo.RichLabel) + } + bookTocMap[doc.Slug] = sortedLabels + } + } + + } if cfg.IndexingBooks && (bookDetail.Book.Public > 0 || (cfg.IncludePrivateBook)) { @@ -148,6 +238,16 @@ func (this *Plugin) collectBooks(connector *common.Connector, datasource *common //Thumbnail: bookDetail.Book., } + if !cfg.SkipIndexingBookToc { + if v, ok := bookTocMap[bookDetail.Book.Slug]; ok { + document.RichCategories = v + } else { + log.Debug("missing toc info:", bookDetail.Book.Slug, ",", bookDetail.Book.Name) + } + } + + log.Debug(bookDetail.Book.Slug, ", folders:", len(document.RichCategories)) + document.Metadata = util.MapStr{ "public": bookDetail.Book.Public, "slug": bookDetail.Book.Slug, @@ -170,12 +270,13 @@ func (this *Plugin) collectBooks(connector *common.Connector, datasource *common this.save(document) } else { - log.Debug("skip book:", bookDetail.Book.Name, ",", bookDetail.Book.Public) + log.Info("skip book:", bookDetail.Book.Name, ",", bookDetail.Book.Public) } //get docs in repo if cfg.IndexingDocs { - this.collectDocs(connector, datasource, login, bookID, token, cfg) + log.Debugf("collecting docs in book: %v, toc: %v", bookSlug, len(bookTocMap)) + this.collectDocs(connector, datasource, login, bookSlug, bookID, token, cfg, &bookTocMap) } } @@ -188,13 +289,17 @@ func (this *Plugin) collectBooks(connector *common.Connector, datasource *common } -func (this *Plugin) collectDocs(connector *common.Connector, datasource *common.DataSource, login string, bookID int64, token string, cfg *YuqueConfig) { +func (this *Plugin) collectDocs(connector *common.Connector, datasource *common.DataSource, login string, bookSlug string, bookID int64, token string, cfg *YuqueConfig, toc *map[string][]common.RichLabel) { const limit = 100 offset := 0 for { + if global.ShuttingDown() { + break + } + res := get(fmt.Sprintf("/api/v2/repos/%v/docs?offse=%v&limit=%v&optional_properties=tags,hits,latest_version_id", bookID, offset, limit), token) doc := struct { Meta struct { @@ -208,12 +313,17 @@ func (this *Plugin) collectDocs(connector *common.Connector, datasource *common. panic(err) } - log.Infof("fetched %v docs for %v, book: %v, offset: %v, total: %v", len(doc.Docs), login, bookID, offset, doc.Meta.Total) + log.Infof("fetched %v docs for %v, book: %v, offset: %v, total: %v", len(doc.Docs), login, bookSlug, offset, doc.Meta.Total) for _, doc := range doc.Docs { + + if global.ShuttingDown() { + break + } + if cfg.IndexingDocs && (doc.Public > 0 || (cfg.IncludePrivateDoc)) { //get doc details - this.collectDocDetails(connector, datasource, bookID, doc.ID, token, cfg) + this.collectDocDetails(connector, datasource, bookID, doc.ID, token, cfg, toc) } else { log.Debug("skip doc:", doc.Title, ",", doc.Public) } @@ -228,7 +338,7 @@ func (this *Plugin) collectDocs(connector *common.Connector, datasource *common. } -func (this *Plugin) collectDocDetails(connector *common.Connector, datasource *common.DataSource, bookID int64, docID int64, token string, cfg *YuqueConfig) { +func (this *Plugin) collectDocDetails(connector *common.Connector, datasource *common.DataSource, bookID int64, docID int64, token string, cfg *YuqueConfig, toc *map[string][]common.RichLabel) { res := get(fmt.Sprintf("/api/v2/repos/%v/docs/%v", bookID, docID), token) doc := struct { @@ -264,6 +374,14 @@ func (this *Plugin) collectDocDetails(connector *common.Connector, datasource *c Thumbnail: doc.Doc.Cover, } + if !cfg.SkipIndexingBookToc && toc != nil { + if v, ok := (*toc)[doc.Doc.Slug]; ok { + document.RichCategories = v + } else { + log.Debug("missing toc info:", doc.Doc.Title, ",", doc.Doc.Slug, ",", document.URL) + } + } + document.Metadata = util.MapStr{ "public": doc.Doc.Public, "slug": doc.Doc.Slug, @@ -306,6 +424,11 @@ func (this *Plugin) collectUsers(connector *common.Connector, datasource *common offset := 0 for { + + if global.ShuttingDown() { + break + } + // Fetch users in the current group with pagination res := get(fmt.Sprintf("/api/v2/groups/%s/users?offset=%d", login, offset), token) var users struct { diff --git a/plugins/connectors/yuque/plugin.go b/plugins/connectors/yuque/plugin.go index 237ca56..e142742 100644 --- a/plugins/connectors/yuque/plugin.go +++ b/plugins/connectors/yuque/plugin.go @@ -19,13 +19,14 @@ import ( const YuqueKey = "yuque" type YuqueConfig struct { - Token string `config:"token"` - IncludePrivateBook bool `config:"include_private_book"` - IncludePrivateDoc bool `config:"include_private_doc"` - IndexingBooks bool `config:"indexing_books"` - IndexingDocs bool `config:"indexing_docs"` - IndexingUsers bool `config:"indexing_users"` - IndexingGroups bool `config:"indexing_groups"` + Token string `config:"token"` + IncludePrivateBook bool `config:"include_private_book"` + IncludePrivateDoc bool `config:"include_private_doc"` + IndexingBooks bool `config:"indexing_books"` + SkipIndexingBookToc bool `config:"skip_indexing_book_toc"` + IndexingDocs bool `config:"indexing_docs"` + IndexingUsers bool `config:"indexing_users"` + IndexingGroups bool `config:"indexing_groups"` } type Plugin struct {