-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
150 lines (120 loc) · 3.86 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"github.com/MontFerret/ferret/pkg/compiler"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp"
"github.com/MontFerret/ferret/pkg/drivers/http"
"github.com/gin-gonic/gin"
)
// NewCodeGo is a single scraped discussion post. Its JSON tags match the keys
// of the object returned by the scraping query in getGolangInterview, so the
// query output can be decoded straight into a slice of these.
type NewCodeGo struct {
	// Title is the text of the post's '.post-title' element.
	Title string `json:"title"`
	// Author is the text of the post's '.post-name' element.
	Author string `json:"author"`
	// Time is the text of the post's '.post-time' element, kept as a raw string.
	Time string `json:"time"`
	// DiscussTag is the text of the post's '.discuss-tags-mod' element.
	DiscussTag string `json:"discuss_tag"`
	// Link is the URL the scraper navigated to for this post.
	Link string `json:"link"`
	// Content is the text of the post's '.nc-post-content' element.
	Content string `json:"content"`
}
// main scrapes the interview posts once at startup via getGolangInterview,
// echoes each post to stdout, and then serves the scraped slice through the
// "view.gohtml" template at "/" on address ":8888".
//
// Any failure (scraping or starting the HTTP server) is reported on stderr and
// terminates the process with exit status 1.
func main() {
	interviews, err := getGolangInterview()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	for _, interview := range interviews {
		// NOTE(review): the pattern is the two-character sequence backslash + 'n',
		// not a newline character. Confirm this is what the scraped content
		// actually contains; JSON-decoded text normally holds real newlines.
		content := strings.ReplaceAll(interview.Content, "\\n", "<br>")
		// The trailing "\n\n" keeps the blank line that separated posts when this
		// was written as Println(Sprintf(...)).
		fmt.Printf("%s:\n %s \n%s \n%s \n%s \n %s\n\n",
			interview.Title, interview.Author, interview.Link, interview.Time, interview.DiscussTag, content)
	}

	r := gin.Default()
	r.LoadHTMLGlob("templates/*")
	//r.Static("/static","./static")
	r.GET("/", func(c *gin.Context) {
		c.HTML(200, "view.gohtml", interviews)
	})

	// Run only returns when the server fails to start or stops; surface that
	// instead of exiting silently with status 0.
	if err := r.Run(":8888"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// getGolangInterview compiles and runs an embedded Ferret (FQL) query and
// decodes its JSON output into a slice of posts.
//
// The query opens the listing pages named in its `index` list (currently pages
// 21 and 22 of the tag-640 discussion listing) with the "cdp" driver, collects
// one link per '.discuss-detail' element, navigates to each link and returns
// the title, author, time, tags, link and content text of every post.
//
// NOTE(review): the listing URL uses host "www.nowcoder.com" while the post
// links are built with "http://newcoder.com" — confirm the second host is
// intentional and not a typo.
//
// It returns a non-nil error, wrapped with the failing stage, when the query
// does not compile, the program fails to run, or the output is not valid JSON
// for []*NewCodeGo. The returned slice is nil on error.
func getGolangInterview() ([]*NewCodeGo, error) {
	query := `
LET parturl="http://www.nowcoder.com/discuss/tag/640?type=2&order=3&pageSize=30&expTag=0&query=&page="
//LET index =['1','2','3','4','5','6']
LET index =['21','22']
let allarticle=(
for i in index
LET doc = DOCUMENT(parturl+i, {
driver: "cdp"
})
LET articles = ELEMENTS(doc, '.discuss-detail')
LET links = (
FOR article IN articles
let urlmap=element(article,'a')
let url= "http://newcoder.com" + urlmap.attributes.href
let newurl= left(url,34)
RETURN newurl
)
LET inner =(
FOR link IN links
NAVIGATE(doc, link, 20000)
click(doc,'.pop-close')
WAIT_ELEMENT(doc, '.nk-content', 5000)
LET texter = ELEMENT(doc, '.nk-content')
LET title = ELEMENT(texter, '.post-title')
LET name = ELEMENT(texter, '.post-name')
LET time = ELEMENT(texter, '.post-time')
LET content =ELEMENT(texter, '.nc-post-content')
LET discusstag=ELEMENT(texter, '.discuss-tags-mod')
RETURN {
title: title.innerText,
author : name.innerText,
time : time.innerText,
discuss_tag: discusstag.innerText,
link : link,
content : content.innerText
}
)
RETURN inner
)
let interview=(
for out in allarticle
for each in out
return each
)
return interview
`
	comp := compiler.New()
	program, err := comp.Compile(query)
	if err != nil {
		return nil, fmt.Errorf("compiling interview query: %w", err)
	}

	// Ferret's runtime knows no HTML drivers by default; the HTML functions in
	// its standard library need at least one registered on the context. CDP is
	// registered as the default driver and the plain HTTP driver alongside it.
	ctx := context.Background()
	ctx = drivers.WithContext(ctx, cdp.NewDriver(), drivers.AsDefault())
	ctx = drivers.WithContext(ctx, http.NewDriver())

	out, err := program.Run(ctx)
	if err != nil {
		return nil, fmt.Errorf("running interview query: %w", err)
	}

	// Dump the raw query output for debugging, followed by a blank line. This is
	// done only after the error check so a failed run does not print empty output.
	fmt.Printf("%s\n", out)
	fmt.Println()

	var res []*NewCodeGo
	if err := json.Unmarshal(out, &res); err != nil {
		return nil, fmt.Errorf("decoding interview query output: %w", err)
	}
	return res, nil
}
//func repalcebr(res []*NewCodeGo) []*NewCodeGo {
//
// for _, n := range res{
// n.Content= strings.Replace(n.Content,"\\n","<br>",-1)
// }
// return res
//}