Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/#236 cookies #242

Merged
merged 15 commits into from
Mar 15, 2019
80 changes: 80 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -462,3 +462,83 @@ func run(q string) ([]byte, error) {
}

```

## Cookies

### Non-incognito mode

By default, the ``CDP`` driver executes each query in incognito mode in order to avoid collisions with cookies persisted by previous queries.
However, sometimes this is not the desired behavior, and a query needs to be executed in a Chrome tab that still has previously persisted cookies.
To do that, we need to tell the driver to execute all queries in regular tabs. Here is how:

#### CLI

```sh
ferret --cdp-keep-cookies my-query.fql
```

#### Code

```go
package main

import (
"context"
"encoding/json"
"fmt"
"os"

"github.com/MontFerret/ferret/pkg/compiler"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp"
)

// run compiles the FQL query q and executes it with a CDP driver that is
// registered as the default driver and configured to keep cookies between
// queries. It returns whatever the compiled program's Run returns.
// Note that MustCompile is used, so an invalid query panics instead of
// returning an error.
func run(q string) ([]byte, error) {
	program := compiler.New().MustCompile(q)

	// The driver keeps cookies between queries instead of discarding them.
	keepCookiesDriver := cdp.NewDriver(cdp.WithKeepCookies())

	// Attach the driver to a fresh root context as the default one.
	ctx := drivers.WithContext(
		context.Background(),
		keepCookiesDriver,
		drivers.AsDefault(),
	)

	return program.Run(ctx)
}
```

#### Query
```
LET doc = DOCUMENT("https://www.google.com", {
driver: "cdp",
keepCookies: true
})
```

### Cookies manipulation
For finer control, you can set, get, and delete cookies manually, both when a page is loaded and afterwards:

```
// Load the page with an initial cookie passed through the DOCUMENT options.
LET doc = DOCUMENT("https://www.google.com", {
    driver: "cdp",
    cookies: [
        {
            name: "foo",
            value: "bar"
        }
    ]
})

// Add two more cookies to the loaded page, then remove the initial one by name.
COOKIE_SET(doc, { name: "baz", value: "qaz"}, { name: "daz", value: "gag" })
COOKIE_DEL(doc, "foo")

// Read a single cookie by name.
LET c = COOKIE_GET(doc, "baz")

// All cookies of the page are also available through doc.cookies.
FOR cookie IN doc.cookies
    RETURN cookie.name


```
21 changes: 14 additions & 7 deletions cli/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import (
)

type Options struct {
Cdp string
Params map[string]interface{}
Proxy string
UserAgent string
ShowTime bool
Cdp string
Params map[string]interface{}
Proxy string
UserAgent string
ShowTime bool
KeepCookies bool
}

func (opts Options) WithContext(ctx context.Context) (context.Context, context.CancelFunc) {
Expand All @@ -28,11 +29,17 @@ func (opts Options) WithContext(ctx context.Context) (context.Context, context.C
drivers.AsDefault(),
)

cdpDriver := cdp.NewDriver(
cdpOpts := []cdp.Option{
cdp.WithAddress(opts.Cdp),
cdp.WithProxy(opts.Proxy),
cdp.WithUserAgent(opts.UserAgent),
)
}

if opts.KeepCookies {
cdpOpts = append(cdpOpts, cdp.WithKeepCookies())
}

cdpDriver := cdp.NewDriver(cdpOpts...)

ctx = drivers.WithContext(
ctx,
Expand Down
12 changes: 12 additions & 0 deletions e2e/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"github.com/labstack/echo"
"net/http"
"path/filepath"
)

Expand All @@ -23,6 +24,17 @@ func New(settings Settings) *Server {
e.Debug = false
e.HideBanner = true

// Middleware that attaches an "x-ferret" cookie with the value "e2e" to every
// response before handing the request over to the next handler.
e.Use(func(next echo.HandlerFunc) echo.HandlerFunc {
	return func(c echo.Context) error {
		marker := &http.Cookie{
			Name:     "x-ferret",
			Value:    "e2e",
			HttpOnly: false,
		}
		c.SetCookie(marker)

		return next(c)
	}
})
e.Static("/", settings.Dir)
e.File("/", filepath.Join(settings.Dir, "index.html"))

Expand Down
21 changes: 21 additions & 0 deletions e2e/tests/doc_cookie_del_d.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Verifies that COOKIE_DEL removes cookies given either as a cookie value
// returned by COOKIE_GET or as a cookie name.
LET url = @dynamic
LET page = DOCUMENT(url, {
    driver: "cdp",
    cookies: [
        { name: "x-e2e", value: "test" },
        { name: "x-e2e-2", value: "test2" }
    ]
})

// The first cookie is deleted by passing the cookie itself, the second one by name.
COOKIE_DEL(page, COOKIE_GET(page, "x-e2e"), "x-e2e-2")

LET leftover1 = COOKIE_GET(page, "x-e2e")
LET leftover2 = COOKIE_GET(page, "x-e2e-2")

// Both lookups are expected to report the "none" type after deletion.
RETURN EXPECT("nonenone", TYPENAME(leftover1) + TYPENAME(leftover2))
10 changes: 10 additions & 0 deletions e2e/tests/doc_cookie_get_d.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Verifies that cookies of a loaded page are exposed through doc.cookies and
// COOKIE_GET. The page is expected to carry an "x-ferret" cookie whose value
// is "e2e".
LET url = @dynamic
LET page = DOCUMENT(url, { driver: "cdp" })

// "ok" when the page reports at least one cookie.
LET hasCookies = LENGTH(page.cookies) > 0 ? "ok" : "false"
LET marker = COOKIE_GET(page, "x-ferret")

RETURN EXPECT("ok e2e", hasCookies + " " + marker.value)
14 changes: 14 additions & 0 deletions e2e/tests/doc_cookie_load_d.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Verifies that a cookie passed through the DOCUMENT options is available on
// the loaded page.
LET url = @dynamic
LET page = DOCUMENT(url, {
    driver: "cdp",
    cookies: [
        { name: "x-e2e", value: "test" }
    ]
})

// More than one cookie is expected: the one passed above plus at least one
// other (presumably one set by the page's server - confirm against the e2e server).
LET hasCookies = LENGTH(page.cookies) > 1 ? "ok" : "false"
LET loaded = COOKIE_GET(page, "x-e2e")

RETURN EXPECT("ok test", hasCookies + " " + loaded.value)
14 changes: 14 additions & 0 deletions e2e/tests/doc_cookie_set_d.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Verifies that a cookie added with COOKIE_SET can be read back with COOKIE_GET.
LET url = @dynamic

// Load the page from the @dynamic parameter rather than a hard-coded address,
// so the test follows whatever URL it is given.
LET doc = DOCUMENT(url, {
    driver: "cdp"
})

COOKIE_SET(doc, {
    name: "x-e2e",
    value: "test"
})

LET cookie = COOKIE_GET(doc, "x-e2e")
LET expected = "test"

RETURN EXPECT(expected, cookie.value)
29 changes: 29 additions & 0 deletions examples/cookies.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package main

import (
"context"
"encoding/json"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused imports here

"fmt"
"os"

"github.com/MontFerret/ferret/pkg/compiler"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp"
)

// run compiles the FQL query q and executes it using a CDP driver that keeps
// cookies between queries and is registered as the default driver.
// It returns the result of the compiled program's Run call. Because
// MustCompile is used, an invalid query panics rather than returning an error.
func run(q string) ([]byte, error) {
	program := compiler.New().MustCompile(q)

	// Build the driver first so the context setup below reads as one step.
	driver := cdp.NewDriver(cdp.WithKeepCookies())

	// Root context carrying the cookie-keeping driver as the default one.
	ctx := drivers.WithContext(context.Background(), driver, drivers.AsDefault())

	return program.Run(ctx)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here go fmt adds a new line

4 changes: 3 additions & 1 deletion examples/crawler.fql
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
LET doc = DOCUMENT('https://www.theverge.com/tech', true)
LET doc = DOCUMENT('https://www.theverge.com/tech', {
driver: "cdp"
})
WAIT_ELEMENT(doc, '.c-compact-river__entry', 5000)
LET articles = ELEMENTS(doc, '.c-entry-box--compact__image-wrapper')
LET links = (
Expand Down
24 changes: 16 additions & 8 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ import (
"encoding/json"
"flag"
"fmt"
"github.com/MontFerret/ferret/cli"
"github.com/MontFerret/ferret/cli/browser"
"github.com/MontFerret/ferret/pkg/runtime/core"
"io/ioutil"
"os"
"strings"

"github.com/MontFerret/ferret/cli"
"github.com/MontFerret/ferret/cli/browser"
"github.com/MontFerret/ferret/pkg/runtime/core"
)

type Params []string
Expand Down Expand Up @@ -65,6 +66,12 @@ var (
"launch Chrome",
)

cdpKeepCookies = flag.Bool(
"cdp-keep-cookies",
false,
"keep cookies between queries (i.e. do not open tabs in incognito mode)",
)

proxyAddress = flag.String(
"proxy",
"",
Expand Down Expand Up @@ -153,11 +160,12 @@ func main() {
}

opts := cli.Options{
Cdp: cdpConn,
Params: p,
Proxy: *proxyAddress,
UserAgent: *userAgent,
ShowTime: *showTime,
Cdp: cdpConn,
Params: p,
Proxy: *proxyAddress,
UserAgent: *userAgent,
ShowTime: *showTime,
KeepCookies: *cdpKeepCookies,
}

stat, _ := os.Stdin.Stat()
Expand Down
Loading