From 985cd3fc1f30679dc2a625344e3a434202ddb328 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Tue, 26 Feb 2019 18:35:10 -0500 Subject: [PATCH 01/13] Stashed --- README.md | 17 +++++++++++++++++ pkg/drivers/cdp/driver.go | 30 ++++++++++++++++++++---------- pkg/drivers/cdp/options.go | 15 +++++++++++++++ 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a918a06b..c64cb1e9 100644 --- a/README.md +++ b/README.md @@ -462,3 +462,20 @@ func run(q string) ([]byte, error) { } ``` + +## Cookies + +By default, ``CDP`` driver execute each query in an incognito mode in order to avoid any collisions related to some persisted cookies from previous queries. +However, sometimes it might not be a desirable behavior and a query needs to be executed within a Chrome tab with earlier persisted cookies. +In order to do that, we need to inform the driver to execute all queries in regular tabs. Here is how to do that: + +#### CLI + +```sh + +``` + +#### Code + +```go +``` \ No newline at end of file diff --git a/pkg/drivers/cdp/driver.go b/pkg/drivers/cdp/driver.go index d3daa6d7..99447a45 100644 --- a/pkg/drivers/cdp/driver.go +++ b/pkg/drivers/cdp/driver.go @@ -39,7 +39,7 @@ func NewDriver(opts ...Option) *Driver { } func (drv *Driver) Name() string { - return DriverName + return drv.options.name } func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (drivers.HTMLDocument, error) { @@ -52,7 +52,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", DriverName). + Str("driver", drv.options.name). Msg("failed to initialize the driver") return nil, err @@ -64,9 +64,14 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr url = BlankPageURL } - // Create a new target belonging to the browser context, similar - // to opening a new tab in an incognito window. - createTargetArgs := target.NewCreateTargetArgs(url).SetBrowserContextID(drv.contextID) + // Create a new target belonging to the browser context + createTargetArgs := target.NewCreateTargetArgs(url) + + if drv.options.cookies == false { + // Set it to an incognito mode + createTargetArgs.SetBrowserContextID(drv.contextID) + } + createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs) if err != nil { @@ -74,7 +79,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", DriverName). + Str("driver", drv.options.name). Msg("failed to create a browser target") return nil, err @@ -88,7 +93,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", DriverName). + Str("driver", drv.options.name). Msg("failed to establish a connection") return nil, err @@ -184,6 +189,14 @@ func (drv *Driver) init(ctx context.Context) error { return errors.Wrap(err, "failed to initialize driver") } + drv.conn = bconn + drv.client = bc + drv.session = sess + + if drv.options.cookies { + return nil + } + createCtx, err := bc.Target.CreateBrowserContext(ctx) if err != nil { @@ -193,9 +206,6 @@ func (drv *Driver) init(ctx context.Context) error { return err } - drv.conn = bconn - drv.client = bc - drv.session = sess drv.contextID = createCtx.BrowserContextID } diff --git a/pkg/drivers/cdp/options.go b/pkg/drivers/cdp/options.go index 6f033243..67d077e7 100644 --- a/pkg/drivers/cdp/options.go +++ b/pkg/drivers/cdp/options.go @@ -2,9 +2,11 @@ package cdp type ( Options struct { + name string proxy string userAgent string address string + cookies bool } Option func(opts *Options) @@ -14,6 +16,7 @@ const DefaultAddress = "http://127.0.0.1:9222" func newOptions(setters []Option) *Options { opts := new(Options) + opts.name = DriverName opts.address = DefaultAddress for _, setter := range setters { @@ -40,3 +43,15 @@ func WithUserAgent(value string) Option { opts.userAgent = value } } + +func WithCookies() Option { + return func(opts *Options) { + opts.cookies = true + } +} + +func WithCustomName(name string) Option { + return func(opts *Options) { + opts.name = name + } +} From 815e0c7ae01bdcbc9bb34ad51171b446b2939b2f Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Tue, 26 Feb 2019 22:22:22 -0500 Subject: [PATCH 02/13] Added KeepCookies option to CDP driver --- README.md | 31 ++++++++++++++++++++++++++++++- cli/options.go | 21 ++++++++++++++------- examples/cookies.go | 29 +++++++++++++++++++++++++++++ main.go | 24 ++++++++++++++++-------- pkg/drivers/cdp/driver.go | 4 ++-- pkg/drivers/cdp/options.go | 14 +++++++------- 6 files changed, 98 insertions(+), 25 deletions(-) create mode 100644 examples/cookies.go diff --git a/README.md b/README.md index c64cb1e9..eebfc1e9 100644 --- a/README.md +++ b/README.md @@ -472,10 +472,39 @@ In order to do that, we need to inform the driver to execute all queries in regu #### CLI ```sh - + ferret --cdp-keep-cookies my-query.fql ``` #### Code ```go +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/MontFerret/ferret/pkg/compiler" + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/drivers/cdp" +) + +func run(q string) ([]byte, error) { + comp := compiler.New() + program := comp.MustCompile(q) + + // create a root context + ctx := context.Background() + + // we inform the driver to keep cookies between queries + ctx = drivers.WithContext( + ctx, + cdp.NewDriver(cdp.WithKeepCookies()), + drivers.AsDefault(), + ) + + return program.Run(ctx) +} ``` \ No newline at end of file diff --git a/cli/options.go b/cli/options.go index 83f55307..9804bd81 100644 --- a/cli/options.go +++ b/cli/options.go @@ -9,11 +9,12 @@ import ( ) type Options struct { - Cdp string - Params map[string]interface{} - Proxy string - UserAgent string - ShowTime bool + Cdp string + Params map[string]interface{} + Proxy string + UserAgent string + ShowTime bool + KeepCookies bool } func (opts Options) WithContext(ctx context.Context) (context.Context, context.CancelFunc) { @@ -28,11 +29,17 @@ func (opts Options) WithContext(ctx context.Context) (context.Context, context.C drivers.AsDefault(), ) - cdpDriver := cdp.NewDriver( + cdpOpts := []cdp.Option{ cdp.WithAddress(opts.Cdp), cdp.WithProxy(opts.Proxy), cdp.WithUserAgent(opts.UserAgent), - ) + } + + if opts.KeepCookies { + cdpOpts = append(cdpOpts, cdp.WithKeepCookies()) + } + + cdpDriver := cdp.NewDriver(cdpOpts...) ctx = drivers.WithContext( ctx, diff --git a/examples/cookies.go b/examples/cookies.go new file mode 100644 index 00000000..cef159c3 --- /dev/null +++ b/examples/cookies.go @@ -0,0 +1,29 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/MontFerret/ferret/pkg/compiler" + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/drivers/cdp" +) + +func run(q string) ([]byte, error) { + comp := compiler.New() + program := comp.MustCompile(q) + + // create a root context + ctx := context.Background() + + // we inform the driver to keep cookies between queries + ctx = drivers.WithContext( + ctx, + cdp.NewDriver(cdp.WithKeepCookies()), + drivers.AsDefault(), + ) + + return program.Run(ctx) +} \ No newline at end of file diff --git a/main.go b/main.go index fb6e8ae4..a5deaf41 100644 --- a/main.go +++ b/main.go @@ -5,12 +5,13 @@ import ( "encoding/json" "flag" "fmt" - "github.com/MontFerret/ferret/cli" - "github.com/MontFerret/ferret/cli/browser" - "github.com/MontFerret/ferret/pkg/runtime/core" "io/ioutil" "os" "strings" + + "github.com/MontFerret/ferret/cli" + "github.com/MontFerret/ferret/cli/browser" + "github.com/MontFerret/ferret/pkg/runtime/core" ) type Params []string @@ -65,6 +66,12 @@ var ( "launch Chrome", ) + cdpKeepCookies = flag.Bool( + "cdp-keep-cookies", + false, + "keep cookies between queries (i.e. do not open tabs in incognito mode)", + ) + proxyAddress = flag.String( "proxy", "", @@ -153,11 +160,12 @@ func main() { } opts := cli.Options{ - Cdp: cdpConn, - Params: p, - Proxy: *proxyAddress, - UserAgent: *userAgent, - ShowTime: *showTime, + Cdp: cdpConn, + Params: p, + Proxy: *proxyAddress, + UserAgent: *userAgent, + ShowTime: *showTime, + KeepCookies: *cdpKeepCookies, } stat, _ := os.Stdin.Stat() diff --git a/pkg/drivers/cdp/driver.go b/pkg/drivers/cdp/driver.go index 99447a45..923e0d2c 100644 --- a/pkg/drivers/cdp/driver.go +++ b/pkg/drivers/cdp/driver.go @@ -67,7 +67,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr // Create a new target belonging to the browser context createTargetArgs := target.NewCreateTargetArgs(url) - if drv.options.cookies == false { + if drv.options.keepCookies == false { // Set it to an incognito mode createTargetArgs.SetBrowserContextID(drv.contextID) } @@ -193,7 +193,7 @@ func (drv *Driver) init(ctx context.Context) error { drv.client = bc drv.session = sess - if drv.options.cookies { + if drv.options.keepCookies { return nil } diff --git a/pkg/drivers/cdp/options.go b/pkg/drivers/cdp/options.go index 67d077e7..93b81291 100644 --- a/pkg/drivers/cdp/options.go +++ b/pkg/drivers/cdp/options.go @@ -2,11 +2,11 @@ package cdp type ( Options struct { - name string - proxy string - userAgent string - address string - cookies bool + name string + proxy string + userAgent string + address string + keepCookies bool } Option func(opts *Options) @@ -44,9 +44,9 @@ func WithUserAgent(value string) Option { } } -func WithCookies() Option { +func WithKeepCookies() Option { return func(opts *Options) { - opts.cookies = true + opts.keepCookies = true } } From 6d468c3a5cf57a58c2492fcdf92890e41a88f98d Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Wed, 27 Feb 2019 16:11:00 -0500 Subject: [PATCH 03/13] Added LoadDocumentParams --- pkg/drivers/cdp/document.go | 41 ++++++++ pkg/drivers/cdp/driver.go | 77 ++++++++++++--- pkg/drivers/cdp/helpers.go | 58 ++++++++++++ pkg/drivers/cdp/options.go | 24 ++--- pkg/drivers/common/getter.go | 17 ++++ pkg/drivers/common/setter.go | 2 + pkg/drivers/cookie.go | 175 ++++++++++++++++++++++++++++++++++ pkg/drivers/driver.go | 11 ++- pkg/drivers/header.go | 135 ++++++++++++++++++++++++++ pkg/drivers/http/document.go | 40 +++++++- pkg/drivers/http/driver.go | 57 ++++++++--- pkg/drivers/pdf.go | 4 +- pkg/drivers/type.go | 8 +- pkg/drivers/value.go | 4 + pkg/stdlib/html/document.go | 178 +++++++++++++++++++++++++++++++++-- 15 files changed, 776 insertions(+), 55 deletions(-) create mode 100644 pkg/drivers/cookie.go create mode 100644 pkg/drivers/header.go diff --git a/pkg/drivers/cdp/document.go b/pkg/drivers/cdp/document.go index e2b989b3..85701ac3 100644 --- a/pkg/drivers/cdp/document.go +++ b/pkg/drivers/cdp/document.go @@ -18,6 +18,7 @@ import ( "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/input" + "github.com/mafredri/cdp/protocol/network" "github.com/mafredri/cdp/protocol/page" "github.com/mafredri/cdp/rpcc" "github.com/pkg/errors" @@ -315,6 +316,46 @@ func (doc *HTMLDocument) GetURL() core.Value { return doc.url } +func (doc *HTMLDocument) GetCookies(ctx context.Context) (*values.Array, error) { + doc.Lock() + defer doc.Unlock() + + repl, err := doc.client.Network.GetAllCookies(ctx) + + if err != nil { + return values.NewArray(0), err + } + + if repl.Cookies == nil { + return values.NewArray(0), nil + } + + cookies := values.NewArray(len(repl.Cookies)) + + for _, c := range repl.Cookies { + cookies.Push(toDriverCookie(c)) + } + + return cookies, nil +} + +func (doc *HTMLDocument) SetCookies(ctx context.Context, cookies ...drivers.Cookie) error { + doc.Lock() + defer doc.Unlock() + + if len(cookies) == 0 { + return nil + } + + params := make([]network.CookieParam, 0, len(cookies)) + + for i, c := range cookies { + params[i] = fromDriverCookie(c) + } + + return doc.client.Network.SetCookies(ctx, network.NewSetCookiesArgs(params)) +} + func (doc *HTMLDocument) SetURL(ctx context.Context, url values.String) error { return doc.Navigate(ctx, url) } diff --git a/pkg/drivers/cdp/driver.go b/pkg/drivers/cdp/driver.go index 923e0d2c..3345dbe9 100644 --- a/pkg/drivers/cdp/driver.go +++ b/pkg/drivers/cdp/driver.go @@ -2,15 +2,16 @@ package cdp import ( "context" + "encoding/json" "sync" "github.com/MontFerret/ferret/pkg/drivers" "github.com/MontFerret/ferret/pkg/drivers/common" "github.com/MontFerret/ferret/pkg/runtime/logging" - "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/mafredri/cdp" "github.com/mafredri/cdp/devtool" "github.com/mafredri/cdp/protocol/emulation" + "github.com/mafredri/cdp/protocol/network" "github.com/mafredri/cdp/protocol/page" "github.com/mafredri/cdp/protocol/target" "github.com/mafredri/cdp/rpcc" @@ -33,16 +34,16 @@ type Driver struct { func NewDriver(opts ...Option) *Driver { drv := new(Driver) drv.options = newOptions(opts) - drv.dev = devtool.New(drv.options.address) + drv.dev = devtool.New(drv.options.Address) return drv } func (drv *Driver) Name() string { - return drv.options.name + return drv.options.Name } -func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (drivers.HTMLDocument, error) { +func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocumentParams) (drivers.HTMLDocument, error) { logger := logging.FromContext(ctx) err := drv.init(ctx) @@ -52,13 +53,13 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", drv.options.name). + Str("driver", drv.options.Name). Msg("failed to initialize the driver") return nil, err } - url := targetURL.String() + url := params.Url if url == "" { url = BlankPageURL @@ -67,7 +68,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr // Create a new target belonging to the browser context createTargetArgs := target.NewCreateTargetArgs(url) - if drv.options.keepCookies == false { + if drv.options.KeepCookies == false && params.KeepCookies == false { // Set it to an incognito mode createTargetArgs.SetBrowserContextID(drv.contextID) } @@ -79,7 +80,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", drv.options.name). + Str("driver", drv.options.Name). Msg("failed to create a browser target") return nil, err @@ -93,7 +94,7 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr Error(). Timestamp(). Err(err). - Str("driver", drv.options.name). + Str("driver", drv.options.Name). Msg("failed to establish a connection") return nil, err @@ -122,7 +123,13 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr }, func() error { - ua := common.GetUserAgent(drv.options.userAgent) + var ua string + + if params.UserAgent != "" { + ua = common.GetUserAgent(params.UserAgent) + } else { + ua = common.GetUserAgent(drv.options.UserAgent) + } logger. Debug(). @@ -140,6 +147,54 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr emulation.NewSetUserAgentOverrideArgs(ua), ) }, + + func() error { + if params.Cookies != nil { + cookies := make([]network.CookieParam, 0, len(params.Cookies)) + + for i, c := range params.Cookies { + cookies[i] = fromDriverCookie(c) + + logger. + Debug(). + Timestamp(). + Str("cookie", c.Name). + Msg("set cookie") + } + + return client.Network.SetCookies( + ctx, + network.NewSetCookiesArgs(cookies), + ) + } + + return nil + }, + + func() error { + if params.Header != nil { + j, err := json.Marshal(params.Header) + + if err != nil { + return err + } + + for k := range params.Header { + logger. + Debug(). + Timestamp(). + Str("header", k). + Msg("set header") + } + + return client.Network.SetExtraHTTPHeaders( + ctx, + network.NewSetExtraHTTPHeadersArgs(network.Headers(j)), + ) + } + + return nil + }, ) if err != nil { @@ -193,7 +248,7 @@ func (drv *Driver) init(ctx context.Context) error { drv.client = bc drv.session = sess - if drv.options.keepCookies { + if drv.options.KeepCookies { return nil } diff --git a/pkg/drivers/cdp/helpers.go b/pkg/drivers/cdp/helpers.go index 40685471..410032cc 100644 --- a/pkg/drivers/cdp/helpers.go +++ b/pkg/drivers/cdp/helpers.go @@ -4,8 +4,12 @@ import ( "bytes" "context" "errors" + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/mafredri/cdp/protocol/network" "math" + "net/http" "strings" + "time" "github.com/MontFerret/ferret/pkg/drivers/cdp/eval" "github.com/MontFerret/ferret/pkg/drivers/cdp/events" @@ -402,3 +406,57 @@ func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { return broker, nil } + +func fromDriverCookie(cookie drivers.Cookie) network.CookieParam { + sameSite := network.CookieSameSiteNotSet + + switch cookie.SameSite { + case http.SameSiteLaxMode: + sameSite = network.CookieSameSiteLax + break + case http.SameSiteStrictMode: + sameSite = network.CookieSameSiteStrict + break + default: + sameSite = network.CookieSameSiteNotSet + break + } + + return network.CookieParam{ + Name: cookie.Name, + Value: cookie.Value, + Secure: &cookie.Secure, + Path: &cookie.Path, + Domain: &cookie.Domain, + HTTPOnly: &cookie.HttpOnly, + SameSite: sameSite, + Expires: network.TimeSinceEpoch(cookie.Expires.Unix()), + } +} + +func toDriverCookie(c network.Cookie) drivers.Cookie { + sameSite := http.SameSiteDefaultMode + + switch c.SameSite { + case network.CookieSameSiteLax: + sameSite = http.SameSiteLaxMode + break + case network.CookieSameSiteStrict: + sameSite = http.SameSiteStrictMode + break + default: + sameSite = http.SameSiteDefaultMode + break + } + + return drivers.Cookie{ + Name: c.Name, + Value: c.Value, + Path: c.Path, + Domain: c.Domain, + Expires: time.Unix(int64(c.Expires), 0), + SameSite: sameSite, + Secure: c.Secure, + HttpOnly: c.HTTPOnly, + } +} diff --git a/pkg/drivers/cdp/options.go b/pkg/drivers/cdp/options.go index 93b81291..c92e2bdf 100644 --- a/pkg/drivers/cdp/options.go +++ b/pkg/drivers/cdp/options.go @@ -2,11 +2,11 @@ package cdp type ( Options struct { - name string - proxy string - userAgent string - address string - keepCookies bool + Name string + Proxy string + UserAgent string + Address string + KeepCookies bool } Option func(opts *Options) @@ -16,8 +16,8 @@ const DefaultAddress = "http://127.0.0.1:9222" func newOptions(setters []Option) *Options { opts := new(Options) - opts.name = DriverName - opts.address = DefaultAddress + opts.Name = DriverName + opts.Address = DefaultAddress for _, setter := range setters { setter(opts) @@ -28,30 +28,30 @@ func newOptions(setters []Option) *Options { func WithAddress(address string) Option { return func(opts *Options) { - opts.address = address + opts.Address = address } } func WithProxy(address string) Option { return func(opts *Options) { - opts.proxy = address + opts.Proxy = address } } func WithUserAgent(value string) Option { return func(opts *Options) { - opts.userAgent = value + opts.UserAgent = value } } func WithKeepCookies() Option { return func(opts *Options) { - opts.keepCookies = true + opts.KeepCookies = true } } func WithCustomName(name string) Option { return func(opts *Options) { - opts.name = name + opts.Name = name } } diff --git a/pkg/drivers/common/getter.go b/pkg/drivers/common/getter.go index e37d64b3..6a247326 100644 --- a/pkg/drivers/common/getter.go +++ b/pkg/drivers/common/getter.go @@ -22,6 +22,23 @@ func GetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va switch segment { case "url", "URL": return doc.GetURL(), nil + case "cookies": + if len(path) == 1 { + return doc.GetCookies(ctx) + } + + switch idx := path[1].(type) { + case values.Int: + cookies, err := doc.GetCookies(ctx) + + if err != nil { + return values.None, err + } + + return cookies.Get(idx), nil + default: + return values.None, core.TypeError(idx.Type(), types.Int) + } case "body": return doc.QuerySelector(ctx, "body"), nil case "head": diff --git a/pkg/drivers/common/setter.go b/pkg/drivers/common/setter.go index 78851f8a..7dd356da 100644 --- a/pkg/drivers/common/setter.go +++ b/pkg/drivers/common/setter.go @@ -22,6 +22,8 @@ func SetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va switch segment { case "url", "URL": return doc.SetURL(ctx, values.NewString(value.String())) + case "cookies": + default: return SetInNode(ctx, doc, path, value) } diff --git a/pkg/drivers/cookie.go b/pkg/drivers/cookie.go new file mode 100644 index 00000000..eb4ab152 --- /dev/null +++ b/pkg/drivers/cookie.go @@ -0,0 +1,175 @@ +package drivers + +import ( + "context" + "encoding/json" + "fmt" + "hash/fnv" + "net/http" + "strconv" + "strings" + "time" + + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" + "github.com/MontFerret/ferret/pkg/runtime/values/types" +) + +// Cookie Cookie object +type Cookie struct { + Name string `json:"name"` + Value string `json:"value"` + + Path string `json:"path"` + Domain string `json:"domain"` + Expires time.Time `json:"expires"` + + MaxAge int `json:"max_age"` + Secure bool `json:"secure"` + HttpOnly bool `json:"http_only"` + SameSite http.SameSite `json:"same_site"` +} + +func (c Cookie) Type() core.Type { + return HTTPCookieType +} + +func (c Cookie) String() string { + return fmt.Sprintf("%s=%s", c.Name, c.Value) +} + +func (c Cookie) Compare(other core.Value) int64 { + if other.Type() != HTTPCookieType { + return Compare(HTTPCookieType, other.Type()) + } + + oc := other.(Cookie) + + if c.Name != oc.Name { + return int64(strings.Compare(c.Name, oc.Name)) + } + + if c.Value != oc.Value { + return int64(strings.Compare(c.Value, oc.Value)) + } + + if c.Path != oc.Path { + return int64(strings.Compare(c.Path, oc.Path)) + } + + if c.Domain != oc.Domain { + return int64(strings.Compare(c.Domain, oc.Domain)) + } + + if c.Expires.After(oc.Expires) { + return 1 + } else if c.Expires.Before(oc.Expires) { + return -1 + } + + if c.MaxAge > oc.MaxAge { + return 1 + } else if c.MaxAge < oc.MaxAge { + return -1 + } + + if c.Secure && !oc.Secure { + return 1 + } else if !c.Secure && oc.Secure { + return -1 + } + + if c.HttpOnly && !oc.HttpOnly { + return 1 + } else if !c.HttpOnly && oc.HttpOnly { + return -1 + } + + if c.SameSite > oc.SameSite { + return 1 + } else if c.SameSite < oc.SameSite { + return -1 + } + + return 0 +} + +func (c Cookie) Unwrap() interface{} { + return c.Value +} + +func (c Cookie) Hash() uint64 { + h := fnv.New64a() + + h.Write([]byte(c.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(c.Name)) + h.Write([]byte(c.Value)) + h.Write([]byte(c.Path)) + h.Write([]byte(c.Domain)) + h.Write([]byte(c.Expires.String())) + h.Write([]byte(strconv.Itoa(c.MaxAge))) + h.Write([]byte(fmt.Sprintf("%t", c.Secure))) + h.Write([]byte(fmt.Sprintf("%t", c.HttpOnly))) + h.Write([]byte(strconv.Itoa(int(c.SameSite)))) + + return h.Sum64() +} + +func (c Cookie) Copy() core.Value { + return *(&c) +} + +func (c Cookie) MarshalJSON() ([]byte, error) { + out, err := json.Marshal(c) + + if err != nil { + return nil, err + } + + return out, err +} + +func (c Cookie) GetIn(_ context.Context, path []core.Value) (core.Value, error) { + if len(path) == 0 { + return values.None, nil + } + + segment := path[0] + + err := core.ValidateType(segment, types.String) + + if err != nil { + return values.None, err + } + + switch segment.(values.String) { + case "name": + return values.NewString(c.Name), nil + case "value": + return values.NewString(c.Value), nil + case "path": + return values.NewString(c.Path), nil + case "domain": + return values.NewString(c.Domain), nil + case "expires": + return values.NewDateTime(c.Expires), nil + case "maxAge": + return values.NewInt(c.MaxAge), nil + case "secure": + return values.NewBoolean(c.Secure), nil + case "httpOnly": + return values.NewBoolean(c.HttpOnly), nil + case "sameSite": + switch c.SameSite { + case http.SameSiteLaxMode: + return values.NewString("Lax"), nil + case http.SameSiteStrictMode: + return values.NewString("Strict"), nil + default: + return values.NewString("Default"), nil + } + default: + return values.None, nil + } +} diff --git a/pkg/drivers/driver.go b/pkg/drivers/driver.go index c964ef97..02005d84 100644 --- a/pkg/drivers/driver.go +++ b/pkg/drivers/driver.go @@ -6,7 +6,6 @@ import ( "time" "github.com/MontFerret/ferret/pkg/runtime/core" - "github.com/MontFerret/ferret/pkg/runtime/values" ) const DefaultTimeout = time.Second * 30 @@ -19,10 +18,18 @@ type ( drivers map[string]Driver } + LoadDocumentParams struct { + Url string + UserAgent string + KeepCookies bool + Cookies []Cookie + Header Header + } + Driver interface { io.Closer Name() string - GetDocument(ctx context.Context, url values.String) (HTMLDocument, error) + LoadDocument(ctx context.Context, params LoadDocumentParams) (HTMLDocument, error) } ) diff --git a/pkg/drivers/header.go b/pkg/drivers/header.go new file mode 100644 index 00000000..e69bdd0d --- /dev/null +++ b/pkg/drivers/header.go @@ -0,0 +1,135 @@ +package drivers + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "hash/fnv" + "net/textproto" + "sort" + "strings" + + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" + "github.com/MontFerret/ferret/pkg/runtime/values/types" +) + +// Cookie Cookie object +type Header map[string][]string + +func (h Header) Type() core.Type { + return HTTPHeaderType +} + +func (h Header) String() string { + var buf bytes.Buffer + + for k := range h { + buf.WriteString(fmt.Sprintf("%s=%s;", k, h.Get(k))) + } + + return buf.String() +} + +func (h Header) Compare(other core.Value) int64 { + if other.Type() != HTTPHeaderType { + return Compare(HTTPHeaderType, other.Type()) + } + + oh := other.(Header) + + if len(h) > len(oh) { + return 1 + } else if len(h) < len(oh) { + return -1 + } + + for k := range h { + c := strings.Compare(h.Get(k), oh.Get(k)) + + if c != 0 { + return int64(c) + } + } + + return 0 +} + +func (h Header) Unwrap() interface{} { + return h +} + +func (h Header) Hash() uint64 { + hash := fnv.New64a() + + hash.Write([]byte(h.Type().String())) + hash.Write([]byte(":")) + hash.Write([]byte("{")) + + keys := make([]string, 0, len(h)) + + for key := range h { + keys = append(keys, key) + } + + // order does not really matter + // but it will give us a consistent hash sum + sort.Strings(keys) + endIndex := len(keys) - 1 + + for idx, key := range keys { + hash.Write([]byte(key)) + hash.Write([]byte(":")) + + value := h.Get(key) + + hash.Write([]byte(value)) + + if idx != endIndex { + hash.Write([]byte(",")) + } + } + + hash.Write([]byte("}")) + + return hash.Sum64() +} + +func (h Header) Copy() core.Value { + return *(&h) +} + +func (h Header) MarshalJSON() ([]byte, error) { + out, err := json.Marshal(h) + + if err != nil { + return nil, err + } + + return out, err +} + +func (h Header) Set(key, value string) { + textproto.MIMEHeader(h).Set(key, value) +} + +func (h Header) Get(key string) string { + return textproto.MIMEHeader(h).Get(key) +} + +func (h Header) GetIn(_ context.Context, path []core.Value) (core.Value, error) { + if len(path) == 0 { + return values.None, nil + } + + segment := path[0] + + err := core.ValidateType(segment, types.String) + + if err != nil { + return values.None, err + } + + return values.NewString(h.Get(segment.String())), nil +} diff --git a/pkg/drivers/http/document.go b/pkg/drivers/http/document.go index 0cbc75b9..c2c60046 100644 --- a/pkg/drivers/http/document.go +++ b/pkg/drivers/http/document.go @@ -12,14 +12,16 @@ import ( ) type HTMLDocument struct { - url values.String docNode *goquery.Document element drivers.HTMLElement + url values.String + cookies []drivers.Cookie } func NewHTMLDocument( - url string, node *goquery.Document, + url string, + cookies []drivers.Cookie, ) (drivers.HTMLDocument, error) { if url == "" { return nil, core.Error(core.ErrMissedArgument, "document url") @@ -35,7 +37,7 @@ func NewHTMLDocument( return nil, err } - return &HTMLDocument{values.NewString(url), node, el}, nil + return &HTMLDocument{node, el, values.NewString(url), cookies}, nil } func (doc *HTMLDocument) MarshalJSON() ([]byte, error) { @@ -82,7 +84,7 @@ func (doc *HTMLDocument) Hash() uint64 { } func (doc *HTMLDocument) Copy() core.Value { - cp, err := NewHTMLDocument(string(doc.url), doc.docNode) + cp, err := NewHTMLDocument(doc.docNode, string(doc.url), doc.cookies) if err != nil { return values.None @@ -92,7 +94,17 @@ func (doc *HTMLDocument) Copy() core.Value { } func (doc *HTMLDocument) Clone() core.Value { - cp, err := NewHTMLDocument(string(doc.url), goquery.CloneDocument(doc.docNode)) + var cookies []drivers.Cookie + + if doc.cookies != nil { + cookies = make([]drivers.Cookie, 0, len(doc.cookies)) + + for i, c := range doc.cookies { + cookies[i] = c + } + } + + cp, err := NewHTMLDocument(goquery.CloneDocument(doc.docNode), string(doc.url), cookies) if err != nil { return values.None @@ -161,6 +173,24 @@ func (doc *HTMLDocument) SetURL(_ context.Context, _ values.String) error { return core.ErrInvalidOperation } +func (doc *HTMLDocument) GetCookies(_ context.Context) (*values.Array, error) { + if doc.cookies == nil { + return values.NewArray(0), nil + } + + arr := values.NewArray(len(doc.cookies)) + + for _, c := range doc.cookies { + arr.Push(c) + } + + return arr, nil +} + +func (doc *HTMLDocument) SetCookies(_ context.Context, _ ...drivers.Cookie) error { + return core.ErrNotSupported +} + func (doc *HTMLDocument) Navigate(_ context.Context, _ values.String) error { return core.ErrNotSupported } diff --git a/pkg/drivers/http/driver.go b/pkg/drivers/http/driver.go index e1aacbda..c175c6f0 100644 --- a/pkg/drivers/http/driver.go +++ b/pkg/drivers/http/driver.go @@ -11,7 +11,6 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/logging" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/PuerkitoBio/goquery" - "github.com/corpix/uarand" "github.com/pkg/errors" "github.com/sethgrid/pester" ) @@ -63,39 +62,67 @@ func (drv *Driver) Name() string { return DriverName } -func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (drivers.HTMLDocument, error) { - u := targetURL.String() - req, err := http.NewRequest(http.MethodGet, u, nil) +func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocumentParams) (drivers.HTMLDocument, error) { + req, err := http.NewRequest(http.MethodGet, params.Url, nil) if err != nil { return nil, err } + logger := logging.FromContext(ctx) + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8") req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8") req.Header.Set("Cache-Control", "no-cache") req.Header.Set("Pragma", "no-cache") + if params.Header != nil { + for k := range params.Header { + req.Header.Add(k, params.Header.Get(k)) + + logger. + Debug(). + Timestamp(). + Str("header", k). + Msg("set header") + } + } + + if params.Cookies != nil { + for _, c := range params.Cookies { + req.AddCookie(&http.Cookie{ + Name: c.Name, + Value: c.Value, + }) + + logger. + Debug(). + Timestamp(). + Str("cookie", c.Name). + Msg("set cookie") + } + } + req = req.WithContext(ctx) - ua := common.GetUserAgent(drv.options.userAgent) + var ua string + + if params.UserAgent != "" { + ua = common.GetUserAgent(params.UserAgent) + } else { + ua = common.GetUserAgent(drv.options.userAgent) + } - logger := logging.FromContext(ctx) logger. Debug(). Timestamp(). Str("user-agent", ua). Msg("using User-Agent") - // use custom user agent - if ua != "" { - req.Header.Set("User-Agent", uarand.GetRandom()) - } - resp, err := drv.client.Do(req) if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve a document %s", u) + return nil, errors.Wrapf(err, "failed to retrieve a document %s", params.Url) } defer resp.Body.Close() @@ -103,10 +130,10 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (dr doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, errors.Wrapf(err, "failed to parse a document %s", u) + return nil, errors.Wrapf(err, "failed to parse a document %s", params.Url) } - return NewHTMLDocument(u, doc) + return NewHTMLDocument(doc, params.Url, params.Cookies) } func (drv *Driver) ParseDocument(_ context.Context, str values.String) (drivers.HTMLDocument, error) { @@ -118,7 +145,7 @@ func (drv *Driver) ParseDocument(_ context.Context, str values.String) (drivers. return nil, errors.Wrap(err, "failed to parse a document") } - return NewHTMLDocument("#string", doc) + return NewHTMLDocument(doc, "#string", nil) } func (drv *Driver) Close() error { diff --git a/pkg/drivers/pdf.go b/pkg/drivers/pdf.go index 658cd1b8..ac8ef321 100644 --- a/pkg/drivers/pdf.go +++ b/pkg/drivers/pdf.go @@ -6,7 +6,7 @@ import "github.com/MontFerret/ferret/pkg/runtime/values" type PDFParams struct { // Paper orientation. Defaults to false. Landscape values.Boolean - // Display header and footer. Defaults to false. + // Display values and footer. Defaults to false. DisplayHeaderFooter values.Boolean // Print background graphics. Defaults to false. PrintBackground values.Boolean @@ -28,7 +28,7 @@ type PDFParams struct { PageRanges values.String // Whether to silently ignore invalid but successfully parsed page ranges, such as '3-2'. Defaults to false. IgnoreInvalidPageRanges values.Boolean - // HTML template for the print header. Should be valid HTML markup with following classes used to inject printing values into them: - `date`: formatted print date - `title`: document title - `url`: document location - `pageNumber`: current page number - `totalPages`: total pages in the document + // HTML template for the print values. Should be valid HTML markup with following classes used to inject printing values into them: - `date`: formatted print date - `title`: document title - `url`: document location - `pageNumber`: current page number - `totalPages`: total pages in the document // For example, `` would generate span containing the title. HeaderTemplate values.String // HTML template for the print footer. Should use the same format as the `headerTemplate`. diff --git a/pkg/drivers/type.go b/pkg/drivers/type.go index 8a1645d7..730cff20 100644 --- a/pkg/drivers/type.go +++ b/pkg/drivers/type.go @@ -3,14 +3,18 @@ package drivers import "github.com/MontFerret/ferret/pkg/runtime/core" var ( + HTTPHeaderType = core.NewType("HTTPHeader") + HTTPCookieType = core.NewType("HTTPCookie") HTMLElementType = core.NewType("HTMLElement") HTMLDocumentType = core.NewType("HTMLDocument") ) // Comparison table of builtin types var typeComparisonTable = map[core.Type]uint64{ - HTMLElementType: 0, - HTMLDocumentType: 1, + HTTPHeaderType: 0, + HTTPCookieType: 1, + HTMLElementType: 2, + HTMLDocumentType: 3, } func Compare(first, second core.Type) int64 { diff --git a/pkg/drivers/value.go b/pkg/drivers/value.go index 8cfd36ff..52aa355d 100644 --- a/pkg/drivers/value.go +++ b/pkg/drivers/value.go @@ -88,6 +88,10 @@ type ( SetURL(ctx context.Context, url values.String) error + GetCookies(ctx context.Context) (*values.Array, error) + + SetCookies(ctx context.Context, cookies ...Cookie) error + Navigate(ctx context.Context, url values.String) error NavigateBack(ctx context.Context, skip values.Int) (values.Boolean, error) diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index 46720fba..abcdfe0e 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -2,6 +2,8 @@ package html import ( "context" + "net/http" + "strings" "time" "github.com/MontFerret/ferret/pkg/drivers" @@ -12,6 +14,7 @@ import ( ) type DocumentLoadParams struct { + drivers.LoadDocumentParams Driver string Timeout time.Duration } @@ -43,9 +46,9 @@ func Document(ctx context.Context, args ...core.Value) (core.Value, error) { var params DocumentLoadParams if len(args) == 1 { - params = newDefaultDocLoadParams() + params = newDefaultDocLoadParams(url) } else { - p, err := newDocLoadParams(args[1]) + p, err := newDocLoadParams(url, args[1]) if err != nil { return values.None, err @@ -63,17 +66,20 @@ func Document(ctx context.Context, args ...core.Value) (core.Value, error) { return values.None, err } - return drv.GetDocument(ctx, url) + return drv.LoadDocument(ctx, params.LoadDocumentParams) } -func newDefaultDocLoadParams() DocumentLoadParams { +func newDefaultDocLoadParams(url values.String) DocumentLoadParams { return DocumentLoadParams{ + LoadDocumentParams: drivers.LoadDocumentParams{ + Url: url.String(), + }, Timeout: time.Second * 30, } } -func newDocLoadParams(arg core.Value) (DocumentLoadParams, error) { - res := newDefaultDocLoadParams() +func newDocLoadParams(url values.String, arg core.Value) (DocumentLoadParams, error) { + res := newDefaultDocLoadParams(url) if err := core.ValidateType(arg, types.Boolean, types.String, types.Object); err != nil { return res, err @@ -103,6 +109,58 @@ func newDocLoadParams(arg core.Value) (DocumentLoadParams, error) { res.Timeout = time.Duration(timeout.(values.Int)) + time.Millisecond } + userAgent, exists := obj.Get(values.NewString("userAgent")) + + if exists { + if err := core.ValidateType(userAgent, types.String); err != nil { + return res, err + } + + res.UserAgent = userAgent.String() + } + + keepCookies, exists := obj.Get(values.NewString("keepCookies")) + + if exists { + if err := core.ValidateType(userAgent, types.Boolean); err != nil { + return res, err + } + + res.KeepCookies = bool(keepCookies.(values.Boolean)) + } + + cookies, exists := obj.Get(values.NewString("cookies")) + + if exists { + if err := core.ValidateType(userAgent, types.Array); err != nil { + return res, err + } + + cookies, err := parseCookies(cookies.(*values.Array)) + + if err != nil { + return res, err + } + + res.Cookies = cookies + } + + header, exists := obj.Get(values.NewString("header")) + + if exists { + if err := core.ValidateType(userAgent, types.Object); err != nil { + return res, err + } + + header, err := parseHeader(header.(*values.Object)) + + if err != nil { + return res, err + } + + res.Header = header + } + break case types.String: res.Driver = arg.(values.String).String() @@ -121,3 +179,111 @@ func newDocLoadParams(arg core.Value) (DocumentLoadParams, error) { return res, nil } + +func parseCookies(cookies *values.Array) ([]drivers.Cookie, error) { + var err error + res := make([]drivers.Cookie, 0, cookies.Length()) + + cookies.ForEach(func(value core.Value, idx int) bool { + if err = core.ValidateType(value, types.Object); err != nil { + return false + } + + co := value.(*values.Object) + + cookie := drivers.Cookie{ + Name: co.MustGet("name").String(), + Value: co.MustGet("value").String(), + Path: co.MustGet("path").String(), + Domain: co.MustGet("domain").String(), + } + + maxAge, exists := co.Get("maxAge") + + if exists { + if err = core.ValidateType(maxAge, types.Int); err != nil { + return false + } + + cookie.MaxAge = int(maxAge.(values.Int)) + } + + expires, exists := co.Get("expires") + + if exists { + if err = core.ValidateType(maxAge, types.DateTime, types.String); err != nil { + return false + } + + if expires.Type() == types.DateTime { + cookie.Expires = expires.(values.DateTime).Unwrap().(time.Time) + } else { + t, e := time.Parse(expires.String(), values.DefaultTimeLayout) + + if e != nil { + err = e + + return false + } + + cookie.Expires = t + } + } + + sameSite, exists := co.Get("sameSite") + + if exists { + sameSite := strings.ToLower(sameSite.String()) + + switch sameSite { + case "lax": + cookie.SameSite = http.SameSiteLaxMode + break + case "strict": + cookie.SameSite = http.SameSiteStrictMode + break + default: + cookie.SameSite = http.SameSiteDefaultMode + break + } + } + + httpOnly, exists := co.Get("httpOnly") + + if exists { + if err = core.ValidateType(httpOnly, types.Boolean); err != nil { + return false + } + + cookie.HttpOnly = bool(httpOnly.(values.Boolean)) + } + + secure, exists := co.Get("secure") + + if exists { + if err = core.ValidateType(secure, types.Boolean); err != nil { + return false + } + + cookie.Secure = bool(secure.(values.Boolean)) + } + + res[idx] = cookie + + return true + }) + + return res, err +} + +func parseHeader(header *values.Object) (drivers.Header, error) { + res := make(drivers.Header) + + header.ForEach(func(value core.Value, key string) bool { + res.Set(key, value.String()) + + return true + }) + + return res, nil +} From efb3791822c0a56fbb3beeedccfc2a82336920d8 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Wed, 27 Feb 2019 18:11:49 -0500 Subject: [PATCH 04/13] Added COOKIE_GET and COOKIE_SET methods --- examples/crawler.fql | 4 +- pkg/drivers/cdp/document.go | 2 +- pkg/drivers/cdp/helpers.go | 6 +- pkg/drivers/cookie.go | 22 ++--- pkg/drivers/driver.go | 4 +- pkg/drivers/header.go | 26 +++--- pkg/drivers/http/document.go | 10 +-- pkg/drivers/value.go | 2 +- pkg/runtime/values/array.go | 10 +++ pkg/stdlib/html/cookie_get.go | 56 +++++++++++++ pkg/stdlib/html/cookie_set.go | 42 ++++++++++ pkg/stdlib/html/document.go | 154 +++++++++++++++++++--------------- pkg/stdlib/html/lib.go | 2 + 13 files changed, 234 insertions(+), 106 deletions(-) create mode 100644 pkg/stdlib/html/cookie_get.go create mode 100644 pkg/stdlib/html/cookie_set.go diff --git a/examples/crawler.fql b/examples/crawler.fql index 959e7049..22322787 100644 --- a/examples/crawler.fql +++ b/examples/crawler.fql @@ -1,4 +1,6 @@ -LET doc = DOCUMENT('https://www.theverge.com/tech', true) +LET doc = DOCUMENT('https://www.theverge.com/tech', { + driver: "cdp" +}) WAIT_ELEMENT(doc, '.c-compact-river__entry', 5000) LET articles = ELEMENTS(doc, '.c-entry-box--compact__image-wrapper') LET links = ( diff --git a/pkg/drivers/cdp/document.go b/pkg/drivers/cdp/document.go index 85701ac3..3eafb3c5 100644 --- a/pkg/drivers/cdp/document.go +++ b/pkg/drivers/cdp/document.go @@ -339,7 +339,7 @@ func (doc *HTMLDocument) GetCookies(ctx context.Context) (*values.Array, error) return cookies, nil } -func (doc *HTMLDocument) SetCookies(ctx context.Context, cookies ...drivers.Cookie) error { +func (doc *HTMLDocument) SetCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error { doc.Lock() defer doc.Unlock() diff --git a/pkg/drivers/cdp/helpers.go b/pkg/drivers/cdp/helpers.go index 410032cc..0f9df690 100644 --- a/pkg/drivers/cdp/helpers.go +++ b/pkg/drivers/cdp/helpers.go @@ -407,7 +407,7 @@ func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { return broker, nil } -func fromDriverCookie(cookie drivers.Cookie) network.CookieParam { +func fromDriverCookie(cookie drivers.HTTPCookie) network.CookieParam { sameSite := network.CookieSameSiteNotSet switch cookie.SameSite { @@ -434,7 +434,7 @@ func fromDriverCookie(cookie drivers.Cookie) network.CookieParam { } } -func toDriverCookie(c network.Cookie) drivers.Cookie { +func toDriverCookie(c network.Cookie) drivers.HTTPCookie { sameSite := http.SameSiteDefaultMode switch c.SameSite { @@ -449,7 +449,7 @@ func toDriverCookie(c network.Cookie) drivers.Cookie { break } - return drivers.Cookie{ + return drivers.HTTPCookie{ Name: c.Name, Value: c.Value, Path: c.Path, diff --git a/pkg/drivers/cookie.go b/pkg/drivers/cookie.go index eb4ab152..1b95e023 100644 --- a/pkg/drivers/cookie.go +++ b/pkg/drivers/cookie.go @@ -15,8 +15,8 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/values/types" ) -// Cookie Cookie object -type Cookie struct { +// HTTPCookie HTTPCookie object +type HTTPCookie struct { Name string `json:"name"` Value string `json:"value"` @@ -30,20 +30,20 @@ type Cookie struct { SameSite http.SameSite `json:"same_site"` } -func (c Cookie) Type() core.Type { +func (c HTTPCookie) Type() core.Type { return HTTPCookieType } -func (c Cookie) String() string { +func (c HTTPCookie) String() string { return fmt.Sprintf("%s=%s", c.Name, c.Value) } -func (c Cookie) Compare(other core.Value) int64 { +func (c HTTPCookie) Compare(other core.Value) int64 { if other.Type() != HTTPCookieType { return Compare(HTTPCookieType, other.Type()) } - oc := other.(Cookie) + oc := other.(HTTPCookie) if c.Name != oc.Name { return int64(strings.Compare(c.Name, oc.Name)) @@ -94,11 +94,11 @@ func (c Cookie) Compare(other core.Value) int64 { return 0 } -func (c Cookie) Unwrap() interface{} { +func (c HTTPCookie) Unwrap() interface{} { return c.Value } -func (c Cookie) Hash() uint64 { +func (c HTTPCookie) Hash() uint64 { h := fnv.New64a() h.Write([]byte(c.Type().String())) @@ -116,11 +116,11 @@ func (c Cookie) Hash() uint64 { return h.Sum64() } -func (c Cookie) Copy() core.Value { +func (c HTTPCookie) Copy() core.Value { return *(&c) } -func (c Cookie) MarshalJSON() ([]byte, error) { +func (c HTTPCookie) MarshalJSON() ([]byte, error) { out, err := json.Marshal(c) if err != nil { @@ -130,7 +130,7 @@ func (c Cookie) MarshalJSON() ([]byte, error) { return out, err } -func (c Cookie) GetIn(_ context.Context, path []core.Value) (core.Value, error) { +func (c HTTPCookie) GetIn(_ context.Context, path []core.Value) (core.Value, error) { if len(path) == 0 { return values.None, nil } diff --git a/pkg/drivers/driver.go b/pkg/drivers/driver.go index 02005d84..0ff25f86 100644 --- a/pkg/drivers/driver.go +++ b/pkg/drivers/driver.go @@ -22,8 +22,8 @@ type ( Url string UserAgent string KeepCookies bool - Cookies []Cookie - Header Header + Cookies []HTTPCookie + Header HTTPHeader } Driver interface { diff --git a/pkg/drivers/header.go b/pkg/drivers/header.go index e69bdd0d..b0432da7 100644 --- a/pkg/drivers/header.go +++ b/pkg/drivers/header.go @@ -15,14 +15,14 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/values/types" ) -// Cookie Cookie object -type Header map[string][]string +// HTTPCookie HTTPCookie object +type HTTPHeader map[string][]string -func (h Header) Type() core.Type { +func (h HTTPHeader) Type() core.Type { return HTTPHeaderType } -func (h Header) String() string { +func (h HTTPHeader) String() string { var buf bytes.Buffer for k := range h { @@ -32,12 +32,12 @@ func (h Header) String() string { return buf.String() } -func (h Header) Compare(other core.Value) int64 { +func (h HTTPHeader) Compare(other core.Value) int64 { if other.Type() != HTTPHeaderType { return Compare(HTTPHeaderType, other.Type()) } - oh := other.(Header) + oh := other.(HTTPHeader) if len(h) > len(oh) { return 1 @@ -56,11 +56,11 @@ func (h Header) Compare(other core.Value) int64 { return 0 } -func (h Header) Unwrap() interface{} { +func (h HTTPHeader) Unwrap() interface{} { return h } -func (h Header) Hash() uint64 { +func (h HTTPHeader) Hash() uint64 { hash := fnv.New64a() hash.Write([]byte(h.Type().String())) @@ -96,11 +96,11 @@ func (h Header) Hash() uint64 { return hash.Sum64() } -func (h Header) Copy() core.Value { +func (h HTTPHeader) Copy() core.Value { return *(&h) } -func (h Header) MarshalJSON() ([]byte, error) { +func (h HTTPHeader) MarshalJSON() ([]byte, error) { out, err := json.Marshal(h) if err != nil { @@ -110,15 +110,15 @@ func (h Header) MarshalJSON() ([]byte, error) { return out, err } -func (h Header) Set(key, value string) { +func (h HTTPHeader) Set(key, value string) { textproto.MIMEHeader(h).Set(key, value) } -func (h Header) Get(key string) string { +func (h HTTPHeader) Get(key string) string { return textproto.MIMEHeader(h).Get(key) } -func (h Header) GetIn(_ context.Context, path []core.Value) (core.Value, error) { +func (h HTTPHeader) GetIn(_ context.Context, path []core.Value) (core.Value, error) { if len(path) == 0 { return values.None, nil } diff --git a/pkg/drivers/http/document.go b/pkg/drivers/http/document.go index c2c60046..c109f249 100644 --- a/pkg/drivers/http/document.go +++ b/pkg/drivers/http/document.go @@ -15,13 +15,13 @@ type HTMLDocument struct { docNode *goquery.Document element drivers.HTMLElement url values.String - cookies []drivers.Cookie + cookies []drivers.HTTPCookie } func NewHTMLDocument( node *goquery.Document, url string, - cookies []drivers.Cookie, + cookies []drivers.HTTPCookie, ) (drivers.HTMLDocument, error) { if url == "" { return nil, core.Error(core.ErrMissedArgument, "document url") @@ -94,10 +94,10 @@ func (doc *HTMLDocument) Copy() core.Value { } func (doc *HTMLDocument) Clone() core.Value { - var cookies []drivers.Cookie + var cookies []drivers.HTTPCookie if doc.cookies != nil { - cookies = make([]drivers.Cookie, 0, len(doc.cookies)) + cookies = make([]drivers.HTTPCookie, 0, len(doc.cookies)) for i, c := range doc.cookies { cookies[i] = c @@ -187,7 +187,7 @@ func (doc *HTMLDocument) GetCookies(_ context.Context) (*values.Array, error) { return arr, nil } -func (doc *HTMLDocument) SetCookies(_ context.Context, _ ...drivers.Cookie) error { +func (doc *HTMLDocument) SetCookies(_ context.Context, _ ...drivers.HTTPCookie) error { return core.ErrNotSupported } diff --git a/pkg/drivers/value.go b/pkg/drivers/value.go index 52aa355d..b4898b47 100644 --- a/pkg/drivers/value.go +++ b/pkg/drivers/value.go @@ -90,7 +90,7 @@ type ( GetCookies(ctx context.Context) (*values.Array, error) - SetCookies(ctx context.Context, cookies ...Cookie) error + SetCookies(ctx context.Context, cookies ...HTTPCookie) error Navigate(ctx context.Context, url values.String) error diff --git a/pkg/runtime/values/array.go b/pkg/runtime/values/array.go index 1a1cf2cd..fa3b7dac 100644 --- a/pkg/runtime/values/array.go +++ b/pkg/runtime/values/array.go @@ -135,6 +135,16 @@ func (t *Array) ForEach(predicate ArrayPredicate) { } } +func (t *Array) Find(predicate ArrayPredicate) (core.Value, Boolean) { + for idx, val := range t.items { + if predicate(val, idx) == true { + return val, True + } + } + + return None, False +} + func (t *Array) Get(idx Int) core.Value { l := len(t.items) - 1 diff --git a/pkg/stdlib/html/cookie_get.go b/pkg/stdlib/html/cookie_get.go new file mode 100644 index 00000000..f1e53925 --- /dev/null +++ b/pkg/stdlib/html/cookie_get.go @@ -0,0 +1,56 @@ +package html + +import ( + "context" + + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" + "github.com/MontFerret/ferret/pkg/runtime/values/types" +) + +// CookieSet gets a cookie from a given document by name. +func CookieGet(ctx context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 2, 2) + + if err != nil { + return values.None, err + } + + err = core.ValidateType(args[0], drivers.HTMLDocumentType) + + if err != nil { + return values.None, err + } + + err = core.ValidateType(args[1], types.String) + + if err != nil { + return values.None, err + } + + doc := args[0].(drivers.HTMLDocument) + name := args[1].(values.String) + + if err != nil { + return values.None, err + } + + cookies, err := doc.GetCookies(ctx) + + if err != nil { + return values.None, err + } + + found, _ := cookies.Find(func(value core.Value, _ int) bool { + cookie, ok := value.(drivers.HTTPCookie) + + if !ok { + return ok + } + + return cookie.Name == name.String() + }) + + return found, nil +} diff --git a/pkg/stdlib/html/cookie_set.go b/pkg/stdlib/html/cookie_set.go new file mode 100644 index 00000000..09fc4657 --- /dev/null +++ b/pkg/stdlib/html/cookie_set.go @@ -0,0 +1,42 @@ +package html + +import ( + "context" + + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" +) + +// CookieSet sets cookies to a given document +// @param doc (HTMLDocument) - Target document. +// @param cookie... (HTTPCookie) - Target cookies. +func CookieSet(ctx context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 2, core.MaxArgs) + + if err != nil { + return values.None, err + } + + err = core.ValidateType(args[0], drivers.HTMLDocumentType) + + if err != nil { + return values.None, err + } + + doc := args[0].(drivers.HTMLDocument) + + cookies := make([]drivers.HTTPCookie, 0, len(args)-1) + + for _, c := range args[1:] { + cookie, err := parseCookie(c) + + if err != nil { + return values.None, err + } + + cookies = append(cookies, cookie) + } + + return values.None, doc.SetCookies(ctx, cookies...) +} diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index abcdfe0e..229702e9 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -122,7 +122,7 @@ func newDocLoadParams(url values.String, arg core.Value) (DocumentLoadParams, er keepCookies, exists := obj.Get(values.NewString("keepCookies")) if exists { - if err := core.ValidateType(userAgent, types.Boolean); err != nil { + if err := core.ValidateType(keepCookies, types.Boolean); err != nil { return res, err } @@ -132,7 +132,7 @@ func newDocLoadParams(url values.String, arg core.Value) (DocumentLoadParams, er cookies, exists := obj.Get(values.NewString("cookies")) if exists { - if err := core.ValidateType(userAgent, types.Array); err != nil { + if err := core.ValidateType(cookies, types.Array); err != nil { return res, err } @@ -148,7 +148,7 @@ func newDocLoadParams(url values.String, arg core.Value) (DocumentLoadParams, er header, exists := obj.Get(values.NewString("header")) if exists { - if err := core.ValidateType(userAgent, types.Object); err != nil { + if err := core.ValidateType(header, types.Object); err != nil { return res, err } @@ -180,104 +180,120 @@ func newDocLoadParams(url values.String, arg core.Value) (DocumentLoadParams, er return res, nil } -func parseCookies(cookies *values.Array) ([]drivers.Cookie, error) { +func parseCookies(arr *values.Array) ([]drivers.HTTPCookie, error) { var err error - res := make([]drivers.Cookie, 0, cookies.Length()) + res := make([]drivers.HTTPCookie, 0, arr.Length()) - cookies.ForEach(func(value core.Value, idx int) bool { - if err = core.ValidateType(value, types.Object); err != nil { - return false - } + arr.ForEach(func(value core.Value, idx int) bool { + cookie, e := parseCookie(value) - co := value.(*values.Object) + if e != nil { + err = e - cookie := drivers.Cookie{ - Name: co.MustGet("name").String(), - Value: co.MustGet("value").String(), - Path: co.MustGet("path").String(), - Domain: co.MustGet("domain").String(), + return false } - maxAge, exists := co.Get("maxAge") + res[idx] = cookie - if exists { - if err = core.ValidateType(maxAge, types.Int); err != nil { - return false - } + return true + }) - cookie.MaxAge = int(maxAge.(values.Int)) - } + return res, err +} - expires, exists := co.Get("expires") +func parseCookie(value core.Value) (drivers.HTTPCookie, error) { + var err error - if exists { - if err = core.ValidateType(maxAge, types.DateTime, types.String); err != nil { - return false - } + if err = core.ValidateType(value, types.Object, drivers.HTTPCookieType); err != nil { + return drivers.HTTPCookie{}, err + } - if expires.Type() == types.DateTime { - cookie.Expires = expires.(values.DateTime).Unwrap().(time.Time) - } else { - t, e := time.Parse(expires.String(), values.DefaultTimeLayout) + if value.Type() == drivers.HTTPCookieType { + return value.(drivers.HTTPCookie), nil + } - if e != nil { - err = e + co := value.(*values.Object) - return false - } + cookie := drivers.HTTPCookie{ + Name: co.MustGet("name").String(), + Value: co.MustGet("value").String(), + Path: co.MustGet("path").String(), + Domain: co.MustGet("domain").String(), + } - cookie.Expires = t - } + maxAge, exists := co.Get("maxAge") + + if exists { + if err = core.ValidateType(maxAge, types.Int); err != nil { + return drivers.HTTPCookie{}, err } - sameSite, exists := co.Get("sameSite") + cookie.MaxAge = int(maxAge.(values.Int)) + } - if exists { - sameSite := strings.ToLower(sameSite.String()) - - switch sameSite { - case "lax": - cookie.SameSite = http.SameSiteLaxMode - break - case "strict": - cookie.SameSite = http.SameSiteStrictMode - break - default: - cookie.SameSite = http.SameSiteDefaultMode - break - } + expires, exists := co.Get("expires") + + if exists { + if err = core.ValidateType(maxAge, types.DateTime, types.String); err != nil { + return drivers.HTTPCookie{}, err } - httpOnly, exists := co.Get("httpOnly") + if expires.Type() == types.DateTime { + cookie.Expires = expires.(values.DateTime).Unwrap().(time.Time) + } else { + t, err := time.Parse(expires.String(), values.DefaultTimeLayout) - if exists { - if err = core.ValidateType(httpOnly, types.Boolean); err != nil { - return false + if err != nil { + return drivers.HTTPCookie{}, err } - cookie.HttpOnly = bool(httpOnly.(values.Boolean)) + cookie.Expires = t } + } - secure, exists := co.Get("secure") + sameSite, exists := co.Get("sameSite") + + if exists { + sameSite := strings.ToLower(sameSite.String()) + + switch sameSite { + case "lax": + cookie.SameSite = http.SameSiteLaxMode + break + case "strict": + cookie.SameSite = http.SameSiteStrictMode + break + default: + cookie.SameSite = http.SameSiteDefaultMode + break + } + } - if exists { - if err = core.ValidateType(secure, types.Boolean); err != nil { - return false - } + httpOnly, exists := co.Get("httpOnly") - cookie.Secure = bool(secure.(values.Boolean)) + if exists { + if err = core.ValidateType(httpOnly, types.Boolean); err != nil { + return drivers.HTTPCookie{}, err } - res[idx] = cookie + cookie.HttpOnly = bool(httpOnly.(values.Boolean)) + } - return true - }) + secure, exists := co.Get("secure") - return res, err + if exists { + if err = core.ValidateType(secure, types.Boolean); err != nil { + return drivers.HTTPCookie{}, err + } + + cookie.Secure = bool(secure.(values.Boolean)) + } + + return cookie, err } -func parseHeader(header *values.Object) (drivers.Header, error) { - res := make(drivers.Header) +func parseHeader(header *values.Object) (drivers.HTTPHeader, error) { + res := make(drivers.HTTPHeader) header.ForEach(func(value core.Value, key string) bool { res.Set(key, value.String()) diff --git a/pkg/stdlib/html/lib.go b/pkg/stdlib/html/lib.go index a82bcbc1..75237f8a 100644 --- a/pkg/stdlib/html/lib.go +++ b/pkg/stdlib/html/lib.go @@ -14,6 +14,8 @@ const defaultTimeout = 5000 func NewLib() map[string]core.Function { return map[string]core.Function{ + "COOKIE_GET": CookieGet, + "COOKIE_SET": CookieSet, "CLICK": Click, "CLICK_ALL": ClickAll, "DOCUMENT": Document, From 165fec4c551752773a85eae121507a8279d3f514 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Fri, 1 Mar 2019 16:13:48 -0500 Subject: [PATCH 05/13] Added support for cookies --- Makefile | 2 +- e2e/server/server.go | 12 +++++ e2e/tests/doc_cookie_del_d.fql | 21 +++++++++ e2e/tests/doc_cookie_get_d.fql | 10 ++++ e2e/tests/doc_cookie_load_d.fql | 14 ++++++ e2e/tests/doc_cookie_set_d.fql | 14 ++++++ pkg/drivers/cdp/document.go | 82 ++++++++++++++++++++++++++++++--- pkg/drivers/cdp/driver.go | 51 ++------------------ pkg/drivers/cdp/helpers.go | 33 ++++++++++++- pkg/drivers/driver.go | 2 +- pkg/drivers/http/document.go | 4 ++ pkg/drivers/http/driver.go | 8 ++-- pkg/drivers/value.go | 2 + pkg/stdlib/html/cookie_del.go | 66 ++++++++++++++++++++++++++ pkg/stdlib/html/cookie_get.go | 2 + pkg/stdlib/html/document.go | 4 +- pkg/stdlib/html/lib.go | 1 + 17 files changed, 265 insertions(+), 63 deletions(-) create mode 100644 e2e/tests/doc_cookie_del_d.fql create mode 100644 e2e/tests/doc_cookie_get_d.fql create mode 100644 e2e/tests/doc_cookie_load_d.fql create mode 100644 e2e/tests/doc_cookie_set_d.fql create mode 100644 pkg/stdlib/html/cookie_del.go diff --git a/Makefile b/Makefile index ffba0024..68349adb 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ cover: curl -s https://codecov.io/bash | bash e2e: - go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages + go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages --filter doc_cookie* bench: go test -run=XXX -bench=. ${DIR_PKG}/... diff --git a/e2e/server/server.go b/e2e/server/server.go index dc3e1bc1..46e004fc 100644 --- a/e2e/server/server.go +++ b/e2e/server/server.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "github.com/labstack/echo" + "net/http" "path/filepath" ) @@ -23,6 +24,17 @@ func New(settings Settings) *Server { e.Debug = false e.HideBanner = true + e.Use(func(handlerFunc echo.HandlerFunc) echo.HandlerFunc { + return func(ctx echo.Context) error { + ctx.SetCookie(&http.Cookie{ + Name: "x-ferret", + Value: "e2e", + HttpOnly: false, + }) + + return handlerFunc(ctx) + } + }) e.Static("/", settings.Dir) e.File("/", filepath.Join(settings.Dir, "index.html")) diff --git a/e2e/tests/doc_cookie_del_d.fql b/e2e/tests/doc_cookie_del_d.fql new file mode 100644 index 00000000..d0a1eca7 --- /dev/null +++ b/e2e/tests/doc_cookie_del_d.fql @@ -0,0 +1,21 @@ +LET url = @dynamic +LET doc = DOCUMENT(url, { + driver: "cdp", + cookies: [{ + name: "x-e2e", + value: "test" + }, { + name: "x-e2e-2", + value: "test2" + }] +}) + +COOKIE_DEL(doc, COOKIE_GET(doc, "x-e2e"), "x-e2e-2") + +LET cookie1 = COOKIE_GET(doc, "x-e2e") +LET cookie2 = COOKIE_GET(doc, "x-e2e-2") + +LET expected = "nonenone" +LET actual = TYPENAME(cookie1) + TYPENAME(cookie2) + +RETURN EXPECT(expected, actual) \ No newline at end of file diff --git a/e2e/tests/doc_cookie_get_d.fql b/e2e/tests/doc_cookie_get_d.fql new file mode 100644 index 00000000..8ad44e20 --- /dev/null +++ b/e2e/tests/doc_cookie_get_d.fql @@ -0,0 +1,10 @@ +LET url = @dynamic +LET doc = DOCUMENT(url, { + driver: "cdp" +}) + +LET cookiesPath = LENGTH(doc.cookies) > 0 ? "ok" : "false" +LET cookie = COOKIE_GET(doc, "x-ferret") +LET expected = "ok e2e" + +RETURN EXPECT(expected, cookiesPath + " " + cookie.value) \ No newline at end of file diff --git a/e2e/tests/doc_cookie_load_d.fql b/e2e/tests/doc_cookie_load_d.fql new file mode 100644 index 00000000..5c49f5bb --- /dev/null +++ b/e2e/tests/doc_cookie_load_d.fql @@ -0,0 +1,14 @@ +LET url = @dynamic +LET doc = DOCUMENT(url, { + driver: "cdp", + cookies: [{ + name: "x-e2e", + value: "test" + }] +}) + +LET cookiesPath = LENGTH(doc.cookies) > 1 ? "ok" : "false" +LET cookie = COOKIE_GET(doc, "x-e2e") +LET expected = "ok test" + +RETURN EXPECT(expected, cookiesPath + " " + cookie.value) \ No newline at end of file diff --git a/e2e/tests/doc_cookie_set_d.fql b/e2e/tests/doc_cookie_set_d.fql new file mode 100644 index 00000000..54fdc3e4 --- /dev/null +++ b/e2e/tests/doc_cookie_set_d.fql @@ -0,0 +1,14 @@ +LET url = @dynamic +LET doc = DOCUMENT("0.0.0.0:8081", { + driver: "cdp" +}) + +COOKIE_SET(doc, { + name: "x-e2e", + value: "test" +}) + +LET cookie = COOKIE_GET(doc, "x-e2e") +LET expected = "test" + +RETURN EXPECT(expected, cookie.value) \ No newline at end of file diff --git a/pkg/drivers/cdp/document.go b/pkg/drivers/cdp/document.go index 3eafb3c5..258bc4d2 100644 --- a/pkg/drivers/cdp/document.go +++ b/pkg/drivers/cdp/document.go @@ -2,6 +2,7 @@ package cdp import ( "context" + "encoding/json" "fmt" "hash/fnv" "sync" @@ -49,7 +50,7 @@ func LoadHTMLDocument( ctx context.Context, conn *rpcc.Conn, client *cdp.Client, - url string, + params drivers.LoadDocumentParams, ) (drivers.HTMLDocument, error) { logger := logging.FromContext(ctx) @@ -57,13 +58,61 @@ func LoadHTMLDocument( return nil, core.Error(core.ErrMissedArgument, "connection") } - if url == "" { + if params.URL == "" { return nil, core.Error(core.ErrMissedArgument, "url") } + if params.Cookies != nil { + cookies := make([]network.CookieParam, 0, len(params.Cookies)) + + for _, c := range params.Cookies { + cookies = append(cookies, fromDriverCookie(params.URL, c)) + + logger. + Debug(). + Timestamp(). + Str("cookie", c.Name). + Msg("set cookie") + } + + err := client.Network.SetCookies( + ctx, + network.NewSetCookiesArgs(cookies), + ) + + if err != nil { + return nil, err + } + } + + if params.Header != nil { + j, err := json.Marshal(params.Header) + + if err != nil { + return nil, err + } + + for k := range params.Header { + logger. + Debug(). + Timestamp(). + Str("header", k). + Msg("set header") + } + + err = client.Network.SetExtraHTTPHeaders( + ctx, + network.NewSetExtraHTTPHeadersArgs(network.Headers(j)), + ) + + if err != nil { + return nil, err + } + } + var err error - if url != BlankPageURL { + if params.URL != BlankPageURL { err = waitForLoadEvent(ctx, client) if err != nil { @@ -109,7 +158,7 @@ func LoadHTMLDocument( conn, client, broker, - values.NewString(url), + values.NewString(params.URL), rootElement, ), nil } @@ -349,13 +398,34 @@ func (doc *HTMLDocument) SetCookies(ctx context.Context, cookies ...drivers.HTTP params := make([]network.CookieParam, 0, len(cookies)) - for i, c := range cookies { - params[i] = fromDriverCookie(c) + for _, c := range cookies { + params = append(params, fromDriverCookie(doc.url.String(), c)) } return doc.client.Network.SetCookies(ctx, network.NewSetCookiesArgs(params)) } +func (doc *HTMLDocument) DeleteCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error { + doc.Lock() + defer doc.Unlock() + + if len(cookies) == 0 { + return nil + } + + var err error + + for _, c := range cookies { + err = doc.client.Network.DeleteCookies(ctx, fromDriverCookieDelete(doc.url.String(), c)) + + if err != nil { + break + } + } + + return err +} + func (doc *HTMLDocument) SetURL(ctx context.Context, url values.String) error { return doc.Navigate(ctx, url) } diff --git a/pkg/drivers/cdp/driver.go b/pkg/drivers/cdp/driver.go index 3345dbe9..0d7ffe32 100644 --- a/pkg/drivers/cdp/driver.go +++ b/pkg/drivers/cdp/driver.go @@ -2,7 +2,6 @@ package cdp import ( "context" - "encoding/json" "sync" "github.com/MontFerret/ferret/pkg/drivers" @@ -59,7 +58,7 @@ func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocument return nil, err } - url := params.Url + url := params.URL if url == "" { url = BlankPageURL @@ -149,51 +148,7 @@ func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocument }, func() error { - if params.Cookies != nil { - cookies := make([]network.CookieParam, 0, len(params.Cookies)) - - for i, c := range params.Cookies { - cookies[i] = fromDriverCookie(c) - - logger. - Debug(). - Timestamp(). - Str("cookie", c.Name). - Msg("set cookie") - } - - return client.Network.SetCookies( - ctx, - network.NewSetCookiesArgs(cookies), - ) - } - - return nil - }, - - func() error { - if params.Header != nil { - j, err := json.Marshal(params.Header) - - if err != nil { - return err - } - - for k := range params.Header { - logger. - Debug(). - Timestamp(). - Str("header", k). - Msg("set header") - } - - return client.Network.SetExtraHTTPHeaders( - ctx, - network.NewSetExtraHTTPHeadersArgs(network.Headers(j)), - ) - } - - return nil + return client.Network.Enable(ctx, network.NewEnableArgs()) }, ) @@ -201,7 +156,7 @@ func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocument return nil, err } - return LoadHTMLDocument(ctx, conn, client, url) + return LoadHTMLDocument(ctx, conn, client, params) } func (drv *Driver) Close() error { diff --git a/pkg/drivers/cdp/helpers.go b/pkg/drivers/cdp/helpers.go index 0f9df690..f826e747 100644 --- a/pkg/drivers/cdp/helpers.go +++ b/pkg/drivers/cdp/helpers.go @@ -23,6 +23,8 @@ import ( "golang.org/x/sync/errgroup" ) +var emptyExpires = time.Time{} + type ( batchFunc = func() error @@ -407,7 +409,7 @@ func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { return broker, nil } -func fromDriverCookie(cookie drivers.HTTPCookie) network.CookieParam { +func fromDriverCookie(url string, cookie drivers.HTTPCookie) network.CookieParam { sameSite := network.CookieSameSiteNotSet switch cookie.SameSite { @@ -422,7 +424,14 @@ func fromDriverCookie(cookie drivers.HTTPCookie) network.CookieParam { break } + if cookie.Expires == emptyExpires { + cookie.Expires = time.Now().Add(time.Duration(24) + time.Hour) + } + + normalizedUrl := normalizeCookieUrl(url) + return network.CookieParam{ + URL: &normalizedUrl, Name: cookie.Name, Value: cookie.Value, Secure: &cookie.Secure, @@ -434,6 +443,17 @@ func fromDriverCookie(cookie drivers.HTTPCookie) network.CookieParam { } } +func fromDriverCookieDelete(url string, cookie drivers.HTTPCookie) *network.DeleteCookiesArgs { + normalizedUrl := normalizeCookieUrl(url) + + return &network.DeleteCookiesArgs{ + URL: &normalizedUrl, + Name: cookie.Name, + Path: &cookie.Path, + Domain: &cookie.Domain, + } +} + func toDriverCookie(c network.Cookie) drivers.HTTPCookie { sameSite := http.SameSiteDefaultMode @@ -460,3 +480,14 @@ func toDriverCookie(c network.Cookie) drivers.HTTPCookie { HttpOnly: c.HTTPOnly, } } + +func normalizeCookieUrl(url string) string { + const httpPrefix = "http://" + const httpsPrefix = "https://" + + if strings.HasPrefix(url, httpPrefix) || strings.HasPrefix(url, httpsPrefix) { + return url + } + + return httpPrefix + url +} diff --git a/pkg/drivers/driver.go b/pkg/drivers/driver.go index 0ff25f86..33a061d3 100644 --- a/pkg/drivers/driver.go +++ b/pkg/drivers/driver.go @@ -19,7 +19,7 @@ type ( } LoadDocumentParams struct { - Url string + URL string UserAgent string KeepCookies bool Cookies []HTTPCookie diff --git a/pkg/drivers/http/document.go b/pkg/drivers/http/document.go index c109f249..72e83a9a 100644 --- a/pkg/drivers/http/document.go +++ b/pkg/drivers/http/document.go @@ -191,6 +191,10 @@ func (doc *HTMLDocument) SetCookies(_ context.Context, _ ...drivers.HTTPCookie) return core.ErrNotSupported } +func (doc *HTMLDocument) DeleteCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error { + return core.ErrNotSupported +} + func (doc *HTMLDocument) Navigate(_ context.Context, _ values.String) error { return core.ErrNotSupported } diff --git a/pkg/drivers/http/driver.go b/pkg/drivers/http/driver.go index c175c6f0..dcdc34ef 100644 --- a/pkg/drivers/http/driver.go +++ b/pkg/drivers/http/driver.go @@ -63,7 +63,7 @@ func (drv *Driver) Name() string { } func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocumentParams) (drivers.HTMLDocument, error) { - req, err := http.NewRequest(http.MethodGet, params.Url, nil) + req, err := http.NewRequest(http.MethodGet, params.URL, nil) if err != nil { return nil, err @@ -122,7 +122,7 @@ func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocument resp, err := drv.client.Do(req) if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve a document %s", params.Url) + return nil, errors.Wrapf(err, "failed to retrieve a document %s", params.URL) } defer resp.Body.Close() @@ -130,10 +130,10 @@ func (drv *Driver) LoadDocument(ctx context.Context, params drivers.LoadDocument doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, errors.Wrapf(err, "failed to parse a document %s", params.Url) + return nil, errors.Wrapf(err, "failed to parse a document %s", params.URL) } - return NewHTMLDocument(doc, params.Url, params.Cookies) + return NewHTMLDocument(doc, params.URL, params.Cookies) } func (drv *Driver) ParseDocument(_ context.Context, str values.String) (drivers.HTMLDocument, error) { diff --git a/pkg/drivers/value.go b/pkg/drivers/value.go index b4898b47..34c0a691 100644 --- a/pkg/drivers/value.go +++ b/pkg/drivers/value.go @@ -92,6 +92,8 @@ type ( SetCookies(ctx context.Context, cookies ...HTTPCookie) error + DeleteCookies(ctx context.Context, cookies ...HTTPCookie) error + Navigate(ctx context.Context, url values.String) error NavigateBack(ctx context.Context, skip values.Int) (values.Boolean, error) diff --git a/pkg/stdlib/html/cookie_del.go b/pkg/stdlib/html/cookie_del.go new file mode 100644 index 00000000..bd12af33 --- /dev/null +++ b/pkg/stdlib/html/cookie_del.go @@ -0,0 +1,66 @@ +package html + +import ( + "context" + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" + "github.com/MontFerret/ferret/pkg/runtime/values/types" +) + +// CookieSet gets a cookie from a given document by name. +// @param source (HTMLDocument) - Target HTMLDocument. +// @param cookie (...HTTPCookie|String) - Cookie or cookie name to delete. +func CookieDel(ctx context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 2, core.MaxArgs) + + if err != nil { + return values.None, err + } + + err = core.ValidateType(args[0], drivers.HTMLDocumentType) + + if err != nil { + return values.None, err + } + + doc := args[0].(drivers.HTMLDocument) + inputs := args[1:] + var currentCookies *values.Array + cookies := make([]drivers.HTTPCookie, 0, len(inputs)) + + for _, c := range inputs { + switch cookie := c.(type) { + case values.String: + if currentCookies == nil { + current, err := doc.GetCookies(ctx) + + if err != nil { + return values.None, err + } + + currentCookies = current + } + + found, isFound := currentCookies.Find(func(value core.Value, _ int) bool { + cv := value.(drivers.HTTPCookie) + + return cv.Name == cookie.String() + }) + + if isFound { + cookies = append(cookies, found.(drivers.HTTPCookie)) + } + + break + case drivers.HTTPCookie: + cookies = append(cookies, cookie) + + break + default: + return values.None, core.TypeError(c.Type(), types.String, drivers.HTTPCookieType) + } + } + + return values.None, doc.DeleteCookies(ctx, cookies...) +} diff --git a/pkg/stdlib/html/cookie_get.go b/pkg/stdlib/html/cookie_get.go index f1e53925..90027ed3 100644 --- a/pkg/stdlib/html/cookie_get.go +++ b/pkg/stdlib/html/cookie_get.go @@ -10,6 +10,8 @@ import ( ) // CookieSet gets a cookie from a given document by name. +// @param doc (HTMLDocument) - Target HTMLDocument. +// @param name (String) - Cookie or cookie name to delete. func CookieGet(ctx context.Context, args ...core.Value) (core.Value, error) { err := core.ValidateArgs(args, 2, 2) diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index 229702e9..50c64a72 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -72,7 +72,7 @@ func Document(ctx context.Context, args ...core.Value) (core.Value, error) { func newDefaultDocLoadParams(url values.String) DocumentLoadParams { return DocumentLoadParams{ LoadDocumentParams: drivers.LoadDocumentParams{ - Url: url.String(), + URL: url.String(), }, Timeout: time.Second * 30, } @@ -193,7 +193,7 @@ func parseCookies(arr *values.Array) ([]drivers.HTTPCookie, error) { return false } - res[idx] = cookie + res = append(res, cookie) return true }) diff --git a/pkg/stdlib/html/lib.go b/pkg/stdlib/html/lib.go index 75237f8a..c79395d8 100644 --- a/pkg/stdlib/html/lib.go +++ b/pkg/stdlib/html/lib.go @@ -14,6 +14,7 @@ const defaultTimeout = 5000 func NewLib() map[string]core.Function { return map[string]core.Function{ + "COOKIE_DEL": CookieDel, "COOKIE_GET": CookieGet, "COOKIE_SET": CookieSet, "CLICK": Click, From 89cb63af52eb861dd90711c5bee226e17d90ee28 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Sat, 2 Mar 2019 00:24:47 -0500 Subject: [PATCH 06/13] Updated README --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index eebfc1e9..e1d2cd45 100644 --- a/README.md +++ b/README.md @@ -465,6 +465,8 @@ func run(q string) ([]byte, error) { ## Cookies +### Non-incognito mode + By default, ``CDP`` driver execute each query in an incognito mode in order to avoid any collisions related to some persisted cookies from previous queries. However, sometimes it might not be a desirable behavior and a query needs to be executed within a Chrome tab with earlier persisted cookies. In order to do that, we need to inform the driver to execute all queries in regular tabs. Here is how to do that: @@ -507,4 +509,36 @@ func run(q string) ([]byte, error) { return program.Run(ctx) } +``` + +#### Query +``` +LET doc = DOCUMENT("https://www.google.com", { + driver: "cdp", + keepCookies: true +}) +``` + +### Cookies manipulation +For more precise work, you can set/get/delete cookies manually during and after page load: + +``` +LET doc = DOCUMENT("https://www.google.com", { + driver: "cdp", + cookies: [ + { + name: "foo", + value: "bar" + } + ] +}) + +COOKIES_SET(doc, { name: "baz", value: "qaz"}, { name: "daz", value: "gag" }) +COOKIES_DEL(doc, "foo") + +LET c = COOKIES_GET(doc, "baz") + +FOR cookie IN doc.cookies + RETURN cookie.name + ``` \ No newline at end of file From 30e7da24092d5540974788529696d7980c95c98d Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Sat, 2 Mar 2019 00:25:45 -0500 Subject: [PATCH 07/13] Removed e2e filter --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 68349adb..ffba0024 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ cover: curl -s https://codecov.io/bash | bash e2e: - go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages --filter doc_cookie* + go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages bench: go test -run=XXX -bench=. ${DIR_PKG}/... From 62b54b28b11f052b29028a853f198c2872d38226 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Sat, 2 Mar 2019 13:14:35 -0500 Subject: [PATCH 08/13] Fixed issues --- README.md | 2 +- pkg/drivers/cdp/helpers.go | 17 +++++++---------- pkg/drivers/cookie.go | 10 +++++----- pkg/drivers/http/document.go | 2 +- pkg/stdlib/html/document.go | 2 +- 5 files changed, 15 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index e1d2cd45..43d6b18a 100644 --- a/README.md +++ b/README.md @@ -474,7 +474,7 @@ In order to do that, we need to inform the driver to execute all queries in regu #### CLI ```sh - ferret --cdp-keep-cookies my-query.fql +ferret --cdp-keep-cookies my-query.fql ``` #### Code diff --git a/pkg/drivers/cdp/helpers.go b/pkg/drivers/cdp/helpers.go index f826e747..ad15f6e1 100644 --- a/pkg/drivers/cdp/helpers.go +++ b/pkg/drivers/cdp/helpers.go @@ -415,39 +415,36 @@ func fromDriverCookie(url string, cookie drivers.HTTPCookie) network.CookieParam switch cookie.SameSite { case http.SameSiteLaxMode: sameSite = network.CookieSameSiteLax - break case http.SameSiteStrictMode: sameSite = network.CookieSameSiteStrict - break default: sameSite = network.CookieSameSiteNotSet - break } if cookie.Expires == emptyExpires { cookie.Expires = time.Now().Add(time.Duration(24) + time.Hour) } - normalizedUrl := normalizeCookieUrl(url) + normalizedURL := normalizeCookieURL(url) return network.CookieParam{ - URL: &normalizedUrl, + URL: &normalizedURL, Name: cookie.Name, Value: cookie.Value, Secure: &cookie.Secure, Path: &cookie.Path, Domain: &cookie.Domain, - HTTPOnly: &cookie.HttpOnly, + HTTPOnly: &cookie.HTTPOnly, SameSite: sameSite, Expires: network.TimeSinceEpoch(cookie.Expires.Unix()), } } func fromDriverCookieDelete(url string, cookie drivers.HTTPCookie) *network.DeleteCookiesArgs { - normalizedUrl := normalizeCookieUrl(url) + normalizedURL := normalizeCookieURL(url) return &network.DeleteCookiesArgs{ - URL: &normalizedUrl, + URL: &normalizedURL, Name: cookie.Name, Path: &cookie.Path, Domain: &cookie.Domain, @@ -477,11 +474,11 @@ func toDriverCookie(c network.Cookie) drivers.HTTPCookie { Expires: time.Unix(int64(c.Expires), 0), SameSite: sameSite, Secure: c.Secure, - HttpOnly: c.HTTPOnly, + HTTPOnly: c.HTTPOnly, } } -func normalizeCookieUrl(url string) string { +func normalizeCookieURL(url string) string { const httpPrefix = "http://" const httpsPrefix = "https://" diff --git a/pkg/drivers/cookie.go b/pkg/drivers/cookie.go index 1b95e023..b7f1a640 100644 --- a/pkg/drivers/cookie.go +++ b/pkg/drivers/cookie.go @@ -26,7 +26,7 @@ type HTTPCookie struct { MaxAge int `json:"max_age"` Secure bool `json:"secure"` - HttpOnly bool `json:"http_only"` + HTTPOnly bool `json:"http_only"` SameSite http.SameSite `json:"same_site"` } @@ -79,9 +79,9 @@ func (c HTTPCookie) Compare(other core.Value) int64 { return -1 } - if c.HttpOnly && !oc.HttpOnly { + if c.HTTPOnly && !oc.HTTPOnly { return 1 - } else if !c.HttpOnly && oc.HttpOnly { + } else if !c.HTTPOnly && oc.HTTPOnly { return -1 } @@ -110,7 +110,7 @@ func (c HTTPCookie) Hash() uint64 { h.Write([]byte(c.Expires.String())) h.Write([]byte(strconv.Itoa(c.MaxAge))) h.Write([]byte(fmt.Sprintf("%t", c.Secure))) - h.Write([]byte(fmt.Sprintf("%t", c.HttpOnly))) + h.Write([]byte(fmt.Sprintf("%t", c.HTTPOnly))) h.Write([]byte(strconv.Itoa(int(c.SameSite)))) return h.Sum64() @@ -159,7 +159,7 @@ func (c HTTPCookie) GetIn(_ context.Context, path []core.Value) (core.Value, err case "secure": return values.NewBoolean(c.Secure), nil case "httpOnly": - return values.NewBoolean(c.HttpOnly), nil + return values.NewBoolean(c.HTTPOnly), nil case "sameSite": switch c.SameSite { case http.SameSiteLaxMode: diff --git a/pkg/drivers/http/document.go b/pkg/drivers/http/document.go index 72e83a9a..403987be 100644 --- a/pkg/drivers/http/document.go +++ b/pkg/drivers/http/document.go @@ -191,7 +191,7 @@ func (doc *HTMLDocument) SetCookies(_ context.Context, _ ...drivers.HTTPCookie) return core.ErrNotSupported } -func (doc *HTMLDocument) DeleteCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error { +func (doc *HTMLDocument) DeleteCookies(_ context.Context, _ ...drivers.HTTPCookie) error { return core.ErrNotSupported } diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index 50c64a72..5e10500f 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -276,7 +276,7 @@ func parseCookie(value core.Value) (drivers.HTTPCookie, error) { return drivers.HTTPCookie{}, err } - cookie.HttpOnly = bool(httpOnly.(values.Boolean)) + cookie.HTTPOnly = bool(httpOnly.(values.Boolean)) } secure, exists := co.Get("secure") From 1354ff0dca067a9a07015862f0663370a83393fc Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Sun, 3 Mar 2019 13:12:08 -0500 Subject: [PATCH 09/13] Updated example file --- examples/cookies.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/cookies.go b/examples/cookies.go index cef159c3..c1f4b85c 100644 --- a/examples/cookies.go +++ b/examples/cookies.go @@ -2,7 +2,6 @@ package main import ( "context" - "encoding/json" "fmt" "os" @@ -26,4 +25,4 @@ func run(q string) ([]byte, error) { ) return program.Run(ctx) -} \ No newline at end of file +} From 9e737dd89d9156ab21f674f926cd2eef2764a3b4 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Mon, 4 Mar 2019 10:55:54 -0500 Subject: [PATCH 10/13] Set version of Go for compilation stage to stable --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 3272544c..f999b983 100644 --- a/.travis.yml +++ b/.travis.yml @@ -47,6 +47,7 @@ jobs: - make fmt - if [[ $(git diff --stat) != '' ]]; then echo 'Invalid formatting!' >&2; exit 1; fi - stage: compile + go: stable script: - make generate - make compile @@ -61,5 +62,6 @@ jobs: after_script: - killall google-chrome-stable - stage: bench + go: stable script: - make bench From 98653c9ee4fcb2cf56eda59b0a9371d1f367428b Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Mon, 4 Mar 2019 13:28:27 -0500 Subject: [PATCH 11/13] Added http.SameSite polyfill --- Gopkg.lock | 39 +++++++++++++++++----------------- pkg/drivers/cdp/helpers.go | 17 +++++++-------- pkg/drivers/cookie.go | 42 +++++++++++++++++++++++-------------- pkg/stdlib/html/document.go | 7 +++---- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 1d6a8e9f..9554cf06 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -82,12 +82,12 @@ version = "v4.20" [[projects]] - digest = "1:e55506f8670236cf09b6b65cda7d6afa403233a5b75397dfcba3555e484e4b18" + digest = "1:688475ae01f983eceee598c7706119c8cc7649f382e4a186168ea7f9d472727a" name = "github.com/labstack/echo" packages = ["."] pruneopts = "UT" - revision = "c7eb8da9ec73e78c4f38413f3f835e0cd52c7d72" - version = "v3.3.8" + revision = "6d9e043284aea2d07f5fcaf0d3a424eb7d9f6109" + version = "v4.0.0" [[projects]] digest = "1:01eb0269028d3c2e21b5b6cd9b1ba81bc4170ab293fcffa84e3aa3a6138a92e8" @@ -158,20 +158,20 @@ version = "v0.21.0" [[projects]] - digest = "1:c658e84ad3916da105a761660dcaeb01e63416c8ec7bc62256a9b411a05fcd67" + digest = "1:2fa7b0155cd54479a755c629de26f888a918e13f8857a2c442205d825368e084" name = "github.com/mattn/go-colorable" packages = ["."] pruneopts = "UT" - revision = "167de6bfdfba052fa6b2d3664c8f5272e23c9072" - version = "v0.0.9" + revision = "3a70a971f94a22f2fa562ffcc7a0eb45f5daf045" + version = "v0.1.1" [[projects]] - digest = "1:0981502f9816113c9c8c4ac301583841855c8cf4da8c72f696b3ebedf6d0e4e5" + digest = "1:3bb9c8451d199650bfd303e0068d86f135952fead374ad87c09a9b8a2cc4bd7c" name = "github.com/mattn/go-isatty" packages = ["."] pruneopts = "UT" - revision = "6ca4dbf54d38eea1a992b3c722a76a5d1c4cb25c" - version = "v0.0.4" + revision = "369ecd8cea9851e459abb67eb171853e3986591e" + version = "v0.0.6" [[projects]] digest = "1:c805e517269b0ba4c21ded5836019ed7d16953d4026cb7d00041d039c7906be9" @@ -190,7 +190,7 @@ version = "v0.8.1" [[projects]] - digest = "1:6112a5eaec2ec65df289ccbb7a730aaf03e3c5cce6c906d367ccf9b7ac567604" + digest = "1:9be615b2a72fc4a99e623c6776cce3afe3451741c34ea1805ea4a9b58604b9df" name = "github.com/rs/zerolog" packages = [ ".", @@ -198,8 +198,8 @@ "internal/json", ] pruneopts = "UT" - revision = "8747b7b3a51b5d08ee7ac50eaf4869edaf9f714a" - version = "v1.11.0" + revision = "6d6350a51143b5c0d0a6a3b736ee2b41315f7269" + version = "v1.12.0" [[projects]] digest = "1:4ca145a665316d3c020a39c0741780fa3636b9152b824206796c4dce541f4a24" @@ -251,25 +251,25 @@ [[projects]] branch = "master" - digest = "1:e3d2db9bc633f4635e6418caf0b0734c43821ecd59105d1798458c6ae4d227fd" + digest = "1:398e132d86665f82a3642f675cdadea673d0d1521209ebac3c378141209f99c4" name = "golang.org/x/crypto" packages = [ "acme", "acme/autocert", ] pruneopts = "UT" - revision = "ff983b9c42bc9fbf91556e191cc8efb585c16908" + revision = "8dd112bcdc25174059e45e07517d9fc663123347" [[projects]] branch = "master" - digest = "1:1a1ecfa7b54ca3f7a0115ab5c578d7d6a5d8b605839c549e80260468c42f8be7" + digest = "1:de4815ce3ca5b624af2733716ecd471de1ef50cda8afec39491aab517f73139c" name = "golang.org/x/net" packages = [ "html", "html/atom", ] pruneopts = "UT" - revision = "915654e7eabcea33ae277abbecf52f0d8b7a9fdc" + revision = "16b79f2e4e95ea23b2bf9903c9809ff7b013ce85" [[projects]] branch = "master" @@ -277,15 +277,15 @@ name = "golang.org/x/sync" packages = ["errgroup"] pruneopts = "UT" - revision = "37e7f081c4d4c64e13b10787722085407fe5d15f" + revision = "e225da77a7e68af35c70ccbf71af2b83e6acac3c" [[projects]] branch = "master" - digest = "1:91137b48dc3eb34409f731b49f63a5ebf73218168a065e1a93af24eb5b2f99e8" + digest = "1:b95ef12b443f7b5a40ab69e3a02d113f5a7f2b67a32af76eb2fa7bebd52c9eb5" name = "golang.org/x/sys" packages = ["unix"] pruneopts = "UT" - revision = "48ac38b7c8cbedd50b1613c0fccacfc7d88dfcdf" + revision = "e844e0132e93db857c984c24fd4fc86815e43be3" [solve-meta] analyzer-name = "dep" @@ -303,6 +303,7 @@ "github.com/mafredri/cdp/protocol/dom", "github.com/mafredri/cdp/protocol/emulation", "github.com/mafredri/cdp/protocol/input", + "github.com/mafredri/cdp/protocol/network", "github.com/mafredri/cdp/protocol/page", "github.com/mafredri/cdp/protocol/runtime", "github.com/mafredri/cdp/protocol/target", diff --git a/pkg/drivers/cdp/helpers.go b/pkg/drivers/cdp/helpers.go index ad15f6e1..ac238e54 100644 --- a/pkg/drivers/cdp/helpers.go +++ b/pkg/drivers/cdp/helpers.go @@ -4,13 +4,11 @@ import ( "bytes" "context" "errors" - "github.com/MontFerret/ferret/pkg/drivers" - "github.com/mafredri/cdp/protocol/network" "math" - "net/http" "strings" "time" + "github.com/MontFerret/ferret/pkg/drivers" "github.com/MontFerret/ferret/pkg/drivers/cdp/eval" "github.com/MontFerret/ferret/pkg/drivers/cdp/events" "github.com/MontFerret/ferret/pkg/drivers/common" @@ -18,6 +16,7 @@ import ( "github.com/PuerkitoBio/goquery" "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" + "github.com/mafredri/cdp/protocol/network" "github.com/mafredri/cdp/protocol/page" "github.com/mafredri/cdp/protocol/runtime" "golang.org/x/sync/errgroup" @@ -413,9 +412,9 @@ func fromDriverCookie(url string, cookie drivers.HTTPCookie) network.CookieParam sameSite := network.CookieSameSiteNotSet switch cookie.SameSite { - case http.SameSiteLaxMode: + case drivers.SameSiteLaxMode: sameSite = network.CookieSameSiteLax - case http.SameSiteStrictMode: + case drivers.SameSiteStrictMode: sameSite = network.CookieSameSiteStrict default: sameSite = network.CookieSameSiteNotSet @@ -452,17 +451,17 @@ func fromDriverCookieDelete(url string, cookie drivers.HTTPCookie) *network.Dele } func toDriverCookie(c network.Cookie) drivers.HTTPCookie { - sameSite := http.SameSiteDefaultMode + sameSite := drivers.SameSiteDefaultMode switch c.SameSite { case network.CookieSameSiteLax: - sameSite = http.SameSiteLaxMode + sameSite = drivers.SameSiteLaxMode break case network.CookieSameSiteStrict: - sameSite = http.SameSiteStrictMode + sameSite = drivers.SameSiteStrictMode break default: - sameSite = http.SameSiteDefaultMode + sameSite = drivers.SameSiteDefaultMode break } diff --git a/pkg/drivers/cookie.go b/pkg/drivers/cookie.go index b7f1a640..b40413d8 100644 --- a/pkg/drivers/cookie.go +++ b/pkg/drivers/cookie.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "hash/fnv" - "net/http" "strconv" "strings" "time" @@ -15,20 +14,31 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/values/types" ) -// HTTPCookie HTTPCookie object -type HTTPCookie struct { - Name string `json:"name"` - Value string `json:"value"` +type ( + // Polyfill for Go 1.10 + SameSite int - Path string `json:"path"` - Domain string `json:"domain"` - Expires time.Time `json:"expires"` + // HTTPCookie HTTPCookie object + HTTPCookie struct { + Name string `json:"name"` + Value string `json:"value"` - MaxAge int `json:"max_age"` - Secure bool `json:"secure"` - HTTPOnly bool `json:"http_only"` - SameSite http.SameSite `json:"same_site"` -} + Path string `json:"path"` + Domain string `json:"domain"` + Expires time.Time `json:"expires"` + + MaxAge int `json:"max_age"` + Secure bool `json:"secure"` + HTTPOnly bool `json:"http_only"` + SameSite SameSite `json:"same_site"` + } +) + +const ( + SameSiteDefaultMode SameSite = iota + 1 + SameSiteLaxMode + SameSiteStrictMode +) func (c HTTPCookie) Type() core.Type { return HTTPCookieType @@ -162,12 +172,12 @@ func (c HTTPCookie) GetIn(_ context.Context, path []core.Value) (core.Value, err return values.NewBoolean(c.HTTPOnly), nil case "sameSite": switch c.SameSite { - case http.SameSiteLaxMode: + case SameSiteLaxMode: return values.NewString("Lax"), nil - case http.SameSiteStrictMode: + case SameSiteStrictMode: return values.NewString("Strict"), nil default: - return values.NewString("Default"), nil + return values.EmptyString, nil } default: return values.None, nil diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index 5e10500f..eac99f76 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -2,7 +2,6 @@ package html import ( "context" - "net/http" "strings" "time" @@ -258,13 +257,13 @@ func parseCookie(value core.Value) (drivers.HTTPCookie, error) { switch sameSite { case "lax": - cookie.SameSite = http.SameSiteLaxMode + cookie.SameSite = drivers.SameSiteLaxMode break case "strict": - cookie.SameSite = http.SameSiteStrictMode + cookie.SameSite = drivers.SameSiteStrictMode break default: - cookie.SameSite = http.SameSiteDefaultMode + cookie.SameSite = drivers.SameSiteDefaultMode break } } From ba7ee85edaeead39bcdb20f92604b8e89ee2a80c Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Fri, 15 Mar 2019 19:33:24 -0400 Subject: [PATCH 12/13] Updated tests --- Makefile | 2 +- e2e/tests/doc_cookie_load_d.fql | 2 +- e2e/tests/doc_cookie_set_d.fql | 2 +- pkg/drivers/cookie.go | 34 +++++++++++++++++++++------------ 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index ffba0024..1cb45b99 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ cover: curl -s https://codecov.io/bash | bash e2e: - go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages + go run ${DIR_E2E}/main.go --tests ${DIR_E2E}/tests --pages ${DIR_E2E}/pages --filter doc_cookie_set* bench: go test -run=XXX -bench=. ${DIR_PKG}/... diff --git a/e2e/tests/doc_cookie_load_d.fql b/e2e/tests/doc_cookie_load_d.fql index 5c49f5bb..492da6e6 100644 --- a/e2e/tests/doc_cookie_load_d.fql +++ b/e2e/tests/doc_cookie_load_d.fql @@ -7,7 +7,7 @@ LET doc = DOCUMENT(url, { }] }) -LET cookiesPath = LENGTH(doc.cookies) > 1 ? "ok" : "false" +LET cookiesPath = LENGTH(doc.cookies) > 0 ? "ok" : "false" LET cookie = COOKIE_GET(doc, "x-e2e") LET expected = "ok test" diff --git a/e2e/tests/doc_cookie_set_d.fql b/e2e/tests/doc_cookie_set_d.fql index 54fdc3e4..89ddd430 100644 --- a/e2e/tests/doc_cookie_set_d.fql +++ b/e2e/tests/doc_cookie_set_d.fql @@ -1,5 +1,5 @@ LET url = @dynamic -LET doc = DOCUMENT("0.0.0.0:8081", { +LET doc = DOCUMENT(@dynamic, { driver: "cdp" }) diff --git a/pkg/drivers/cookie.go b/pkg/drivers/cookie.go index b40413d8..01fa8644 100644 --- a/pkg/drivers/cookie.go +++ b/pkg/drivers/cookie.go @@ -20,17 +20,15 @@ type ( // HTTPCookie HTTPCookie object HTTPCookie struct { - Name string `json:"name"` - Value string `json:"value"` - - Path string `json:"path"` - Domain string `json:"domain"` - Expires time.Time `json:"expires"` - - MaxAge int `json:"max_age"` - Secure bool `json:"secure"` - HTTPOnly bool `json:"http_only"` - SameSite SameSite `json:"same_site"` + Name string + Value string + Path string + Domain string + Expires time.Time + MaxAge int + Secure bool + HTTPOnly bool + SameSite SameSite } ) @@ -131,7 +129,19 @@ func (c HTTPCookie) Copy() core.Value { } func (c HTTPCookie) MarshalJSON() ([]byte, error) { - out, err := json.Marshal(c) + v := map[string]interface{}{ + "name": c.Name, + "value": c.Value, + "path": c.Path, + "domain": c.Domain, + "expires": c.Expires, + "max_age": c.MaxAge, + "secure": c.Secure, + "http_only": c.HTTPOnly, + "same_site": c.SameSite, + } + + out, err := json.Marshal(v) if err != nil { return nil, err From 72cfbb9e27e5262812461d38c1c27ed4c59c389f Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Fri, 15 Mar 2019 19:37:46 -0400 Subject: [PATCH 13/13] Fixed formatting --- pkg/stdlib/html/lib.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/stdlib/html/lib.go b/pkg/stdlib/html/lib.go index eb90f779..c28d15f5 100644 --- a/pkg/stdlib/html/lib.go +++ b/pkg/stdlib/html/lib.go @@ -17,9 +17,9 @@ func NewLib() map[string]core.Function { "ATTR_GET": AttributeGet, "ATTR_REMOVE": AttributeRemove, "ATTR_SET": AttributeSet, - "COOKIE_DEL": CookieDel, - "COOKIE_GET": CookieGet, - "COOKIE_SET": CookieSet, + "COOKIE_DEL": CookieDel, + "COOKIE_GET": CookieGet, + "COOKIE_SET": CookieSet, "CLICK": Click, "CLICK_ALL": ClickAll, "DOCUMENT": Document,