Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling non 200 http responses #593

Merged
merged 1 commit into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions examples/non-200.fql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Load the page while listing 403 under ignore.statusCodes, so that a
// 403 response is not treated as a load failure.
LET p = DOCUMENT('https://www.g2.com/categories', {
ignore: {
statusCodes: [
{
code: 403
}
]
}
})

// Return the status code of the response the page was loaded with.
RETURN p.response.statusCode
6 changes: 3 additions & 3 deletions pkg/drivers/cdp/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ func LoadHTMLPage(
netOpts.Cookies[params.URL] = params.Cookies
}

if params.Disable != nil {
if len(params.Disable.Resources) > 0 {
netOpts.Filter.Patterns = params.Disable.Resources
if params.Ignore != nil {
if len(params.Ignore.Resources) > 0 {
netOpts.Filter.Patterns = params.Ignore.Resources
}
}

Expand Down
51 changes: 47 additions & 4 deletions pkg/drivers/http/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package http
import (
"bytes"
"context"
"github.com/gobwas/glob"
"net/http"
"net/url"

Expand Down Expand Up @@ -171,7 +172,13 @@ func (drv *Driver) Open(ctx context.Context, params drivers.Params) (drivers.HTM

defer resp.Body.Close()

if !drv.responseCodeAllowed(resp) {
var queryFilters []drivers.StatusCodeFilter

if params.Ignore != nil {
queryFilters = params.Ignore.StatusCodes
}

if !drv.responseCodeAllowed(resp, queryFilters) {
return nil, errors.New(resp.Status)
}

Expand Down Expand Up @@ -214,7 +221,43 @@ func (drv *Driver) Close() error {
return nil
}

func (drv *Driver) responseCodeAllowed(resp *http.Response) bool {
_, exists := drv.options.AllowedHTTPCodes[resp.StatusCode]
return exists
// responseCodeAllowed reports whether the status code of resp should be
// treated as a successful page load.
//
// Any 2xx status is always allowed. Otherwise the code must match a filter:
// first the ones passed with the query (additional), then the driver-level
// ones in drv.options.HTTPCodesFilter. A filter matches when its Code equals
// the response status code and, if it carries a URL pattern, that pattern
// matches the request URL.
func (drv *Driver) responseCodeAllowed(resp *http.Response, additional []drivers.StatusCodeFilter) bool {
	// OK is by default
	if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
		return true
	}

	// A response that is not attached to a request has no URL to match
	// against; URL-restricted filters are then tested against "".
	var reqURL string

	if resp.Request != nil && resp.Request.URL != nil {
		reqURL = resp.Request.URL.String()
	}

	// Try to use those that are passed within a query
	for _, filter := range additional {
		if filter.Code != resp.StatusCode {
			continue
		}

		if filter.URL == "" {
			return true
		}

		// The pattern comes from the query and may be malformed. Treat an
		// invalid pattern as a non-match instead of panicking.
		pattern, err := glob.Compile(filter.URL)

		if err != nil {
			continue
		}

		if pattern.Match(reqURL) {
			return true
		}
	}

	// if still not allowed, try the default ones
	for _, filter := range drv.options.HTTPCodesFilter {
		if filter.Code != resp.StatusCode {
			continue
		}

		if filter.URL == nil || filter.URL.Match(reqURL) {
			return true
		}
	}

	return false
}
36 changes: 23 additions & 13 deletions pkg/drivers/http/options.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package http

import (
"github.com/gobwas/glob"
stdhttp "net/http"

"github.com/MontFerret/ferret/pkg/drivers"
Expand All @@ -10,17 +11,22 @@ import (
type (
Option func(opts *Options)

// compiledStatusCodeFilter is a driver-level rule that allows an HTTP status
// code, optionally restricted to request URLs matching a compiled glob.
compiledStatusCodeFilter struct {
// URL is the compiled URL pattern; nil means the rule applies to any URL.
URL glob.Glob
// Code is the HTTP status code the rule allows.
Code int
}

Options struct {
Name string
Backoff pester.BackoffStrategy
MaxRetries int
Concurrency int
Proxy string
UserAgent string
Headers drivers.HTTPHeaders
Cookies drivers.HTTPCookies
AllowedHTTPCodes map[int]struct{}
HTTPTransport *stdhttp.Transport
Name string
Backoff pester.BackoffStrategy
MaxRetries int
Concurrency int
Proxy string
UserAgent string
Headers drivers.HTTPHeaders
Cookies drivers.HTTPCookies
HTTPCodesFilter []compiledStatusCodeFilter
HTTPTransport *stdhttp.Transport
}
)

Expand All @@ -30,7 +36,7 @@ func newOptions(setters []Option) *Options {
opts.Backoff = pester.ExponentialBackoff
opts.Concurrency = 3
opts.MaxRetries = 5
opts.AllowedHTTPCodes = map[int]struct{}{stdhttp.StatusOK: struct{}{}}
opts.HTTPCodesFilter = make([]compiledStatusCodeFilter, 0, 5)

for _, setter := range setters {
setter(opts)
Expand Down Expand Up @@ -133,14 +139,18 @@ func WithCookies(cookies []drivers.HTTPCookie) Option {

// WithAllowedHTTPCode returns an Option that registers httpCode as an allowed
// HTTP status code, without any URL restriction.
func WithAllowedHTTPCode(httpCode int) Option {
return func(opts *Options) {
opts.AllowedHTTPCodes[httpCode] = struct{}{}
opts.HTTPCodesFilter = append(opts.HTTPCodesFilter, compiledStatusCodeFilter{
Code: httpCode,
})
}
}

// WithAllowedHTTPCodes returns an Option that registers every code in
// httpCodes as an allowed HTTP status code, without any URL restriction.
func WithAllowedHTTPCodes(httpCodes []int) Option {
return func(opts *Options) {
for _, code := range httpCodes {
opts.AllowedHTTPCodes[code] = struct{}{}
opts.HTTPCodesFilter = append(opts.HTTPCodesFilter, compiledStatusCodeFilter{
Code: code,
})
}
}
}
Expand Down
12 changes: 9 additions & 3 deletions pkg/drivers/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ type (
Type string
}

Disable struct {
Resources []ResourceFilter
// StatusCodeFilter is a rule naming an HTTP status code that should not make
// a page load fail.
StatusCodeFilter struct {
// URL is an optional URL pattern (the HTTP driver compiles it as a glob
// and matches it against the request URL); empty means any URL.
URL string
// Code is the HTTP status code the rule applies to.
Code int
}

// Ignore groups the rules describing what should be ignored while a page is
// being loaded.
Ignore struct {
// Resources holds resource filters (the CDP driver passes them to its
// network filter patterns).
Resources []ResourceFilter
// StatusCodes holds rules for HTTP status codes that should not fail
// the load.
StatusCodes []StatusCodeFilter
}

Viewport struct {
Expand All @@ -25,7 +31,7 @@ type (
Cookies HTTPCookies
Headers HTTPHeaders
Viewport *Viewport
Disable *Disable
Ignore *Ignore
}

ParseParams struct {
Expand Down
10 changes: 10 additions & 0 deletions pkg/runtime/values/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,16 @@ func (t *Object) MustGet(key String) core.Value {
return val
}

// MustGetOr looks up key in the object and returns the stored value. When
// the key is absent it returns defaultValue instead.
func (t *Object) MustGetOr(key String, defaultValue core.Value) core.Value {
	if stored, ok := t.value[string(key)]; ok {
		return stored
	}

	return defaultValue
}

func (t *Object) Get(key String) (core.Value, Boolean) {
val, found := t.value[string(key)]

Expand Down
61 changes: 50 additions & 11 deletions pkg/stdlib/html/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package html

import (
"context"
"github.com/pkg/errors"
"strings"
"time"

Expand All @@ -27,10 +28,10 @@ type PageLoadParams struct {
// @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
// @param {HTTPCookies} [params.cookies] - Set of HTTP cookies to use during page loading.
// @param {HTTPHeaders} [params.headers] - Set of HTTP headers to use during page loading.
// @param {Object} [params.disable] - Set of parameters to disable some page functionality or behavior.
// @param {Object[]} [params.disable.resources] - Collection of rules to disable resources during page load and navigation.
// @param {String} [params.disable.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {String} [params.disable.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
// @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
// @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
// @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
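// @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
// @param {Object[]} [params.ignore.statusCodes] - Collection of rules to ignore HTTP status codes that would otherwise make page loading fail.
// @param {String} [params.ignore.statusCodes.*.url] - Url glob pattern. If set, the status code is ignored only for matching urls. Omitting applies the rule to every url.
// @param {Int} params.ignore.statusCodes.*.code - HTTP status code to ignore. Required for every rule.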
// @param {Object} [params.viewport] - Viewport params.
// @param {Int} [params.viewport.height] - Viewport height.
// @param {Int} [params.viewport.width] - Viewport width.
Expand Down Expand Up @@ -191,16 +192,16 @@ func newPageLoadParams(url values.String, arg core.Value) (PageLoadParams, error
res.Viewport = viewport
}

disable, exists := obj.Get(values.NewString("disable"))
ignore, exists := obj.Get(values.NewString("ignore"))

if exists {
disable, err := parseDisable(disable)
ignore, err := parseIgnore(ignore)

if err != nil {
return res, err
}

res.Disable = disable
res.Ignore = ignore
}
case types.String:
res.Driver = arg.(values.String).String()
Expand Down Expand Up @@ -408,16 +409,16 @@ func parseViewport(value core.Value) (*drivers.Viewport, error) {
return res, nil
}

func parseDisable(value core.Value) (*drivers.Disable, error) {
func parseIgnore(value core.Value) (*drivers.Ignore, error) {
if err := core.ValidateType(value, types.Object); err != nil {
return nil, err
}

res := &drivers.Disable{}
res := &drivers.Ignore{}

disable := value.(*values.Object)
ignore := value.(*values.Object)

resources, exists := disable.Get("resources")
resources, exists := ignore.Get("resources")

if exists {
if err := core.ValidateType(resources, types.Array); err != nil {
Expand Down Expand Up @@ -458,5 +459,43 @@ func parseDisable(value core.Value) (*drivers.Disable, error) {
}
}

// Optional list of status code rules: every element must be an object with
// a required "code" and an optional "url" pattern.
statusCodes, exists := ignore.Get("statusCodes")

if exists {
	if err := core.ValidateType(statusCodes, types.Array); err != nil {
		return nil, err
	}

	statusCodes := statusCodes.(*values.Array)

	res.StatusCodes = make([]drivers.StatusCodeFilter, 0, statusCodes.Length())

	// The ForEach callback only returns a bool, so it records a failure
	// here and stops the iteration by returning false.
	var e error

	statusCodes.ForEach(func(el core.Value, idx int) bool {
		if e = core.ValidateType(el, types.Object); e != nil {
			return false
		}

		pattern := el.(*values.Object)

		url := pattern.MustGetOr("url", values.NewString(""))
		code, codeExists := pattern.Get("code")

		// a rule without a status code is invalid
		if !codeExists {
			e = errors.New("http code is required")
			return false
		}

		res.StatusCodes = append(res.StatusCodes, drivers.StatusCodeFilter{
			URL:  url.String(),
			Code: int(values.ToInt(code)),
		})

		return true
	})

	// Report a validation failure from the callback instead of silently
	// returning a partially filled result.
	if e != nil {
		return nil, e
	}
}

return res, nil
}