Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ajout validateur GBFS à la multi-validation #2554

Merged
merged 10 commits into from
Aug 23, 2022
39 changes: 39 additions & 0 deletions apps/transport/lib/jobs/gbfs_multi_validation_job.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
defmodule Transport.Jobs.GBFSMultiValidationDispatcherJob do
  @moduledoc """
  Dispatcher job: enqueues one `Transport.Jobs.GBFSMultiValidationJob`
  for each resource returned by `relevant_resources/0`.
  """
  use Oban.Worker, max_attempts: 3, tags: ["validation"]
  alias Transport.Jobs.GBFSMultiValidationJob
  import Ecto.Query

  @impl Oban.Worker
  def perform(%Oban.Job{}) do
    # Build one job changeset per resource id, then insert them in a single call.
    jobs =
      for resource_id <- relevant_resources() do
        GBFSMultiValidationJob.new(%{resource_id: resource_id})
      end

    Oban.insert_all(jobs)

    :ok
  end

  @doc """
  Returns the ids of the resources that are flagged as available and
  whose format is `"gbfs"`.
  """
  def relevant_resources do
    gbfs_resource_ids =
      from(r in DB.Resource.base_query(),
        where: r.is_available and r.format == "gbfs",
        select: r.id
      )

    DB.Repo.all(gbfs_resource_ids)
  end
end

defmodule Transport.Jobs.GBFSMultiValidationJob do
  @moduledoc """
  Validates a single GBFS resource, identified by the `resource_id` job argument.
  """
  use Oban.Worker, max_attempts: 3, tags: ["validation"]
  alias Transport.Validators.GBFSValidator

  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"resource_id" => resource_id}}) do
    # `get!` raises when no resource has this id.
    resource = DB.Repo.get!(DB.Resource, resource_id)
    GBFSValidator.validate_and_save(resource)
  end
end
61 changes: 61 additions & 0 deletions apps/transport/lib/validators/gbfs_validator.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
defmodule Transport.Validators.GBFSValidator do
@moduledoc """
Validate and extract metadata for GBFS feed using [the MobilityData GBFS validator](https://gbfs-validator.netlify.app) and our own metadata extractor.
"""
# https://github.com/etalab/transport-site/issues/2390
# Plan to move the other validator here as we deprecate
# the previous validation flow.
alias Transport.Cache.API, as: Cache
alias Transport.Shared.GBFSMetadata.Wrapper, as: GBFSMetadata
@github_repository "MobilityData/gbfs-validator"
@behaviour Transport.Validators.Validator

@impl Transport.Validators.Validator
def validate_and_save(%DB.Resource{url: url, format: "gbfs", id: resource_id}) do
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prend une ressource en param et non une ResourceHistory car les ressources GBFS ne sont pas historisées (car c'est du temps réel).

result = GBFSMetadata.compute_feed_metadata(url, "https://#{Application.fetch_env!(:transport, :domain_name)}")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ça prend l'adresse du site web pour déterminer si le PAN est autorisé via CORS. À voir si c'est vraiment utile ou si en réalité on l'utilise pas, c'est dans la classe existante.


%DB.MultiValidation{
validation_timestamp: DateTime.utc_now(),
command: validator_command(),
validated_data_name: url,
validator: validator_name(),
result: Map.fetch!(result, :validation),
metadata: %DB.ResourceMetadata{
metadata: Map.reject(result, fn {key, _val} -> key == :validation end),
resource_id: resource_id
},
resource_id: resource_id,
validator_version: validator_version()
}
|> DB.Repo.insert!()

:ok
end

@impl Transport.Validators.Validator
def validator_name do
  # The validator is identified by the name of its GitHub repository.
  @github_repository
end

# The "command" stored with a validation is the configured URL of the GBFS validator.
defp validator_command do
  Application.fetch_env!(:transport, :gbfs_validator_url)
end

@doc """
Returns the validator version: the sha of the latest commit on the default
branch of the `gbfs-validator` GitHub repository.

The lookup goes through `Cache.fetch/3` with a 5 minutes value
(presumably the cache TTL).

May be solved by https://github.com/MobilityData/gbfs-validator/issues/77 in the future.
"""
def validator_version do
  Cache.fetch("#{__MODULE__}::validator_version", &latest_commit_sha/0, :timer.minutes(5))
end

# Asks the GitHub API for the repository's default branch, then for the sha
# of the latest commit on that branch.
defp latest_commit_sha do
  default_branch = github_api_url() |> get_json!() |> Map.fetch!("default_branch")

  "#{github_api_url()}/commits/#{default_branch}"
  |> get_json!()
  |> Map.fetch!("sha")
end

# Performs a GET request, requires a 200 response (raises otherwise) and
# returns the decoded JSON body.
defp get_json!(url) do
  %HTTPoison.Response{status_code: 200, body: body} = http_client().get!(url)
  Jason.decode!(body)
end

# Base URL of the GitHub REST API for the validator's repository.
defp github_api_url do
  "https://api.github.com/repos/#{@github_repository}"
end
# HTTP client module, resolved at runtime through the HTTPoison wrapper.
defp http_client do
  Transport.Shared.Wrapper.HTTPoison.impl()
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
defmodule Transport.Test.Transport.Jobs.GBFSMultiValidationDispatcherJobTest do
use ExUnit.Case, async: true
use Oban.Testing, repo: DB.Repo
import DB.Factory
import Mox
alias Shared.Validation.GBFSValidator.Summary, as: GBFSValidationSummary
alias Transport.Jobs.{GBFSMultiValidationDispatcherJob, GBFSMultiValidationJob}

# Mox: verify the expectations set by each test when the test exits.
setup :verify_on_exit!

# Check out a sandboxed database connection for each test.
setup do
:ok = Ecto.Adapters.SQL.Sandbox.checkout(DB.Repo)
end

test "relevant_resources" do
  resource = insert(:resource, dataset: insert(:dataset), is_available: true, format: "gbfs")

  # Ignored: available, but not a GBFS feed
  _csv_resource = insert(:resource, dataset: insert(:dataset), is_available: true, format: "csv")
  # Ignored: a GBFS feed, but flagged as unavailable
  _unavailable_resource = insert(:resource, dataset: insert(:dataset), is_available: false, format: "gbfs")

  assert [resource.id] == GBFSMultiValidationDispatcherJob.relevant_resources()
end

test "enqueues other jobs" do
  gbfs_resource = insert(:resource, dataset: insert(:dataset), is_available: true, format: "gbfs")
  %DB.Resource{id: resource_id} = gbfs_resource

  # A CSV resource is not relevant: no job must be enqueued for it
  insert(:resource, dataset: insert(:dataset), is_available: true, format: "csv")

  assert :ok == perform_job(GBFSMultiValidationDispatcherJob, %{})

  enqueued_jobs = all_enqueued(worker: GBFSMultiValidationJob)
  assert [%Oban.Job{args: %{"resource_id" => ^resource_id}}] = enqueued_jobs
end

test "validates a GBFS resource" do
  resource =
    insert(:resource,
      dataset: insert(:dataset),
      is_available: true,
      format: "gbfs",
      url: "https://example.com/gbfs.json"
    )

  %DB.Resource{id: resource_id, url: url} = resource

  assert DB.Resource.is_gbfs?(resource)

  validator_version = setup_validator_version_mocks()

  # What the mocked metadata extractor returns for the feed
  validation_summary = %GBFSValidationSummary{
    errors_count: 0,
    has_errors: false,
    version_detected: "1.1",
    version_validated: "1.1",
    validator: :validator_module
  }

  feed_metadata = %{
    languages: ["fr"],
    system_details: %{name: "velhop", timezone: "Europe/Paris"},
    ttl: 3600,
    types: ["stations"],
    versions: ["1.1"],
    feeds: ["system_information", "station_information", "station_status"],
    validation: validation_summary,
    has_cors: true,
    is_cors_allowed: true
  }

  expect(Transport.Shared.GBFSMetadata.Mock, :compute_feed_metadata, fn ^url, "https://transport.data.gouv.fr" ->
    feed_metadata
  end)

  assert :ok == perform_job(GBFSMultiValidationJob, %{resource_id: resource_id})

  multi_validation = DB.MultiValidation |> DB.Repo.one!() |> DB.Repo.preload(:metadata)

  # Top-level fields of the single validation row
  assert %DB.MultiValidation{
           resource_id: ^resource_id,
           validated_data_name: ^url,
           command: "https://gbfs-validator.netlify.app/.netlify/functions/validator",
           validator: "MobilityData/gbfs-validator",
           validator_version: ^validator_version,
           result: validation_result,
           metadata: resource_metadata
         } = multi_validation

  # The `:validation` entry is stored as the validation result...
  assert %{
           "errors_count" => 0,
           "has_errors" => false,
           "validator" => "validator_module",
           "version_detected" => "1.1",
           "version_validated" => "1.1"
         } = validation_result

  # ...and the other entries are stored as the resource metadata
  assert %DB.ResourceMetadata{
           metadata: %{
             "feeds" => ["system_information", "station_information", "station_status"],
             "has_cors" => true,
             "is_cors_allowed" => true,
             "languages" => ["fr"],
             "system_details" => %{"name" => "velhop", "timezone" => "Europe/Paris"},
             "ttl" => 3600,
             "types" => ["stations"],
             "versions" => ["1.1"]
           },
           resource_id: ^resource_id
         } = resource_metadata
end

# Sets the two HTTP expectations needed to resolve the validator version
# (repository details, then latest commit of the default branch) and returns
# the sha the mocked GitHub API will answer with.
defp setup_validator_version_mocks(default_branch \\ "master", sha \\ Ecto.UUID.generate()) do
  repository_url = "https://api.github.com/repos/MobilityData/gbfs-validator"
  commits_url = "https://api.github.com/repos/MobilityData/gbfs-validator/commits/#{default_branch}"

  Transport.HTTPoison.Mock
  |> expect(:get!, fn ^repository_url ->
    %HTTPoison.Response{status_code: 200, body: Jason.encode!(%{"default_branch" => default_branch})}
  end)
  |> expect(:get!, fn ^commits_url ->
    %HTTPoison.Response{status_code: 200, body: Jason.encode!(%{"sha" => sha})}
  end)

  sha
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
defmodule Transport.Validators.GBFSValidatorTest do
use ExUnit.Case, async: true
import DB.Factory
import Mox
alias Shared.Validation.GBFSValidator.Summary, as: GBFSValidationSummary
alias Transport.Validators.GBFSValidator

# Check out a sandboxed database connection for each test.
setup do
Ecto.Adapters.SQL.Sandbox.checkout(DB.Repo)
end

# Mox: verify the expectations set by each test when the test exits.
setup :verify_on_exit!

test "validator_version" do
  # The mocked GitHub API answers with this sha for the latest commit
  expected_sha = setup_validator_version_mocks()

  assert GBFSValidator.validator_version() == expected_sha
end

test "validate_and_save inserts the expected data in the database" do
%DB.Resource{id: resource_id} =
resource = insert(:resource, url: url = "https://example.com/gbfs.json", format: "gbfs")

assert DB.Resource.is_gbfs?(resource)

validator_version = setup_validator_version_mocks()

Transport.Shared.GBFSMetadata.Mock
|> expect(:compute_feed_metadata, fn ^url, "https://transport.data.gouv.fr" ->
Comment on lines +20 to +21
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On pourrait peut-être simplifier les tests, là c'est du copié collé dans le test du job par rapport au test du validateur.

Peut-être lors de la suppression de la classe de validation dans shared en mettant un behaviour ?

%{
languages: ["fr"],
system_details: %{name: "velhop", timezone: "Europe/Paris"},
ttl: 3600,
types: ["stations"],
versions: ["1.1"],
feeds: ["system_information", "station_information", "station_status"],
validation: %GBFSValidationSummary{
errors_count: 0,
has_errors: false,
version_detected: "1.1",
version_validated: "1.1",
validator: :validator_module
},
has_cors: true,
is_cors_allowed: true
}
end)

GBFSValidator.validate_and_save(resource)

assert %DB.MultiValidation{
metadata: %DB.ResourceMetadata{
metadata: %{
"feeds" => ["system_information", "station_information", "station_status"],
"has_cors" => true,
"is_cors_allowed" => true,
"languages" => ["fr"],
"system_details" => %{"name" => "velhop", "timezone" => "Europe/Paris"},
"ttl" => 3600,
"types" => ["stations"],
"versions" => ["1.1"]
},
resource_id: ^resource_id
},
resource_id: ^resource_id,
result: %{
"errors_count" => 0,
"has_errors" => false,
"validator" => "validator_module",
"version_detected" => "1.1",
"version_validated" => "1.1"
},
validated_data_name: ^url,
command: "https://gbfs-validator.netlify.app/.netlify/functions/validator",
validator: "MobilityData/gbfs-validator",
validator_version: ^validator_version
} = DB.MultiValidation |> DB.Repo.one!() |> DB.Repo.preload(:metadata)
end

defp setup_validator_version_mocks(default_branch \\ "master", sha \\ Ecto.UUID.generate()) do
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Même remarque qu'avant, j'ai repris les tests du validateur.

Transport.HTTPoison.Mock
|> expect(:get!, fn "https://api.github.com/repos/MobilityData/gbfs-validator" ->
%HTTPoison.Response{status_code: 200, body: Jason.encode!(%{"default_branch" => default_branch})}
end)

commits_url = "https://api.github.com/repos/MobilityData/gbfs-validator/commits/#{default_branch}"

Transport.HTTPoison.Mock
|> expect(:get!, fn ^commits_url ->
%HTTPoison.Response{status_code: 200, body: Jason.encode!(%{"sha" => sha})}
end)

sha
end
end
1 change: 1 addition & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ oban_crontab_all_envs =
{"30 */6 * * *", Transport.Jobs.BNLCToGeoData},
{"15 10 * * *", Transport.Jobs.DatabaseBackupReplicationJob},
{"0 7 * * *", Transport.Jobs.GTFSRTMultiValidationDispatcherJob},
{"30 7 * * *", Transport.Jobs.GBFSMultiValidationDispatcherJob},
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

30 min plus tard que la validation GTFS-RT. Dites-moi ce que vous en pensez en termes de fréquence.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Je pense qu'on va adapter par la suite si cette fréquence pose problème.

{"45 */3 * * *", Transport.Jobs.ResourceHistoryJSONSchemaValidationJob},
{"15 */3 * * *", Transport.Jobs.ResourceHistoryTableSchemaValidationJob}
]
Expand Down