From 8b9e480416bac456453c1791c071a5dcdeaf0a1d Mon Sep 17 00:00:00 2001 From: Marc Worrell Date: Tue, 26 Jul 2022 16:52:15 +0200 Subject: [PATCH] Faster import of resources in mod_ginger_adlib. Also better reporting and debug logging during imports. Fixes for more robustness in m_creative_commons.erl, sparql_client.erl and filter_location_defined.erl --- modules/mod_ginger_adlib/mod_ginger_adlib.erl | 21 ++++++++++++------- .../filters/filter_location_defined.erl | 10 +++++++-- .../models/m_creative_commons.erl | 17 +++++++++++++-- .../mod_ginger_rdf/support/sparql_client.erl | 2 +- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/modules/mod_ginger_adlib/mod_ginger_adlib.erl b/modules/mod_ginger_adlib/mod_ginger_adlib.erl index 67ee9d112..f5e50b958 100644 --- a/modules/mod_ginger_adlib/mod_ginger_adlib.erl +++ b/modules/mod_ginger_adlib/mod_ginger_adlib.erl @@ -27,6 +27,7 @@ start_link/1 ]). +-define(ADLIB_LIMIT, 100). -include_lib("zotonic.hrl"). -include_lib("include/ginger_adlib.hrl"). @@ -48,19 +49,28 @@ pull_database_updates(Database, Since, StartFrom, Context) when is_tuple(Since) Format = detect_modification_date_format(Database, "1900-01-01", Context), pull_database_updates(Database, z_datetime:format(Since, Format, Context), StartFrom, Context); pull_database_updates(Database, Since, StartFrom, Context) when is_binary(Since) -> + pull_database_updates_loop(Database, Since, StartFrom, 0, Context). + +pull_database_updates_loop(Database, Since, StartFrom, TotalAcc, Context) -> + lager:debug("mod_ginger_adlib: pulling records modified after ~s from database ~s", + [Since, Database]), Args = [ {database, Database}, {search, <<"modification>=", Since/binary>>} ], - - #search_result{result = Records, total = Total} = z_search:search({adlib, Args}, {StartFrom, 20}, Context), + #search_result{result = Records, total = Total} = z_search:search({adlib, Args}, {StartFrom, ?ADLIB_LIMIT}, Context), + TotalAcc1 = TotalAcc + length(Records), + lager:debug("mod_ginger_adlib: pulled records ~p / ~p modified after ~s from database ~s", + [TotalAcc1, Total, Since, Database]), case Records of [] -> - lager:info("mod_ginger_adlib: Pulled ~p records modified after ~s from database ~s", [Total, Since, Database]), + ?zInfo("mod_ginger_adlib: Pulled total ~p records modified after ~s from database ~s", + [TotalAcc1, Since, Database], + Context), ok; _ -> [z_notifier:notify(adlib_update(Record, Database), Context) || Record <- Records], - pull_database_updates(Database, Since, StartFrom + 20, Context) + pull_database_updates_loop(Database, Since, StartFrom + ?ADLIB_LIMIT, TotalAcc1, Context) end. %% @doc Pull single record update from Adlib @@ -124,7 +134,6 @@ detect_modification_date_format(Database, Since, Context) -> {database, Database}, {search, <<"modification>=", ISO8601/binary>>} ], - case z_search:search({adlib, Args}, {1, 20}, Context) of #search_result{total = undefined} -> %% Try legacy format @@ -138,14 +147,12 @@ start_link(Args) when is_list(Args) -> init(Args) -> {context, Context} = proplists:lookup(context, Args), - case m_config:get(?MODULE, databases, Context) of undefined -> m_config:set_prop(?MODULE, databases, list, [], Context); _Exists -> ok end, - {ok, #state{context=z_context:new(Context)}}. handle_call(Message, _From, State) -> diff --git a/modules/mod_ginger_base/filters/filter_location_defined.erl b/modules/mod_ginger_base/filters/filter_location_defined.erl index 24e893a89..0270cb79c 100644 --- a/modules/mod_ginger_base/filters/filter_location_defined.erl +++ b/modules/mod_ginger_base/filters/filter_location_defined.erl @@ -1,6 +1,6 @@ %% @author Driebit %% @copyright 2015 Driebit -%% @doc unique filter, return a list with unique ids +%% @doc Check if a resource has a location_lat defined. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -19,15 +19,21 @@ -export([location_defined/2]). -include("zotonic.hrl"). +has_location(undefined, _Context) -> + false; has_location(RscId, Context) when is_integer(RscId) -> case m_rsc:p(RscId, location_lat, Context) of undefined -> false; + <<>> -> + false; _Lat -> true end; has_location({RscId, _}, Context) -> - has_location(RscId, Context). + has_location(RscId, Context); +has_location(RscId, Context) -> + has_location(m_rsc:rid(RscId, Context), Context). location_defined(Data, Context) -> List = filter_make_list:make_list(Data, Context), diff --git a/modules/mod_ginger_base/models/m_creative_commons.erl b/modules/mod_ginger_base/models/m_creative_commons.erl index 434f0bb3d..2f2622cfc 100644 --- a/modules/mod_ginger_base/models/m_creative_commons.erl +++ b/modules/mod_ginger_base/models/m_creative_commons.erl @@ -81,7 +81,14 @@ label(<<"by-nc-nd/", _Version/binary>>) -> label(<<"http://creativecommons.org/publicdomain/zero/", _Version/binary>>) -> <<"CC0">>; label(<<"http://creativecommons.org/publicdomain/mark/", _Version/binary>>) -> - <<"PD">>. + <<"PD">>; +label(License) when is_binary(License) -> + case z_string:to_lower(License) of + License -> + undefined; + LowerLic -> + label(LowerLic) + end. %% @doc Get URL to translated license at the Creative Commons website -spec language_url_for(binary(), #context{}) -> binary() | undefined. @@ -103,4 +110,10 @@ versioned_license([<<"BY", _/binary>> = Type, Version]) -> versioned_license([<<"CC0">>, Version]) -> <<"http://creativecommons.org/publicdomain/zero/", Version/binary>>; versioned_license([<<"PD">>, Version]) -> - <<"http://creativecommons.org/publicdomain/mark/", Version/binary>>. + <<"http://creativecommons.org/publicdomain/mark/", Version/binary>>; +versioned_license([<<"BY", _/binary>> = Type]) -> + <<"http://creativecommons.org/licenses/", Type/binary, "/4.0">>; +versioned_license([<<"CC0">>]) -> + <<"http://creativecommons.org/publicdomain/zero/1.0">>; +versioned_license([<<"PD">>]) -> + <<"http://creativecommons.org/publicdomain/mark/1.0">>. diff --git a/modules/mod_ginger_rdf/support/sparql_client.erl b/modules/mod_ginger_rdf/support/sparql_client.erl index 48d0f6b2f..24f3a691d 100644 --- a/modules/mod_ginger_rdf/support/sparql_client.erl +++ b/modules/mod_ginger_rdf/support/sparql_client.erl @@ -78,7 +78,7 @@ get_resource(Endpoint, Uri, Properties) -> case query_rdf(Endpoint, Query) of undefined -> undefined; - [Rdf] -> + [Rdf|_] -> Rdf end.