Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DNM] Add more FailPoints in critical code paths #4853

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions dbms/src/Common/FailPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,18 @@ std::unordered_map<String, std::shared_ptr<FailPointChannel>> FailPointHelper::f
M(pause_when_altering_dt_store) \
M(pause_after_copr_streams_acquired)

// List macro for the random failpoints: expands M(name) once per failpoint name below.
// NOTE(review): per the macro name these are presumably enabled from outside the process
// (e.g. via libfiu's fiu-run / fiu-ctl) rather than through FailPointHelper — confirm.
#define APPLY_FOR_RANDOM_FAILPOINTS_ENABLED_OUTSIDE(M) \
M(random_tunnel_failpoint) \
M(random_receiver_failpoint)

Comment on lines +103 to +106
Copy link
Contributor

@JaySon-Huang JaySon-Huang May 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you should declare these two failpoints in APPLY_FOR_FAILPOINTS; otherwise you cannot enable them with FailPointHelper::enableFailPoint:

#define SUB_M(NAME, flags) \
if (fail_point_name == FailPoints::NAME) \
{ \
/* FIU_ONETIME -- Only fail once; the point of failure will be automatically disabled afterwards.*/ \
fiu_enable(FailPoints::NAME, 1, nullptr, flags); \
return; \
}
#define M(NAME) SUB_M(NAME, FIU_ONETIME)
APPLY_FOR_FAILPOINTS_ONCE(M)
#undef M
#define M(NAME) SUB_M(NAME, 0)
APPLY_FOR_FAILPOINTS(M)
#undef M
#undef SUB_M

Copy link
Contributor Author

@yibin87 yibin87 May 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I planned to enable these random failpoints with fiu-run and fiu-ctl, e.g. fiu-run -x -c 'enable_random name=random_tunnel_failpoint,probability=0.01' tiflash xxx, in big-cluster tests to check that TiFlash handles the injected failures correctly (no crash and no resource leak); they were not meant to be used by gtest.
Now I'll make them usable from gtest as well.

// Definitions of the failpoint name constants.
// Other translation units refer to them with `extern const char <name>[];`.
namespace FailPoints
{
// For each name supplied by the list macros below, define a char array called NAME
// whose contents are the name itself (#NAME stringifies the token; the trailing ""
// is concatenated with it as an adjacent string literal).
#define M(NAME) extern const char(NAME)[] = #NAME "";
APPLY_FOR_FAILPOINTS_ONCE(M)
APPLY_FOR_FAILPOINTS(M)
APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M)
APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M)
APPLY_FOR_RANDOM_FAILPOINTS_ENABLED_OUTSIDE(M)
// M is only needed for the expansions above.
#undef M
} // namespace FailPoints

Expand Down
17 changes: 13 additions & 4 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,15 @@ int calcDayNum(int year, int month, int day)
return delsum + year / 4 - temp;
}

/// Converts a broken-down date/time into a second count.
///
/// Returns 0 when both `year` and `month` are 0. Otherwise the result is the day
/// number reported by calcDayNum(year, month, day) scaled by the seconds in a day,
/// plus the seconds contributed by `hour`, `minute` and `second`.
UInt64 calcSeconds(int year, int month, int day, int hour, int minute, int second)
{
    const bool zero_year_and_month = (year == 0) && (month == 0);
    if (zero_year_and_month)
        return 0;

    const Int32 day_number = calcDayNum(year, month, day);

    // Accumulate in the same left-to-right order: days, hours, minutes, seconds.
    Int64 total_seconds = day_number * MyTimeBase::SECOND_IN_ONE_DAY;
    total_seconds += hour * MyTimeBase::SECOND_IN_ONE_HOUR;
    total_seconds += minute * MyTimeBase::SECOND_IN_ONE_MINUTE;
    total_seconds += second;
    return total_seconds;
}

size_t maxFormattedDateTimeStringLength(const String & format)
{
size_t result = 0;
Expand Down Expand Up @@ -1142,7 +1151,7 @@ UInt64 addSeconds(UInt64 t, Int64 delta)
return t;
}
MyDateTime my_time(t);
Int64 current_second = my_time.hour * 3600 + my_time.minute * 60 + my_time.second;
Int64 current_second = my_time.hour * MyTimeBase::SECOND_IN_ONE_HOUR + my_time.minute * MyTimeBase::SECOND_IN_ONE_MINUTE + my_time.second;
current_second += delta;
if (current_second >= 0)
{
Expand All @@ -1161,9 +1170,9 @@ UInt64 addSeconds(UInt64 t, Int64 delta)
current_second += days * MyTimeBase::SECOND_IN_ONE_DAY;
addDays(my_time, -days);
}
my_time.hour = current_second / 3600;
my_time.minute = (current_second % 3600) / 60;
my_time.second = current_second % 60;
my_time.hour = current_second / MyTimeBase::SECOND_IN_ONE_HOUR;
my_time.minute = (current_second % MyTimeBase::SECOND_IN_ONE_HOUR) / MyTimeBase::SECOND_IN_ONE_MINUTE;
my_time.second = current_second % MyTimeBase::SECOND_IN_ONE_MINUTE;
return my_time.toPackedUInt();
}

Expand Down
6 changes: 6 additions & 0 deletions dbms/src/Common/MyTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ namespace DB
struct MyTimeBase
{
static constexpr Int64 SECOND_IN_ONE_DAY = 86400;
static constexpr Int64 SECOND_IN_ONE_HOUR = 3600;
static constexpr Int64 SECOND_IN_ONE_MINUTE = 60;


// copied from https://github.com/pingcap/tidb/blob/master/types/time.go
// Core time bit fields.
Expand Down Expand Up @@ -193,6 +196,9 @@ std::pair<time_t, UInt32> roundTimeByFsp(time_t second, UInt64 nano_second, UInt

int calcDayNum(int year, int month, int day);

// returns seconds since '0000-00-00'
UInt64 calcSeconds(int year, int month, int day, int hour, int minute, int second);

size_t maxFormattedDateTimeStringLength(const String & format);

inline time_t getEpochSecond(const MyDateTime & my_time, const DateLUTImpl & time_zone)
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
//{tipb::ScalarFuncSig::TimeToSec, "cast"},
//{tipb::ScalarFuncSig::TimestampAdd, "cast"},
//{tipb::ScalarFuncSig::ToDays, "cast"},
//{tipb::ScalarFuncSig::ToSeconds, "cast"},
{tipb::ScalarFuncSig::ToSeconds, "tidbToSeconds"},
//{tipb::ScalarFuncSig::UTCTimeWithArg, "cast"},
//{tipb::ScalarFuncSig::UTCTimestampWithoutArg, "cast"},
//{tipb::ScalarFuncSig::Timestamp1Arg, "cast"},
Expand Down
8 changes: 8 additions & 0 deletions dbms/src/Flash/EstablishCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,18 @@
// limitations under the License.

#include <Common/TiFlashMetrics.h>
#include <Common/FailPoint.h>
#include <Flash/EstablishCall.h>
#include <Flash/FlashService.h>
#include <Flash/Mpp/Utils.h>

namespace DB
{
namespace FailPoints
{
// Name constant for the tunnel failpoint; the array itself is defined through the
// M(NAME) expansion in dbms/src/Common/FailPoint.cpp.
extern const char random_tunnel_failpoint[];
} // namespace FailPoints

EstablishCallData::EstablishCallData(AsyncFlashService * service, grpc::ServerCompletionQueue * cq, grpc::ServerCompletionQueue * notify_cq, const std::shared_ptr<std::atomic<bool>> & is_shutdown)
: service(service)
, cq(cq)
Expand Down Expand Up @@ -60,6 +66,7 @@ void EstablishCallData::tryFlushOne()

void EstablishCallData::responderFinish(const grpc::Status & status)
{
FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_tunnel_failpoint);
if (*is_shutdown)
finishTunnelAndResponder();
else
Expand All @@ -71,6 +78,7 @@ void EstablishCallData::initRpc()
std::exception_ptr eptr = nullptr;
try
{
FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_tunnel_failpoint);
service->establishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this);
}
catch (...)
Expand Down
30 changes: 29 additions & 1 deletion dbms/src/Flash/Mpp/ExchangeReceiver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,24 @@
#include <Common/CPUAffinityManager.h>
#include <Common/ThreadFactory.h>
#include <Common/TiFlashMetrics.h>
#include <Common/FailPoint.h>
#include <Flash/Coprocessor/CoprocessorReader.h>
#include <Flash/Mpp/ExchangeReceiver.h>
#include <Flash/Mpp/MPPTunnel.h>
#include <fmt/core.h>

#ifdef FIU_ENABLE
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
#endif

namespace DB
{
namespace FailPoints
{
// Name constant for the receiver failpoint; the array itself is defined through the
// M(NAME) expansion in dbms/src/Common/FailPoint.cpp.
extern const char random_receiver_failpoint[];
} // namespace FailPoints

namespace
{
String getReceiverStateStr(const ExchangeReceiverState & s)
Expand Down Expand Up @@ -470,13 +481,30 @@ void ExchangeReceiverBase<RPCContext>::readLoop(const Request & req)
recv_msg->req_info = req_info;
recv_msg->source_index = req.source_index;
bool success = reader->read(recv_msg->packet);
fiu_do_on(FailPoints::random_receiver_failpoint, {
// This code runs very frequently; firing on every pass would leave other failpoints no chance to trigger,
// so lower the probability internally to about 1/100 (1 in 101: the distribution bounds are inclusive).
pcg64 rng(randomSeed());
int num = std::uniform_int_distribution(0, 100)(rng);
if (num == 11)
success = false;
});
if (!success)
break;
has_data = true;
if (recv_msg->packet->has_error())
throw Exception("Exchange receiver meet error : " + recv_msg->packet->error().msg());

if (!msg_channel.push(std::move(recv_msg)))
bool push_success = msg_channel.push(std::move(recv_msg));
fiu_do_on(FailPoints::random_receiver_failpoint, {
// This code runs very frequently; firing on every pass would leave other failpoints no chance to trigger,
// so lower the probability internally to about 1/100 (1 in 101: the distribution bounds are inclusive).
pcg64 rng(randomSeed());
int num = std::uniform_int_distribution(0, 100)(rng);
if (num == 71)
push_success = false;
});
if (!push_success)
{
meet_error = true;
auto local_state = getState();
Expand Down
10 changes: 9 additions & 1 deletion dbms/src/Flash/Mpp/GRPCReceiverContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include <Common/Exception.h>
#include <Common/FailPoint.h>
#include <Flash/Mpp/GRPCCompletionQueuePool.h>
#include <Flash/Mpp/GRPCReceiverContext.h>

Expand Down Expand Up @@ -50,6 +51,11 @@ struct RpcTypeTraits<::mpp::EstablishMPPConnectionRequest>

namespace DB
{
namespace FailPoints
{
// Name constant for the receiver failpoint; the array itself is defined through the
// M(NAME) expansion in dbms/src/Common/FailPoint.cpp.
extern const char random_receiver_failpoint[];
} // namespace FailPoints

namespace
{
struct GrpcExchangePacketReader : public ExchangePacketReader
Expand Down Expand Up @@ -218,7 +224,9 @@ ExchangePacketReaderPtr GRPCReceiverContext::makeReader(const ExchangeRecvReques
if (request.is_local)
{
auto [tunnel, status] = establishMPPConnectionLocal(request.req.get(), task_manager);
if (!status.ok())
bool status_ok = status.ok();
fiu_do_on(FailPoints::random_receiver_failpoint, status_ok = false;);
if (!status_ok)
{
throw Exception("Exchange receiver meet error : " + status.error_message());
}
Expand Down
19 changes: 18 additions & 1 deletion dbms/src/Flash/Mpp/MPPTunnel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@
#include <Common/FailPoint.h>
#include <Common/ThreadFactory.h>
#include <Common/TiFlashMetrics.h>
#include <Common/randomSeed.h>
#include <Flash/Mpp/MPPTunnel.h>
#include <Flash/Mpp/Utils.h>
#include <fmt/core.h>

#ifdef FIU_ENABLE
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
#endif

namespace DB
{
// Failpoint name constants used by this file.
// NOTE(review): presumably both are defined in dbms/src/Common/FailPoint.cpp — the
// definition of exception_during_mpp_close_tunnel is not visible here; confirm.
namespace FailPoints
{
extern const char exception_during_mpp_close_tunnel[];
extern const char random_tunnel_failpoint[];
} // namespace FailPoints

template <typename Writer>
Expand Down Expand Up @@ -194,7 +201,16 @@ void MPPTunnelBase<Writer>::sendJob(bool need_lock)
MPPDataPacketPtr res;
while (send_queue.pop(res))
{
if (!writer->write(*res))
bool write_success = writer->write(*res);
fiu_do_on(FailPoints::random_tunnel_failpoint, {
// This code runs very frequently; firing on every pass would leave other failpoints no chance to trigger,
// so lower the probability internally to about 1/100 (1 in 101: the distribution bounds are inclusive).
pcg64 rng(randomSeed());
int num = std::uniform_int_distribution(0, 100)(rng);
if (num == 17)
write_success = false;
});
if (!write_success)
{
err_msg = "grpc writes failed.";
break;
Expand Down Expand Up @@ -322,6 +338,7 @@ void MPPTunnelBase<Writer>::waitUntilConnectedOrFinished(std::unique_lock<std::m
auto res = cv_for_connected_or_finished.wait_for(lk, timeout, connected_or_finished);
LOG_FMT_TRACE(log, "end waitUntilConnectedOrFinished");

fiu_do_on(FailPoints::random_tunnel_failpoint, res = false;);
if (!res)
throw Exception(tunnel_id + " is timeout");
}
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsDateTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ void registerFunctionsDateTime(FunctionFactory & factory)
factory.registerFunction<FunctionToTiDBDayOfWeek>();
factory.registerFunction<FunctionToTiDBDayOfYear>();
factory.registerFunction<FunctionToTiDBWeekOfYear>();
factory.registerFunction<FunctionToTiDBToSeconds>();

factory.registerFunction<FunctionToTimeZone>();
factory.registerFunction<FunctionToLastDay>();
Expand Down
37 changes: 37 additions & 0 deletions dbms/src/Functions/FunctionsDateTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -3277,6 +3277,42 @@ struct TiDBWeekOfYearTransformerImpl
}
};

template <typename ToFieldType>
struct TiDBToSecondsTransformerImpl
{
static constexpr auto name = "tidbToSeconds";

static void execute(const Context & context,
const ColumnVector<DataTypeMyTimeBase::FieldType>::Container & vec_from,
typename ColumnVector<ToFieldType>::Container & vec_to,
typename ColumnVector<UInt8>::Container & vec_null_map)
{
bool is_null = false;
for (size_t i = 0; i < vec_from.size(); ++i)
{
MyTimeBase val(vec_from[i]);
vec_to[i] = execute(context, val, is_null);
vec_null_map[i] = is_null;
is_null = false;
}
}

static ToFieldType execute(const Context & context, const MyTimeBase & val, bool & is_null)
{
// TiDB returns normal value if one of month/day is zero for to_seconds function, while MySQL return null if either of them is zero.
// TiFlash aligns with MySQL to align the behavior with other functions like last_day.
if (val.month == 0 || val.day == 0)
{
context.getDAGContext()->handleInvalidTime(
fmt::format("Invalid time value: month({}) or day({}) is zero", val.month, val.day),
Errors::Types::WrongValue);
is_null = true;
return 0;
}
return static_cast<ToFieldType>(calcSeconds(val.year, val.month, val.day, val.hour, val.minute, val.second));
}
};

// Similar to FunctionDateOrDateTimeToSomething, but also handle nullable result and mysql sql mode.
template <typename ToDataType, template <typename> class Transformer, bool return_nullable>
class FunctionMyDateOrMyDateTimeToSomething : public IFunction
Expand Down Expand Up @@ -3376,6 +3412,7 @@ using FunctionToLastDay = FunctionMyDateOrMyDateTimeToSomething<DataTypeMyDate,
using FunctionToTiDBDayOfWeek = FunctionMyDateOrMyDateTimeToSomething<DataTypeUInt16, TiDBDayOfWeekTransformerImpl, return_nullable>;
using FunctionToTiDBDayOfYear = FunctionMyDateOrMyDateTimeToSomething<DataTypeUInt16, TiDBDayOfYearTransformerImpl, return_nullable>;
using FunctionToTiDBWeekOfYear = FunctionMyDateOrMyDateTimeToSomething<DataTypeUInt16, TiDBWeekOfYearTransformerImpl, return_nullable>;
// Function type that pairs TiDBToSecondsTransformerImpl ("tidbToSeconds") with a DataTypeUInt64 result.
using FunctionToTiDBToSeconds = FunctionMyDateOrMyDateTimeToSomething<DataTypeUInt64, TiDBToSecondsTransformerImpl, return_nullable>;

using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeYearNumImpl>;
using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeQuarterNumImpl>;
Expand Down
Loading