diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index ccb51042068..cbceca70686 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -366,6 +366,34 @@ struct QwNeonRegister { return r; } + static int ToAllocationIndex(QwNeonRegister reg) { + ASSERT(reg.code() < kMaxNumRegisters); + return reg.code(); + } + + static const char* AllocationIndexToString(int index) { + ASSERT(index >= 0 && index < kMaxNumRegisters); + const char* const names[] = { + "q0", + "q1", + "q2", + "q3", + "q4", + "q5", + "q6", + "q7", + "q8", + "q9", + "q10", + "q11", + "q12", + "q13", + "q14", + "q15", + }; + return names[index]; + } + bool is_valid() const { return (0 <= code_) && (code_ < kMaxNumRegisters); } @@ -385,6 +413,7 @@ struct QwNeonRegister { typedef QwNeonRegister QuadRegister; +typedef QwNeonRegister SIMD128Register; // Support for the VFP registers s0 to s31 (d0 to d15). diff --git a/src/arm/cpu-arm.cc b/src/arm/cpu-arm.cc index 20c6a5dcce3..a04f1413b88 100644 --- a/src/arm/cpu-arm.cc +++ b/src/arm/cpu-arm.cc @@ -56,6 +56,12 @@ bool CPU::SupportsCrankshaft() { } +bool CPU::SupportsSIMD128InCrankshaft() { + // Not Implemented. + return false; +} + + void CPU::FlushICache(void* start, size_t size) { // Nothing to do flushing no instructions. if (size == 0) { diff --git a/src/arm/deoptimizer-arm.cc b/src/arm/deoptimizer-arm.cc index 6031499dbd1..f3f9fd1ee68 100644 --- a/src/arm/deoptimizer-arm.cc +++ b/src/arm/deoptimizer-arm.cc @@ -113,7 +113,7 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { for (int i = 0; i < DwVfpRegister::kMaxNumRegisters; ++i) { double double_value = input_->GetDoubleRegister(i); output_frame->SetDoubleRegister(i, double_value); @@ -210,7 +210,7 @@ void Deoptimizer::EntryGenerator::Generate() { // Copy VFP registers to // double_registers_[DoubleRegister::kMaxNumAllocatableRegisters] - int double_regs_offset = FrameDescription::double_registers_offset(); + int double_regs_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < DwVfpRegister::kMaxNumAllocatableRegisters; ++i) { int dst_offset = i * kDoubleSize + double_regs_offset; int src_offset = i * kDoubleSize + kNumberOfRegisters * kPointerSize; @@ -284,7 +284,7 @@ void Deoptimizer::EntryGenerator::Generate() { __ CheckFor32DRegs(ip); __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset())); - int src_offset = FrameDescription::double_registers_offset(); + int src_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < DwVfpRegister::kMaxNumRegisters; ++i) { if (i == kDoubleRegZero.code()) continue; if (i == kScratchDoubleReg.code()) continue; @@ -350,6 +350,18 @@ void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + ASSERT(n < 2 * ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n / 2].d[n % 2]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + ASSERT(n < 2 * ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n / 2].d[n % 2] = value; +} + + #undef __ } } // namespace v8::internal diff --git a/src/cpu.h b/src/cpu.h index b2e9f7da7ee..b43e618a0d9 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -107,6 +107,8 @@ class CPU V8_FINAL BASE_EMBEDDED { static bool SupportsCrankshaft(); + static bool SupportsSIMD128InCrankshaft(); + // Flush instruction cache. 
static void FlushICache(void* start, size_t size); diff --git a/src/deoptimizer.cc b/src/deoptimizer.cc index 9e7e113ed9a..4584990ed53 100644 --- a/src/deoptimizer.cc +++ b/src/deoptimizer.cc @@ -1711,7 +1711,7 @@ void Deoptimizer::DoComputeCompiledStubFrame(TranslationIterator* iterator, } // Copy the double registers from the input into the output frame. - CopyDoubleRegisters(output_frame); + CopySIMD128Registers(output_frame); // Fill registers containing handler and number of parameters. SetPlatformCompiledStubRegisters(output_frame, descriptor); @@ -1864,6 +1864,43 @@ void Deoptimizer::MaterializeHeapObjects(JavaScriptFrameIterator* it) { Memory::Object_at(d.destination()) = *num; } + // Materialize all float32x4 before looking at arguments because when the + // output frames are used to materialize arguments objects later on they need + // to already contain valid float32x4 values. + for (int i = 0; i < deferred_float32x4s_.length(); i++) { + SIMD128MaterializationDescriptor
<Address> d = deferred_float32x4s_[i]; + float32x4_value_t x4 = d.value().f4; + Handle<Object> float32x4 = isolate_->factory()->NewFloat32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float32x4 %p " + "[float32x4(%e, %e, %e, %e)] in slot %p\n", + reinterpret_cast<void*>(*float32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + Memory::Object_at(d.destination()) = *float32x4; + } + + // Materialize all int32x4 before looking at arguments because when the + // output frames are used to materialize arguments objects later on they need + // to already contain valid int32x4 values. + for (int i = 0; i < deferred_int32x4s_.length(); i++) { + SIMD128MaterializationDescriptor<Address>
d = deferred_int32x4s_[i]; + int32x4_value_t x4 = d.value().i4; + Handle<Object> int32x4 = isolate_->factory()->NewInt32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new int32x4 %p " + "[int32x4(%u, %u, %u, %u)] in slot %p\n", + reinterpret_cast<void*>(*int32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + Memory::Object_at(d.destination()) = *int32x4; + } + + // Materialize all heap numbers required for arguments/captured objects. for (int i = 0; i < deferred_objects_double_values_.length(); i++) { HeapNumberMaterializationDescriptor<int> d = @@ -1883,6 +1920,48 @@ void Deoptimizer::MaterializeHeapObjects(JavaScriptFrameIterator* it) { // Play it safe and clear all object double values before we continue. deferred_objects_double_values_.Clear(); + // Materialize all float32x4 values required for arguments/captured objects. + for (int i = 0; i < deferred_objects_float32x4_values_.length(); i++) { + SIMD128MaterializationDescriptor<int> d = + deferred_objects_float32x4_values_[i]; + float32x4_value_t x4 = d.value().f4; + Handle<Object> float32x4 = isolate_->factory()->NewFloat32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float32x4 %p " + "[float32x4(%e, %e, %e, %e)] for object at %d\n", + reinterpret_cast<void*>(*float32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + ASSERT(values.at(d.destination())->IsTheHole()); + values.Set(d.destination(), float32x4); + } + + // Play it safe and clear all object float32x4 values before we continue. + deferred_objects_float32x4_values_.Clear(); + + // Materialize all int32x4 values required for arguments/captured objects. + for (int i = 0; i < deferred_objects_int32x4_values_.length(); i++) { + SIMD128MaterializationDescriptor<int> d = + deferred_objects_int32x4_values_[i]; + int32x4_value_t x4 = d.value().i4; + Handle<Object> int32x4 = isolate_->factory()->NewInt32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new int32x4 %p " + "[int32x4(%u, %u, %u, %u)] for object at %d\n", + reinterpret_cast<void*>(*int32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + ASSERT(values.at(d.destination())->IsTheHole()); + values.Set(d.destination(), int32x4); + } + + // Play it safe and clear all object int32x4 values before we continue. + deferred_objects_int32x4_values_.Clear(); + // Materialize arguments/captured objects. 
if (!deferred_objects_.is_empty()) { List<Handle<Object> > materialized_objects(deferred_objects_.length()); @@ -2094,6 +2173,38 @@ void Deoptimizer::DoTranslateObject(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_REGISTER: + case Translation::INT32x4_REGISTER: { + int input_reg = iterator->Next(); + simd128_value_t value = input_->GetSIMD128Register(input_reg); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_REGISTER) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast<intptr_t>(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float32x4(%e, %e, %e, %e) ; %s\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } else { + ASSERT(opcode == Translation::INT32x4_REGISTER); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast<intptr_t>(object_slot), + field_index); + PrintF(trace_scope_->file(), + "int32x4(%u, %u, %u, %u) ; %s\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } + } + AddObjectSIMD128Value(value, opcode); + return; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator->Next(); unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); @@ -2181,6 +2292,39 @@ void Deoptimizer::DoTranslateObject(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_STACK_SLOT: + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator->Next(); + unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); + simd128_value_t value = input_->GetSIMD128FrameSlot(input_offset); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_STACK_SLOT) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast<intptr_t>(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float32x4(%e, %e, %e, %e) ; [sp + %d]\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } else { + ASSERT(opcode == Translation::INT32x4_STACK_SLOT); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast<intptr_t>(object_slot), + field_index); + PrintF(trace_scope_->file(), + "int32x4(%u, %u, %u, %u) ; [sp + %d]\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } + } + AddObjectSIMD128Value(value, opcode); + return; + } + case Translation::LITERAL: { Object* literal = ComputeLiteral(iterator->Next()); if (trace_scope_ != NULL) { @@ -2363,6 +2507,40 @@ void Deoptimizer::DoTranslateCommand(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_REGISTER: + case Translation::INT32x4_REGISTER: { + int input_reg = iterator->Next(); + simd128_value_t value = input_->GetSIMD128Register(input_reg); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_REGISTER) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ":" + " [top + %d] <- float32x4(%e, %e, %e, %e) ; %s\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } else { + ASSERT(opcode == Translation::INT32x4_REGISTER); + int32x4_value_t x4 = value.i4; + 
PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ":" + " [top + %d] <- int32x4(%u, %u, %u, %u) ; %s\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } + } + // We save the untagged value on the side and store a GC-safe + // temporary placeholder in the frame. + AddSIMD128Value(output_[frame_index]->GetTop() + output_offset, value, + opcode); + output_[frame_index]->SetFrameSlot(output_offset, kPlaceholder); + return; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator->Next(); unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); @@ -2464,6 +2642,41 @@ void Deoptimizer::DoTranslateCommand(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_STACK_SLOT: + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator->Next(); + unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); + simd128_value_t value = input_->GetSIMD128FrameSlot(input_offset); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_STACK_SLOT) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ": " + "[top + %d] <- float32x4(%e, %e, %e, %e) ; [sp + %d]\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } else { + ASSERT(opcode == Translation::INT32x4_STACK_SLOT); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ": " + "[top + %d] <- int32x4(%u, %u, %u, %u) ; [sp + %d]\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } + } + // We save the untagged value on the side and store a GC-safe + // temporary placeholder in the frame. + AddSIMD128Value(output_[frame_index]->GetTop() + output_offset, value, + opcode); + output_[frame_index]->SetFrameSlot(output_offset, kPlaceholder); + return; + } + case Translation::LITERAL: { Object* literal = ComputeLiteral(iterator->Next()); if (trace_scope_ != NULL) { @@ -2612,6 +2825,24 @@ void Deoptimizer::AddObjectDoubleValue(double value) { } +void Deoptimizer::AddObjectSIMD128Value(simd128_value_t value, + int translation_opcode) { + deferred_objects_tagged_values_.Add(isolate()->heap()->the_hole_value()); + SIMD128MaterializationDescriptor<int> value_desc( + deferred_objects_tagged_values_.length() - 1, value); + Translation::Opcode opcode = + static_cast<Translation::Opcode>(translation_opcode); + if (opcode == Translation::FLOAT32x4_REGISTER || + opcode == Translation::FLOAT32x4_STACK_SLOT) { + deferred_objects_float32x4_values_.Add(value_desc); + } else { + ASSERT(opcode == Translation::INT32x4_REGISTER || + opcode == Translation::INT32x4_STACK_SLOT); + deferred_objects_int32x4_values_.Add(value_desc); + } +} + +
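The FLOAT32x4/INT32x4 cases above follow the same two-phase scheme that the existing double handling uses, and it is worth spelling out. During translation the untagged 128-bit payload cannot be written into the output frame directly (the GC would misinterpret it), so it is parked in a side table and the frame slot receives a safe filler; materialization later boxes the payload and patches the slot. A minimal sketch, using only names that appear in this patch:

// Phase 1 (DoTranslateCommand): record the raw bits, keep the frame GC-safe.
AddSIMD128Value(slot_address, value, Translation::FLOAT32x4_REGISTER);
output_[frame_index]->SetFrameSlot(output_offset, kPlaceholder);
// Phase 2 (MaterializeHeapObjects): box the bits and patch the slot.
Handle<Object> boxed = isolate_->factory()->NewFloat32x4(value.f4);
Memory::Object_at(slot_address) = *boxed;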
void Deoptimizer::AddDoubleValue(intptr_t slot_address, double value) { HeapNumberMaterializationDescriptor<Address> value_desc( reinterpret_cast<Address>
(slot_address), value); @@ -2619,6 +2850,24 @@ void Deoptimizer::AddDoubleValue(intptr_t slot_address, double value) { } +void Deoptimizer::AddSIMD128Value(intptr_t slot_address, + simd128_value_t value, + int translation_opcode) { + SIMD128MaterializationDescriptor
<Address> value_desc( + reinterpret_cast<Address>
(slot_address), value); + Translation::Opcode opcode = + static_cast<Translation::Opcode>(translation_opcode); + if (opcode == Translation::FLOAT32x4_REGISTER || + opcode == Translation::FLOAT32x4_STACK_SLOT) { + deferred_float32x4s_.Add(value_desc); + } else { + ASSERT(opcode == Translation::INT32x4_REGISTER || + opcode == Translation::INT32x4_STACK_SLOT); + deferred_int32x4s_.Add(value_desc); + } +} + + void Deoptimizer::EnsureCodeForDeoptimizationEntry(Isolate* isolate, BailoutType type, int max_entry_id) { @@ -2860,6 +3109,12 @@ void Translation::StoreDoubleRegister(DoubleRegister reg) { } +void Translation::StoreSIMD128Register(SIMD128Register reg, Opcode opcode) { + buffer_->Add(opcode, zone()); + buffer_->Add(SIMD128Register::ToAllocationIndex(reg), zone()); +} + + void Translation::StoreStackSlot(int index) { buffer_->Add(STACK_SLOT, zone()); buffer_->Add(index, zone()); @@ -2884,6 +3139,12 @@ void Translation::StoreDoubleStackSlot(int index) { } +void Translation::StoreSIMD128StackSlot(int index, Opcode opcode) { + buffer_->Add(opcode, zone()); + buffer_->Add(index, zone()); +} + + void Translation::StoreLiteral(int literal_id) { buffer_->Add(LITERAL, zone()); buffer_->Add(literal_id, zone()); @@ -2911,10 +3172,14 @@ int Translation::NumberOfOperandsFor(Opcode opcode) { case INT32_REGISTER: case UINT32_REGISTER: case DOUBLE_REGISTER: + case FLOAT32x4_REGISTER: + case INT32x4_REGISTER: case STACK_SLOT: case INT32_STACK_SLOT: case UINT32_STACK_SLOT: case DOUBLE_STACK_SLOT: + case FLOAT32x4_STACK_SLOT: + case INT32x4_STACK_SLOT: case LITERAL: case COMPILED_STUB_FRAME: return 1; @@ -2974,6 +3239,8 @@ SlotRef SlotRef::ComputeSlotForNextArgument(TranslationIterator* iterator, case Translation::INT32_REGISTER: case Translation::UINT32_REGISTER: case Translation::DOUBLE_REGISTER: + case Translation::FLOAT32x4_REGISTER: + case Translation::INT32x4_REGISTER: // We are at safepoint which corresponds to call. All registers are // saved by caller so there would be no live registers at this // point. Thus these translation commands should not be used. 
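Each SIMD128 location is encoded in the translation buffer exactly like the scalar ones: one opcode followed by a single operand (an allocation index for registers, a slot index for stack slots), which is why NumberOfOperandsFor() above returns 1 for all four new opcodes. One round trip through the buffer, in sketch form:

// Writer side (LCodeGen::AddToTranslation):
translation->StoreSIMD128Register(reg, Translation::FLOAT32x4_REGISTER);
// Reader side (Deoptimizer::DoTranslateCommand), after matching the opcode:
int input_reg = iterator->Next();  // the allocation index stored above
simd128_value_t value = input_->GetSIMD128Register(input_reg);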
@@ -3003,6 +3270,18 @@ SlotRef SlotRef::ComputeSlotForNextArgument(TranslationIterator* iterator, return SlotRef(slot_addr, SlotRef::DOUBLE); } + case Translation::FLOAT32x4_STACK_SLOT: { + int slot_index = iterator->Next(); + Address slot_addr = SlotAddress(frame, slot_index); + return SlotRef(slot_addr, SlotRef::FLOAT32x4); + } + + case Translation::INT32x4_STACK_SLOT: { + int slot_index = iterator->Next(); + Address slot_addr = SlotAddress(frame, slot_index); + return SlotRef(slot_addr, SlotRef::INT32x4); + } + case Translation::LITERAL: { int literal_index = iterator->Next(); return SlotRef(data->GetIsolate(), diff --git a/src/deoptimizer.h b/src/deoptimizer.h index aace2208673..5bffff7feb4 100644 --- a/src/deoptimizer.h +++ b/src/deoptimizer.h @@ -55,6 +55,9 @@ static inline double read_double_value(Address p) { #endif // V8_HOST_CAN_READ_UNALIGNED } +static inline simd128_value_t read_simd128_value(Address p) { + return *reinterpret_cast<simd128_value_t*>(p); +} class FrameDescription; class TranslationIterator; @@ -75,6 +78,21 @@ class HeapNumberMaterializationDescriptor BASE_EMBEDDED { }; +template<typename T> +class SIMD128MaterializationDescriptor BASE_EMBEDDED { + public: + SIMD128MaterializationDescriptor(T destination, simd128_value_t value) + : destination_(destination), value_(value) { } + + T destination() const { return destination_; } + simd128_value_t value() const { return value_; } + + private: + T destination_; + simd128_value_t value_; +}; + +
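The descriptor is instantiated with two destination types in this patch: T = Address when it names a frame slot to patch directly, and T = int when it names an index into deferred_objects_tagged_values_ for a captured object. Both uses appear in deoptimizer.cc above; the variable names here are illustrative only:

// Frame-slot variant (Deoptimizer::AddSIMD128Value):
SIMD128MaterializationDescriptor<Address> slot_desc(slot_address, value);
// Captured-object variant (Deoptimizer::AddObjectSIMD128Value):
SIMD128MaterializationDescriptor<int> field_desc(tagged_list_index, value);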
class ObjectMaterializationDescriptor BASE_EMBEDDED { public: ObjectMaterializationDescriptor( @@ -349,7 +367,10 @@ class Deoptimizer : public Malloced { void AddObjectDuplication(intptr_t slot, int object_index); void AddObjectTaggedValue(intptr_t value); void AddObjectDoubleValue(double value); + void AddObjectSIMD128Value(simd128_value_t value, int translation_opcode); void AddDoubleValue(intptr_t slot_address, double value); + void AddSIMD128Value(intptr_t slot_address, simd128_value_t value, + int translation_opcode); bool ArgumentsObjectIsAdapted(int object_index) { ObjectMaterializationDescriptor desc = deferred_objects_.at(object_index); @@ -398,9 +419,9 @@ void SetPlatformCompiledStubRegisters(FrameDescription* output_frame, CodeStubInterfaceDescriptor* desc); - // Fill the given output frame's double registers with the original values - // from the input frame's double registers. - void CopyDoubleRegisters(FrameDescription* output_frame); + // Fill the given output frame's simd128 registers with the original values + // from the input frame's simd128 registers. + void CopySIMD128Registers(FrameDescription* output_frame); // Determines whether the input frame contains alignment padding by looking // at the dynamic alignment state slot inside the frame. @@ -432,8 +453,14 @@ List<Object*> deferred_objects_tagged_values_; List<HeapNumberMaterializationDescriptor<int> > deferred_objects_double_values_; + List<SIMD128MaterializationDescriptor<int> > + deferred_objects_float32x4_values_; + List<SIMD128MaterializationDescriptor<int> > + deferred_objects_int32x4_values_; List<ObjectMaterializationDescriptor> deferred_objects_; List<HeapNumberMaterializationDescriptor<Address> > deferred_heap_numbers_; + List<SIMD128MaterializationDescriptor<Address> > deferred_float32x4s_; + List<SIMD128MaterializationDescriptor<Address> > deferred_int32x4s_; // Output frame information. Only used during heap object materialization. List<Handle<JSFunction> > jsframe_functions_; @@ -495,6 +522,11 @@ class FrameDescription { return read_double_value(reinterpret_cast<Address>(ptr)); } + simd128_value_t GetSIMD128FrameSlot(unsigned offset) { + intptr_t* ptr = GetFrameSlotPointer(offset); + return read_simd128_value(reinterpret_cast<Address>
(ptr)); + } + void SetFrameSlot(unsigned offset, intptr_t value) { *GetFrameSlotPointer(offset) = value; } @@ -516,9 +548,11 @@ class FrameDescription { return registers_[n]; } - double GetDoubleRegister(unsigned n) const { - ASSERT(n < ARRAY_SIZE(double_registers_)); - return double_registers_[n]; + double GetDoubleRegister(unsigned n) const; + + simd128_value_t GetSIMD128Register(unsigned n) const { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n]; } void SetRegister(unsigned n, intptr_t value) { @@ -526,9 +560,11 @@ class FrameDescription { registers_[n] = value; } - void SetDoubleRegister(unsigned n, double value) { - ASSERT(n < ARRAY_SIZE(double_registers_)); - double_registers_[n] = value; + void SetDoubleRegister(unsigned n, double value); + + void SetSIMD128Register(unsigned n, simd128_value_t value) { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n] = value; } intptr_t GetTop() const { return top_; } @@ -572,8 +608,8 @@ class FrameDescription { return OFFSET_OF(FrameDescription, registers_); } - static int double_registers_offset() { - return OFFSET_OF(FrameDescription, double_registers_); + static int simd128_registers_offset() { + return OFFSET_OF(FrameDescription, simd128_registers_); } static int frame_size_offset() { @@ -605,7 +641,7 @@ class FrameDescription { uintptr_t frame_size_; // Number of bytes. JSFunction* function_; intptr_t registers_[Register::kNumRegisters]; - double double_registers_[DoubleRegister::kMaxNumRegisters]; + simd128_value_t simd128_registers_[SIMD128Register::kMaxNumRegisters]; intptr_t top_; intptr_t pc_; intptr_t fp_; @@ -708,10 +744,14 @@ class TranslationIterator BASE_EMBEDDED { V(INT32_REGISTER) \ V(UINT32_REGISTER) \ V(DOUBLE_REGISTER) \ + V(FLOAT32x4_REGISTER) \ + V(INT32x4_REGISTER) \ V(STACK_SLOT) \ V(INT32_STACK_SLOT) \ V(UINT32_STACK_SLOT) \ V(DOUBLE_STACK_SLOT) \ + V(FLOAT32x4_STACK_SLOT) \ + V(INT32x4_STACK_SLOT) \ V(LITERAL) @@ -750,10 +790,12 @@ class Translation BASE_EMBEDDED { void StoreInt32Register(Register reg); void StoreUint32Register(Register reg); void StoreDoubleRegister(DoubleRegister reg); + void StoreSIMD128Register(SIMD128Register reg, Opcode opcode); void StoreStackSlot(int index); void StoreInt32StackSlot(int index); void StoreUint32StackSlot(int index); void StoreDoubleStackSlot(int index); + void StoreSIMD128StackSlot(int index, Opcode opcode); void StoreLiteral(int literal_id); void StoreArgumentsObject(bool args_known, int args_index, int args_length); @@ -783,6 +825,8 @@ class SlotRef BASE_EMBEDDED { INT32, UINT32, DOUBLE, + FLOAT32x4, + INT32x4, LITERAL }; @@ -823,6 +867,14 @@ class SlotRef BASE_EMBEDDED { return isolate->factory()->NewNumber(value); } + case FLOAT32x4: { + return isolate->factory()->NewFloat32x4(read_simd128_value(addr_).f4); + } + + case INT32x4: { + return isolate->factory()->NewInt32x4(read_simd128_value(addr_).i4); + } + case LITERAL: return literal_; diff --git a/src/globals.h b/src/globals.h index c9d2326a7f9..90ca9478610 100644 --- a/src/globals.h +++ b/src/globals.h @@ -227,6 +227,11 @@ typedef byte* Address; struct float32x4_value_t { float storage[4]; }; struct int32x4_value_t { int32_t storage[4]; }; +union simd128_value_t { + double d[2]; + float32x4_value_t f4; + int32x4_value_t i4; +}; const int KB = 1024; const int MB = KB * KB; @@ -253,6 +258,7 @@ const int kDoubleSize = sizeof(double); // NOLINT const int kFloatSize = sizeof(float); // NOLINT const int kFloat32x4Size = sizeof(float32x4_value_t); // NOLINT const int kInt32x4Size = 
sizeof(int32x4_value_t); // NOLINT +const int kSIMD128Size = sizeof(simd128_value_t); // NOLINT const int kIntptrSize = sizeof(intptr_t); // NOLINT const int kPointerSize = sizeof(void*); // NOLINT const int kRegisterSize = kPointerSize; diff --git a/src/hydrogen-instructions.cc b/src/hydrogen-instructions.cc index 89399db8e41..f7f0c78117a 100644 --- a/src/hydrogen-instructions.cc +++ b/src/hydrogen-instructions.cc @@ -336,6 +336,8 @@ const char* HType::ToString() { case kTaggedNumber: return "number"; case kSmi: return "smi"; case kHeapNumber: return "heap-number"; + case kFloat32x4: return "float32x4"; + case kInt32x4: return "int32x4"; case kString: return "string"; case kBoolean: return "boolean"; case kNonPrimitive: return "non-primitive"; @@ -353,6 +355,10 @@ HType HType::TypeFromValue(Handle<Object> value) { result = HType::Smi(); } else if (value->IsHeapNumber()) { result = HType::HeapNumber(); + } else if (value->IsFloat32x4()) { + result = HType::Float32x4(); + } else if (value->IsInt32x4()) { + result = HType::Int32x4(); } else if (value->IsString()) { result = HType::String(); } else if (value->IsBoolean()) { @@ -1205,7 +1211,22 @@ bool HTypeofIsAndBranch::KnownSuccessorBlock(HBasicBlock** block) { *block = SecondSuccessor(); } return true; + } else if (value()->representation().IsFloat32x4()) { + if (compares_float32x4_type()) { + *block = FirstSuccessor(); + } else { + *block = SecondSuccessor(); + } + return true; + } else if (value()->representation().IsInt32x4()) { + if (compares_int32x4_type()) { + *block = FirstSuccessor(); + } else { + *block = SecondSuccessor(); + } + return true; } + *block = NULL; return false; } diff --git a/src/hydrogen-instructions.h b/src/hydrogen-instructions.h index 208cbd24ca9..54cbd9ad96b 100644 --- a/src/hydrogen-instructions.h +++ b/src/hydrogen-instructions.h @@ -314,6 +314,8 @@ class HType V8_FINAL { static HType TaggedNumber() { return HType(kTaggedNumber); } static HType Smi() { return HType(kSmi); } static HType HeapNumber() { return HType(kHeapNumber); } + static HType Float32x4() { return HType(kFloat32x4); } + static HType Int32x4() { return HType(kInt32x4); } static HType String() { return HType(kString); } static HType Boolean() { return HType(kBoolean); } static HType NonPrimitive() { return HType(kNonPrimitive); } @@ -349,12 +351,24 @@ class HType V8_FINAL { return ((type_ & kHeapNumber) == kHeapNumber); } + bool IsFloat32x4() const { + return ((type_ & kFloat32x4) == kFloat32x4); + } + + bool IsInt32x4() const { + return ((type_ & kInt32x4) == kInt32x4); + } + + bool IsSIMD128() const { + return IsFloat32x4() || IsInt32x4(); + } + bool IsString() const { return ((type_ & kString) == kString); } bool IsNonString() const { - return IsTaggedPrimitive() || IsSmi() || IsHeapNumber() || + return IsTaggedPrimitive() || IsSmi() || IsHeapNumber() || IsSIMD128() || IsBoolean() || IsJSArray(); } @@ -375,7 +389,8 @@ } bool IsHeapObject() const { - return IsHeapNumber() || IsString() || IsBoolean() || IsNonPrimitive(); + return IsHeapNumber() || IsSIMD128() || IsString() || + IsBoolean() || IsNonPrimitive(); } bool ToStringOrToNumberCanBeObserved(Representation representation) { @@ -384,6 +399,8 @@ case kTaggedNumber: // fallthru case kSmi: // fallthru case kHeapNumber: // fallthru + case kFloat32x4: // fallthru + case kInt32x4: // fallthru case kString: // fallthru case kBoolean: return false; @@ -408,11 +425,13 @@ kTaggedNumber = 0xd, // 0000 0000 0000 1101 kSmi = 0x1d, // 0000 
0000 0001 1101 kHeapNumber = 0x2d, // 0000 0000 0010 1101 - kString = 0x45, // 0000 0000 0100 0101 - kBoolean = 0x85, // 0000 0000 1000 0101 - kNonPrimitive = 0x101, // 0000 0001 0000 0001 - kJSObject = 0x301, // 0000 0011 0000 0001 - kJSArray = 0x701 // 0000 0111 0000 0001 + kFloat32x4 = 0x45, // 0000 0000 0100 0101 + kInt32x4 = 0x85, // 0000 0000 1000 0101 + kString = 0x105, // 0000 0001 0000 0101 + kBoolean = 0x205, // 0000 0010 0000 0101 + kNonPrimitive = 0x401, // 0000 0100 0000 0001 + kJSObject = 0xc01, // 0000 1100 0000 0001 + kJSArray = 0x1c01 // 0001 1100 0000 0001 }; // Make sure type fits in int16. @@ -673,6 +692,8 @@ class HValue : public ZoneObject { HType t = type(); if (t.IsSmi()) return Representation::Smi(); if (t.IsHeapNumber()) return Representation::Double(); + if (t.IsFloat32x4()) return Representation::Float32x4(); + if (t.IsInt32x4()) return Representation::Int32x4(); if (t.IsHeapObject()) return r; return Representation::None(); } @@ -1729,7 +1750,13 @@ class HChange V8_FINAL : public HUnaryOperation { if (value->representation().IsSmi() || value->type().IsSmi()) { set_type(HType::Smi()); } else { - set_type(HType::TaggedNumber()); + if (to.IsFloat32x4()) { + set_type(HType::Float32x4()); + } else if (to.IsInt32x4()) { + set_type(HType::Int32x4()); + } else { + set_type(HType::TaggedNumber()); + } if (to.IsTagged()) SetGVNFlag(kChangesNewSpacePromotion); } } @@ -4628,6 +4655,8 @@ class HTypeofIsAndBranch V8_FINAL : public HUnaryControlInstruction { Handle<String> type_literal() { return type_literal_; } bool compares_number_type() { return compares_number_type_; } + bool compares_float32x4_type() { return compares_float32x4_type_; } + bool compares_int32x4_type() { return compares_int32x4_type_; } virtual void PrintDataTo(StringStream* stream) V8_OVERRIDE; DECLARE_CONCRETE_INSTRUCTION(TypeofIsAndBranch) @@ -4644,10 +4673,14 @@ type_literal_(type_literal) { Heap* heap = type_literal->GetHeap(); compares_number_type_ = type_literal->Equals(heap->number_string()); + compares_float32x4_type_ = type_literal->Equals(heap->float32x4_string()); + compares_int32x4_type_ = type_literal->Equals(heap->int32x4_string()); } Handle<String> type_literal_; bool compares_number_type_ : 1; + bool compares_float32x4_type_ : 1; + bool compares_int32x4_type_ : 1; }; @@ -6431,13 +6464,12 @@ class HLoadKeyed V8_FINAL elements_kind == FLOAT32_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { set_representation(Representation::Double()); - } else if (elements_kind == EXTERNAL_FLOAT32x4_ELEMENTS || - elements_kind == FLOAT32x4_ELEMENTS || - elements_kind == EXTERNAL_INT32x4_ELEMENTS || - elements_kind == INT32x4_ELEMENTS) { - // TODO(haitao): Set the representation to Float32x4 or Int32x4 after - // SIMD instructions are added. - set_representation(Representation::Tagged()); + } else if (IsFloat32x4ElementsKind(elements_kind)) { + set_representation(CPU::SupportsSIMD128InCrankshaft() ? + Representation::Float32x4() : Representation::Tagged()); + } else if (IsInt32x4ElementsKind(elements_kind)) { + set_representation(CPU::SupportsSIMD128InCrankshaft() ? + Representation::Int32x4() : Representation::Tagged()); } else { set_representation(Representation::Integer32()); } @@ -6721,16 +6753,17 @@ class HStoreKeyed V8_FINAL } ASSERT_EQ(index, 2); + if (IsDoubleOrFloatElementsKind(elements_kind())) { return Representation::Double(); } - if (IsExternalFloat32x4ElementsKind(elements_kind()) || - IsFixedFloat32x4ElementsKind(elements_kind()) || - IsExternalInt32x4ElementsKind(elements_kind()) || - IsFixedInt32x4ElementsKind(elements_kind())) { - // TODO(haitao): Set the required input representation to Float32x4 or - // Int32x4 after SIMD instructions are added. - return Representation::Tagged(); + if (IsFloat32x4ElementsKind(elements_kind())) { + return CPU::SupportsSIMD128InCrankshaft() ? + Representation::Float32x4() : Representation::Tagged(); + } + if (IsInt32x4ElementsKind(elements_kind())) { + return CPU::SupportsSIMD128InCrankshaft() ? + Representation::Int32x4() : Representation::Tagged(); } if (SmiValuesAre32Bits() && store_mode_ == STORE_TO_INITIALIZED_ENTRY) { return Representation::Integer32(); @@ -6764,13 +6797,13 @@ if (IsDoubleOrFloatElementsKind(elements_kind())) { return Representation::Double(); } - if (IsExternalFloat32x4ElementsKind(elements_kind()) || - IsFixedFloat32x4ElementsKind(elements_kind()) || - IsExternalInt32x4ElementsKind(elements_kind()) || - IsFixedInt32x4ElementsKind(elements_kind())) { - // TODO(haitao): Set the required input representation to Float32x4 or - // Int32x4 after SIMD instructions are added. - return Representation::Tagged(); + if (IsFloat32x4ElementsKind(elements_kind())) { + return CPU::SupportsSIMD128InCrankshaft() ? + Representation::Float32x4() : Representation::Tagged(); + } + if (IsInt32x4ElementsKind(elements_kind())) { + return CPU::SupportsSIMD128InCrankshaft() ? + Representation::Int32x4() : Representation::Tagged(); } if (SmiValuesAre32Bits() && store_mode_ == STORE_TO_INITIALIZED_ENTRY) { return Representation::Integer32();
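The encodings preserve the lattice property that a type's bit pattern is a superset of each of its ancestors', so every Is*() predicate stays a single mask-and-compare. The two new leaves sit under the tagged-primitive bits (0x5), and renumbering kString through kJSArray keeps the invariant intact. For instance:

// kFloat32x4 (0x45) includes the tagged-primitive bits (0x5):
//   IsFloat32x4():       (0x45  & 0x45) == 0x45  -> true
//   for a plain string:  (0x105 & 0x45) == 0x45  -> false
// kBoolean (0x205) likewise adds a single new bit (0x200) on top of 0x5.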
diff --git a/src/hydrogen.cc b/src/hydrogen.cc index 3698a322c04..0daf83c9400 100644 --- a/src/hydrogen.cc +++ b/src/hydrogen.cc @@ -10952,6 +10952,12 @@ void HTracer::TraceLiveRange(LiveRange* range, const char* type, if (op->IsDoubleRegister()) { trace_.Add(" \"%s\"", DoubleRegister::AllocationIndexToString(assigned_reg)); + } else if (op->IsFloat32x4Register()) { + trace_.Add(" \"%s\"", + SIMD128Register::AllocationIndexToString(assigned_reg)); + } else if (op->IsInt32x4Register()) { + trace_.Add(" \"%s\"", + SIMD128Register::AllocationIndexToString(assigned_reg)); } else { ASSERT(op->IsRegister()); trace_.Add(" \"%s\"", Register::AllocationIndexToString(assigned_reg)); @@ -10960,6 +10966,10 @@ LOperand* op = range->TopLevel()->GetSpillOperand(); if (op->IsDoubleStackSlot()) { trace_.Add(" \"double_stack:%d\"", op->index()); + } else if (op->IsFloat32x4StackSlot()) { + trace_.Add(" \"float32x4_stack:%d\"", op->index()); + } else if (op->IsInt32x4StackSlot()) { + trace_.Add(" \"int32x4_stack:%d\"", op->index()); } else { ASSERT(op->IsStackSlot()); trace_.Add(" \"stack:%d\"", op->index()); diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc index 733432028af..f99f202535e 100644 --- a/src/ia32/assembler-ia32.cc +++ b/src/ia32/assembler-ia32.cc @@ -262,6 +262,52 @@ Operand::Operand(Register index, } +Operand::Operand(const Operand& operand, int32_t offset) { + ASSERT(operand.len_ >= 1); + // Operand encodes [ModR/M] [SIB] [Disp]. 
+ byte modrm = operand.buf_[0]; + ASSERT(modrm < 0xC0); // Disallow mode 3 (register target). + bool has_sib = ((modrm & 0x07) == 0x04); + byte mode = modrm & 0xC0; + int disp_offset = has_sib ? 2 : 1; + int base_reg = (has_sib ? operand.buf_[1] : modrm) & 0x07; + // Mode 0 with ebp as ModR/M or SIB base register always has a 32-bit + // displacement. + bool is_baseless = (mode == 0) && (base_reg == 0x05); // No base register. + int32_t disp_value = 0; + if (mode == 0x80 || is_baseless) { + // Mode 2 or mode 0 with ebp as base: 32-bit displacement. + disp_value = *BitCast<const int32_t*>(&operand.buf_[disp_offset]); + } else if (mode == 0x40) { + // Mode 1: Byte displacement. + disp_value = static_cast<signed char>(operand.buf_[disp_offset]); + } + + // Write new operand with same registers, but with modified displacement. + ASSERT(offset >= 0 ? disp_value + offset >= disp_value : disp_value + offset < disp_value); // No overflow. + disp_value += offset; + if (!is_int8(disp_value) || is_baseless) { + // Need 32 bits of displacement, mode 2 or mode 0 with ebp as base. + buf_[0] = (modrm & 0x3f) | (is_baseless ? 0x00 : 0x80); + len_ = disp_offset + 4; + Memory::int32_at(&buf_[disp_offset]) = disp_value; + } else if (disp_value != 0 || (base_reg == 0x05)) { + // Need 8 bits of displacement. + buf_[0] = (modrm & 0x3f) | 0x40; // Mode 1. + len_ = disp_offset + 1; + buf_[disp_offset] = static_cast<byte>(disp_value); + } else { + // Need no displacement. + buf_[0] = (modrm & 0x3f); // Mode 0. + len_ = disp_offset; + } + if (has_sib) { + buf_[1] = operand.buf_[1]; + } +} + + bool Operand::is_reg(Register reg) const { return ((buf_[0] & 0xF8) == 0xC0) // addressing mode is register only. && ((buf_[0] & 0x07) == reg.code()); // register codes match. @@ -2232,6 +2278,24 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { } +void Assembler::movups(XMMRegister dst, const Operand& src) { + ASSERT(IsEnabled(SSE2)); + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x10); + emit_sse_operand(dst, src); +} + + +void Assembler::movups(const Operand& dst, XMMRegister src) { + ASSERT(IsEnabled(SSE2)); + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x11); + emit_sse_operand(src, dst); +} + + void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { ASSERT(IsEnabled(SSE2)); ASSERT(is_uint8(imm8));
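A worked example of the displacement rewriting, assuming the source operand was built with the byte-displacement form (mod = 1); the variable names are illustrative only:

// [esp + 0x10]: mod = 1, 8-bit displacement 0x10.
Operand slot(esp, 0x10);
// [esp + 0x20]: 0x10 + 16 still fits in int8, so mod 1 is kept.
Operand next_slot(slot, kSIMD128Size);
// [esp + 0x90]: 0x10 + 0x80 = 0x90 no longer fits in int8, so the
// constructor switches to mod 2 and emits a 32-bit displacement.
Operand far_slot(slot, 0x80);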
diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h index 43d6d6868a9..fb58bdc0991 100644 --- a/src/ia32/assembler-ia32.h +++ b/src/ia32/assembler-ia32.h @@ -220,6 +220,9 @@ struct XMMRegister : IntelDoubleRegister { }; +typedef XMMRegister SIMD128Register; + + #define xmm0 (static_cast<const XMMRegister&>(double_register_0)) #define xmm1 (static_cast<const XMMRegister&>(double_register_1)) #define xmm2 (static_cast<const XMMRegister&>(double_register_2)) @@ -411,6 +414,11 @@ class Operand BASE_EMBEDDED { int32_t disp, RelocInfo::Mode rmode = RelocInfo::NONE32); + // Offset from existing memory operand. + // Offset is added to existing displacement as 32-bit signed values and + // this must not overflow. + Operand(const Operand& base, int32_t offset); + static Operand StaticVariable(const ExternalReference& ext) { return Operand(reinterpret_cast<int32_t>(ext.address()), RelocInfo::EXTERNAL_REFERENCE); } @@ -1014,6 +1022,8 @@ class Assembler : public AssemblerBase { // SSE instructions void movaps(XMMRegister dst, XMMRegister src); + void movups(XMMRegister dst, const Operand& src); + void movups(const Operand& dst, XMMRegister src); void shufps(XMMRegister dst, XMMRegister src, byte imm8); void andps(XMMRegister dst, const Operand& src); diff --git a/src/ia32/cpu-ia32.cc b/src/ia32/cpu-ia32.cc index 5fb04fc7272..72bdc96b003 100644 --- a/src/ia32/cpu-ia32.cc +++ b/src/ia32/cpu-ia32.cc @@ -51,6 +51,11 @@ bool CPU::SupportsCrankshaft() { } +bool CPU::SupportsSIMD128InCrankshaft() { + return CpuFeatures::IsSupported(SSE2); +} + + void CPU::FlushICache(void* start, size_t size) { // No need to flush the instruction cache on Intel. On Intel instruction // cache flushing is only necessary when multiple cores running the same diff --git a/src/ia32/deoptimizer-ia32.cc b/src/ia32/deoptimizer-ia32.cc index 5300dde9a21..38090087edf 100644 --- a/src/ia32/deoptimizer-ia32.cc +++ b/src/ia32/deoptimizer-ia32.cc @@ -187,8 +187,9 @@ void Deoptimizer::FillInputFrame(Address tos, JavaScriptFrame* frame) { } input_->SetRegister(esp.code(), reinterpret_cast<intptr_t>(frame->sp())); input_->SetRegister(ebp.code(), reinterpret_cast<intptr_t>(frame->fp())); + simd128_value_t zero = {{0.0, 0.0}}; for (int i = 0; i < DoubleRegister::NumAllocatableRegisters(); i++) { - input_->SetDoubleRegister(i, 0.0); + input_->SetSIMD128Register(i, zero); } // Fill the frame content from the actual data on the frame. @@ -208,11 +209,11 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { if (!CpuFeatures::IsSupported(SSE2)) return; for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { - double double_value = input_->GetDoubleRegister(i); - output_frame->SetDoubleRegister(i, double_value); + simd128_value_t xmm_value = input_->GetSIMD128Register(i); + output_frame->SetSIMD128Register(i, xmm_value); } } @@ -246,22 +247,22 @@ void Deoptimizer::EntryGenerator::Generate() { // Save all general purpose registers before messing with them. const int kNumberOfRegisters = Register::kNumRegisters; - const int kDoubleRegsSize = kDoubleSize * - XMMRegister::kNumAllocatableRegisters; - __ sub(esp, Immediate(kDoubleRegsSize)); + const int kXMMRegsSize = kSIMD128Size * + XMMRegister::kNumAllocatableRegisters; + __ sub(esp, Immediate(kXMMRegsSize)); if (CpuFeatures::IsSupported(SSE2)) { CpuFeatureScope scope(masm(), SSE2); for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int offset = i * kDoubleSize; - __ movsd(Operand(esp, offset), xmm_reg); + int offset = i * kSIMD128Size; + __ movups(Operand(esp, offset), xmm_reg); } } __ pushad(); const int kSavedRegistersAreaSize = kNumberOfRegisters * kPointerSize + kXMMRegsSize; // Get the bailout id from the stack. 
__ mov(ebx, Operand(esp, kSavedRegistersAreaSize)); @@ -299,15 +300,15 @@ __ pop(Operand(ebx, offset)); } - int double_regs_offset = FrameDescription::double_registers_offset(); + int xmm_regs_offset = FrameDescription::simd128_registers_offset(); if (CpuFeatures::IsSupported(SSE2)) { CpuFeatureScope scope(masm(), SSE2); - // Fill in the double input registers. + // Fill in the xmm input registers. for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { - int dst_offset = i * kDoubleSize + double_regs_offset; - int src_offset = i * kDoubleSize; - __ movsd(xmm0, Operand(esp, src_offset)); - __ movsd(Operand(ebx, dst_offset), xmm0); + int dst_offset = i * kSIMD128Size + xmm_regs_offset; + int src_offset = i * kSIMD128Size; + __ movups(xmm0, Operand(esp, src_offset)); + __ movups(Operand(ebx, dst_offset), xmm0); } } @@ -317,7 +318,7 @@ __ fnclex(); // Remove the bailout id, return address and the double registers. - __ add(esp, Immediate(kDoubleRegsSize + 2 * kPointerSize)); + __ add(esp, Immediate(kXMMRegsSize + 2 * kPointerSize)); // Compute a pointer to the unwinding limit in register ecx; that is // the first stack slot not part of the input frame. @@ -391,8 +392,8 @@ CpuFeatureScope scope(masm(), SSE2); for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int src_offset = i * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(ebx, src_offset)); + int src_offset = i * kSIMD128Size + xmm_regs_offset; + __ movups(xmm_reg, Operand(ebx, src_offset)); } } @@ -440,6 +441,18 @@ void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n].d[0]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n].d[0] = value; +} + + #undef __
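Note how the two ports overlay doubles on the widened simd128_registers_ array differently: on ia32 double and XMM allocation indices coincide, so a double occupies lane 0 of its own 128-bit slot, while on ARM two d-registers pack into each q-register (see deoptimizer-arm.cc above). Side by side, as a sketch:

// ia32: double n aliases xmm_n.
double ia32_get(const simd128_value_t* regs, unsigned n) { return regs[n].d[0]; }
// ARM: d-register n is half of q-register n/2.
double arm_get(const simd128_value_t* regs, unsigned n) { return regs[n / 2].d[n % 2]; }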
diff --git a/src/ia32/disasm-ia32.cc b/src/ia32/disasm-ia32.cc index 6a7f3bc8377..81746ef3d99 100644 --- a/src/ia32/disasm-ia32.cc +++ b/src/ia32/disasm-ia32.cc @@ -1043,6 +1043,19 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, NameOfXMMRegister(regop), NameOfXMMRegister(rm)); data++; + } else if (f0byte == 0x10) { + data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, &regop, &rm); + AppendToBuffer("movups %s,", NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (f0byte == 0x11) { + AppendToBuffer("movups "); + data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, &regop, &rm); + data += PrintRightXMMOperand(data); + AppendToBuffer(",%s", NameOfXMMRegister(regop)); } else if (f0byte >= 0x53 && f0byte <= 0x5F) { const char* const pseudo_op[] = { "rcpps", diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index 2588e900407..66bab87fea2 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -547,6 +547,11 @@ XMMRegister LCodeGen::ToDoubleRegister(int index) const { } +XMMRegister LCodeGen::ToSIMD128Register(int index) const { + return XMMRegister::FromAllocationIndex(index); +} + + void LCodeGen::X87LoadForUsage(X87Register reg) { ASSERT(x87_stack_.Contains(reg)); x87_stack_.Fxch(reg); @@ -769,6 +774,24 @@ XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { } + +XMMRegister LCodeGen::ToFloat32x4Register(LOperand* op) const { + ASSERT(op->IsFloat32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToInt32x4Register(LOperand* op) const { + ASSERT(op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToSIMD128Register(LOperand* op) const { + ASSERT(op->IsFloat32x4Register() || op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + int32_t LCodeGen::ToInteger32(LConstantOperand* op) const { return ToRepresentation(op, Representation::Integer32()); } @@ -824,7 +847,10 @@ static int ArgumentsOffsetWithoutFrame(int index) { Operand LCodeGen::ToOperand(LOperand* op) const { if (op->IsRegister()) return Operand(ToRegister(op)); if (op->IsDoubleRegister()) return Operand(ToDoubleRegister(op)); + if (op->IsFloat32x4Register()) return Operand(ToFloat32x4Register(op)); + if (op->IsInt32x4Register()) return Operand(ToInt32x4Register(op)); - ASSERT(op->IsStackSlot() || op->IsDoubleStackSlot()); + ASSERT(op->IsStackSlot() || op->IsDoubleStackSlot() || + op->IsFloat32x4StackSlot() || op->IsInt32x4StackSlot()); if (NeedsEagerFrame()) { return Operand(ebp, StackSlotOffset(op->index())); } else { @@ -951,6 +977,12 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } } else if (op->IsDoubleStackSlot()) { translation->StoreDoubleStackSlot(op->index()); + } else if (op->IsFloat32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT32x4_STACK_SLOT); + } else if (op->IsInt32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::INT32x4_STACK_SLOT); } else if (op->IsArgument()) { ASSERT(is_tagged); int src_index = GetStackSlotCount() + op->index(); @@ -967,6 +999,12 @@ } else if (op->IsDoubleRegister()) { XMMRegister reg = ToDoubleRegister(op); translation->StoreDoubleRegister(reg); + } else if (op->IsFloat32x4Register()) { + XMMRegister reg = ToFloat32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT32x4_REGISTER); + } else if (op->IsInt32x4Register()) { + XMMRegister reg = ToInt32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::INT32x4_REGISTER); } else if (op->IsConstantOperand()) { HConstant* constant = chunk()->LookupConstant(LConstantOperand::cast(op)); int src_index = DefineDeoptimizationLiteral(constant->handle(isolate())); @@ -3447,55 +3485,52 @@ void LCodeGen::DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr) { Runtime::FunctionId id_; }; - // Allocate a SIMD128 object on the heap. - Register reg = ToRegister(instr->result()); - Register tmp = ToRegister(instr->temp()); - DeferredSIMD128ToTagged* deferred = new(zone()) DeferredSIMD128ToTagged( - this, instr, static_cast<Runtime::FunctionId>(T::kRuntimeAllocatorId()), - x87_stack_); - if (FLAG_inline_new) { - __ AllocateSIMDHeapObject(T::kSize, reg, tmp, deferred->entry(), - static_cast<Heap::RootListIndex>(T::kMapRootIndex())); - } else { - __ jmp(deferred->entry()); - } - __ bind(deferred->exit()); - - // Copy the SIMD128 value from the external array to the heap object. 
- STATIC_ASSERT(T::kValueSize % kPointerSize == 0); LOperand* key = instr->key(); ElementsKind elements_kind = instr->elements_kind(); - for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + + if (CpuFeatures::IsSupported(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); Operand operand(BuildFastArrayOperand( instr->elements(), key, instr->hydrogen()->key()->representation(), elements_kind, - offset, + 0, instr->additional_index())); - __ mov(tmp, operand); - __ mov(FieldOperand(reg, T::kValueOffset + offset), tmp); + __ movups(ToSIMD128Register(instr->result()), operand); + } else { + // Allocate a SIMD128 object on the heap. + Register reg = ToRegister(instr->result()); + Register tmp = ToRegister(instr->temp()); + DeferredSIMD128ToTagged* deferred = new(zone()) DeferredSIMD128ToTagged( + this, instr, static_cast<Runtime::FunctionId>(T::kRuntimeAllocatorId()), + x87_stack_); + if (FLAG_inline_new) { + __ AllocateSIMDHeapObject(T::kSize, reg, tmp, deferred->entry(), + static_cast<Heap::RootListIndex>(T::kMapRootIndex())); + } else { + __ jmp(deferred->entry()); + } + __ bind(deferred->exit()); + + // Copy the SIMD128 value from the external array to the heap object. + STATIC_ASSERT(T::kValueSize % kPointerSize == 0); + for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + Operand operand(BuildFastArrayOperand( + instr->elements(), + key, + instr->hydrogen()->key()->representation(), + elements_kind, + offset, + instr->additional_index())); + __ mov(tmp, operand); + __ mov(FieldOperand(reg, T::kValueOffset + offset), tmp); + } } } void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { - class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { - public: - DeferredSIMD128ToTagged(LCodeGen* codegen, - LInstruction* instr, - Runtime::FunctionId id, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr), id_(id) { } - virtual void Generate() V8_OVERRIDE { - codegen()->DoDeferredSIMD128ToTagged(instr_, id_); - } - virtual LInstruction* instr() V8_OVERRIDE { return instr_; } - private: - LInstruction* instr_; - Runtime::FunctionId id_; - }; - ElementsKind elements_kind = instr->elements_kind(); LOperand* key = instr->key(); if (!key->IsConstantOperand() && @@ -3504,6 +3539,7 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { HandleExternalArrayOpRequiresTemp(key, instr->hydrogen()->key()->representation(), elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -4639,28 +4675,41 @@ void LCodeGen::DoBoundsCheck(LBoundsCheck* instr) { template <class T> void LCodeGen::DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr) { - ASSERT(instr->value()->IsRegister()); - Register temp = ToRegister(instr->temp()); - Register input_reg = ToRegister(instr->value()); - __ test(input_reg, Immediate(kSmiTagMask)); - DeoptimizeIf(zero, instr->environment()); - __ CmpObjectType(input_reg, T::kInstanceType, temp); - DeoptimizeIf(not_equal, instr->environment()); - - // Copy the SIMD128 value from the heap object to the external array. 
- STATIC_ASSERT(T::kValueSize % kPointerSize == 0); LOperand* key = instr->key(); ElementsKind elements_kind = instr->elements_kind(); - for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); Operand operand(BuildFastArrayOperand( instr->elements(), key, instr->hydrogen()->key()->representation(), elements_kind, - offset, + 0, instr->additional_index())); + __ movups(operand, ToSIMD128Register(instr->value())); + } else { + ASSERT(instr->value()->IsRegister()); + Register temp = ToRegister(instr->temp()); + Register input_reg = ToRegister(instr->value()); + __ test(input_reg, Immediate(kSmiTagMask)); + DeoptimizeIf(zero, instr->environment()); + __ CmpObjectType(input_reg, T::kInstanceType, temp); + DeoptimizeIf(not_equal, instr->environment()); + + // Copy the SIMD128 value from the heap object to the external array. + STATIC_ASSERT(T::kValueSize % kPointerSize == 0); + for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + Operand operand(BuildFastArrayOperand( + instr->elements(), + key, + instr->hydrogen()->key()->representation(), + elements_kind, + offset, + instr->additional_index())); + __ mov(temp, FieldOperand(input_reg, T::kValueOffset + offset)); + __ mov(operand, temp); + } } } @@ -4674,6 +4723,7 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { HandleExternalArrayOpRequiresTemp(key, instr->hydrogen()->key()->representation(), elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -6518,6 +6568,83 @@ void LCodeGen::DoLoadFieldByIndex(LLoadFieldByIndex* instr) { } +template <class T> +void LCodeGen::HandleSIMD128ToTagged(LSIMD128ToTagged* instr) { + class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { + public: + DeferredSIMD128ToTagged(LCodeGen* codegen, + LInstruction* instr, + Runtime::FunctionId id, + const X87Stack& x87_stack) + : LDeferredCode(codegen, x87_stack), instr_(instr), id_(id) { } + virtual void Generate() V8_OVERRIDE { + codegen()->DoDeferredSIMD128ToTagged(instr_, id_); + } + virtual LInstruction* instr() V8_OVERRIDE { return instr_; } + private: + LInstruction* instr_; + Runtime::FunctionId id_; + }; + + CpuFeatureScope scope(masm(), SSE2); + XMMRegister input_reg = ToSIMD128Register(instr->value()); + Register reg = ToRegister(instr->result()); + Register tmp = ToRegister(instr->temp()); + + DeferredSIMD128ToTagged* deferred = new(zone()) DeferredSIMD128ToTagged( + this, instr, static_cast<Runtime::FunctionId>(T::kRuntimeAllocatorId()), + x87_stack_); + if (FLAG_inline_new) { + __ AllocateSIMDHeapObject(T::kSize, reg, tmp, deferred->entry(), + static_cast<Heap::RootListIndex>(T::kMapRootIndex())); + } else { + __ jmp(deferred->entry()); + } + __ bind(deferred->exit()); + __ movups(FieldOperand(reg, T::kValueOffset), input_reg); +} + + +void LCodeGen::DoSIMD128ToTagged(LSIMD128ToTagged* instr) { + if (instr->value()->IsFloat32x4Register()) { + HandleSIMD128ToTagged<Float32x4>(instr); + } else { + ASSERT(instr->value()->IsInt32x4Register()); + HandleSIMD128ToTagged<Int32x4>(instr); + } +} + +
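Boxing and unboxing are deliberately symmetric, and HandleTaggedToSIMD128 below is the inverse of the function above: where boxing allocates (inline when FLAG_inline_new allows it, otherwise through the deferred runtime call) and stores the XMM value into the object's payload, unboxing performs a smi check and a type check before loading the payload back. In outline, assuming Float32x4/Int32x4 are the heap classes bound to T:

// box:   reg = allocate T;  movups [reg + T::kValueOffset], input_xmm
// unbox: test value, kSmiTagMask            -> deopt if smi
//        CmpObjectType(value, T::kInstanceType) -> deopt if wrong type
//        movups result_xmm, [value + T::kValueOffset]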
+template <class T> +void LCodeGen::HandleTaggedToSIMD128(LTaggedToSIMD128* instr) { + LOperand* input = instr->value(); + ASSERT(input->IsRegister()); + LOperand* result = instr->result(); + ASSERT(result->IsSIMD128Register()); + + Register input_reg = ToRegister(input); + Register temp_reg = ToRegister(instr->temp()); + XMMRegister result_reg = ToSIMD128Register(result); + + CpuFeatureScope scope(masm(), SSE2); + __ test(input_reg, Immediate(kSmiTagMask)); + DeoptimizeIf(zero, instr->environment()); + __ CmpObjectType(input_reg, T::kInstanceType, temp_reg); + DeoptimizeIf(not_equal, instr->environment()); + __ movups(result_reg, FieldOperand(input_reg, T::kValueOffset)); +} + + +void LCodeGen::DoTaggedToSIMD128(LTaggedToSIMD128* instr) { + if (instr->representation().IsFloat32x4()) { + HandleTaggedToSIMD128<Float32x4>(instr); + } else { + ASSERT(instr->representation().IsInt32x4()); + HandleTaggedToSIMD128<Int32x4>(instr); + } +} + + #undef __ } } // namespace v8::internal diff --git a/src/ia32/lithium-codegen-ia32.h b/src/ia32/lithium-codegen-ia32.h index 0269ab872cf..e05acc9d3f1 100644 --- a/src/ia32/lithium-codegen-ia32.h +++ b/src/ia32/lithium-codegen-ia32.h @@ -90,6 +90,9 @@ class LCodeGen: public LCodeGenBase { Operand ToOperand(LOperand* op) const; Register ToRegister(LOperand* op) const; XMMRegister ToDoubleRegister(LOperand* op) const; + XMMRegister ToFloat32x4Register(LOperand* op) const; + XMMRegister ToInt32x4Register(LOperand* op) const; + XMMRegister ToSIMD128Register(LOperand* op) const; X87Register ToX87Register(LOperand* op) const; bool IsInteger32(LConstantOperand* op) const; @@ -163,6 +166,11 @@ void DoDeferredInstanceMigration(LCheckMaps* instr, Register object); void DoDeferredSIMD128ToTagged(LInstruction* instr, Runtime::FunctionId id); + template <class T> + void HandleTaggedToSIMD128(LTaggedToSIMD128* instr); + template <class T> + void HandleSIMD128ToTagged(LSIMD128ToTagged* instr); + // Parallel move support. void DoParallelMove(LParallelMove* move); void DoGap(LGap* instr); @@ -289,6 +297,9 @@ Register ToRegister(int index) const; XMMRegister ToDoubleRegister(int index) const; + XMMRegister ToFloat32x4Register(int index) const; + XMMRegister ToInt32x4Register(int index) const; + XMMRegister ToSIMD128Register(int index) const; X87Register ToX87Register(int index) const; int32_t ToRepresentation(LConstantOperand* op, const Representation& r) const; int32_t ToInteger32(LConstantOperand* op) const; diff --git a/src/ia32/lithium-gap-resolver-ia32.cc b/src/ia32/lithium-gap-resolver-ia32.cc index d621bd261d6..095d48e1eda 100644 --- a/src/ia32/lithium-gap-resolver-ia32.cc +++ b/src/ia32/lithium-gap-resolver-ia32.cc @@ -405,6 +405,27 @@ void LGapResolver::EmitMove(int index) { cgen_->X87Mov(dst, src); } } + } else if (source->IsSIMD128Register()) { + ASSERT(CpuFeatures::IsSupported(SSE2)); + CpuFeatureScope scope(cgen_->masm(), SSE2); + XMMRegister src = cgen_->ToSIMD128Register(source); + if (destination->IsSIMD128Register()) { + __ movaps(cgen_->ToSIMD128Register(destination), src); + } else { + ASSERT(destination->IsSIMD128StackSlot()); + __ movups(cgen_->ToOperand(destination), src); + } + } else if (source->IsSIMD128StackSlot()) { + ASSERT(CpuFeatures::IsSupported(SSE2)); + CpuFeatureScope scope(cgen_->masm(), SSE2); + Operand src = cgen_->ToOperand(source); + if (destination->IsSIMD128Register()) { + __ movups(cgen_->ToSIMD128Register(destination), src); + } else { + ASSERT(destination->IsSIMD128StackSlot()); + __ movups(xmm0, src); + __ movups(cgen_->ToOperand(destination), xmm0); + } } else { UNREACHABLE(); }
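A note on instruction selection in these moves: the unaligned form movups is used whenever one side is memory, since neither spill slots nor FieldOperand payloads inside heap objects are guaranteed 16-byte aligned on ia32, and an aligned movaps would fault. Register-to-register copies carry no alignment constraint, so those use movaps, as EmitSwap below does as well:

__ movups(xmm0, stack_operand);  // memory operand: alignment unknown
__ movaps(reg_a, reg_b);         // register move: always safe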
+ Operand src = cgen_->ToOperand(source); + Operand dst = cgen_->ToOperand(destination); + Register tmp = EnsureTempRegister(); + __ movups(xmm0, src); + for (int offset = 0; offset < kSIMD128Size; offset += kPointerSize) { + __ mov(tmp, Operand(dst, offset)); + __ mov(Operand(src, offset), tmp); + } + __ movups(dst, xmm0); + + } else if (source->IsSIMD128Register() && destination->IsSIMD128Register()) { + // Swap two XMM registers. + XMMRegister source_reg = cgen_->ToSIMD128Register(source); + XMMRegister destination_reg = cgen_->ToSIMD128Register(destination); + __ movaps(xmm0, source_reg); + __ movaps(source_reg, destination_reg); + __ movaps(destination_reg, xmm0); + + } else if (source->IsSIMD128Register() || destination->IsSIMD128Register()) { + // Swap an XMM register and an XMM stack slot. + ASSERT((source->IsSIMD128Register() && + destination->IsSIMD128StackSlot()) || + (source->IsSIMD128StackSlot() && + destination->IsSIMD128Register())); + XMMRegister reg = cgen_->ToSIMD128Register(source->IsSIMD128Register() + ? source + : destination); + LOperand* other = source->IsSIMD128Register() ? destination : source; + ASSERT(other->IsSIMD128StackSlot()); + Operand other_operand = cgen_->ToOperand(other); + __ movups(xmm0, other_operand); + __ movups(other_operand, reg); + __ movaps(reg, xmm0); + } else { // No other combinations are possible. UNREACHABLE(); diff --git a/src/ia32/lithium-ia32.cc b/src/ia32/lithium-ia32.cc index 26e84975638..4580af11b6c 100644 --- a/src/ia32/lithium-ia32.cc +++ b/src/ia32/lithium-ia32.cc @@ -377,23 +377,39 @@ void LAccessArgumentsAt::PrintDataTo(StringStream* stream) { int LPlatformChunk::GetNextSpillIndex(RegisterKind kind) { - // Skip a slot if for a double-width slot. - if (kind == DOUBLE_REGISTERS) { - spill_slot_count_++; - spill_slot_count_ |= 1; - num_double_slots_++; + switch (kind) { + case GENERAL_REGISTERS: return spill_slot_count_++; + case DOUBLE_REGISTERS: { + // Skip a slot if necessary so the double-width slot is aligned. + spill_slot_count_++; + spill_slot_count_ |= 1; + num_double_slots_++; + return spill_slot_count_++; + } + case FLOAT32x4_REGISTERS: + case INT32x4_REGISTERS: { + // Skip three slots to reserve room for the quad-width slot.
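+ // Four consecutive pointer-sized slots back one 128-bit value; the + // returned index is the highest of the four, mirroring the double case.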
+ spill_slot_count_ += 3; + num_double_slots_ += 2; // for dynamic frame alignment + return spill_slot_count_++; + } + default: + UNREACHABLE(); + return -1; } - return spill_slot_count_++; } LOperand* LPlatformChunk::GetNextSpillSlot(RegisterKind kind) { int index = GetNextSpillIndex(kind); - if (kind == DOUBLE_REGISTERS) { - return LDoubleStackSlot::Create(index, zone()); - } else { - ASSERT(kind == GENERAL_REGISTERS); - return LStackSlot::Create(index, zone()); + switch (kind) { + case GENERAL_REGISTERS: return LStackSlot::Create(index, zone()); + case DOUBLE_REGISTERS: return LDoubleStackSlot::Create(index, zone()); + case FLOAT32x4_REGISTERS: return LFloat32x4StackSlot::Create(index, zone()); + case INT32x4_REGISTERS: return LInt32x4StackSlot::Create(index, zone()); + default: + UNREACHABLE(); + return NULL; } } @@ -1835,6 +1851,11 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { LOperand* temp = TempRegister(); LNumberUntagD* res = new(zone()) LNumberUntagD(value, temp); return AssignEnvironment(DefineAsRegister(res)); + } else if (to.IsSIMD128()) { + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + LTaggedToSIMD128* res = new(zone()) LTaggedToSIMD128(value, temp, to); + return AssignEnvironment(DefineAsRegister(res)); } else if (to.IsSmi()) { HValue* val = instr->value(); LOperand* value = UseRegister(val); @@ -1918,6 +1939,16 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { new(zone()) LInteger32ToDouble(Use(instr->value()))); } } + } else if (from.IsSIMD128()) { + ASSERT(to.IsTagged()); + info()->MarkAsDeferredCalling(); + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + + // Make sure that temp and result_temp are different registers. + LUnallocated* result_temp = TempRegister(); + LSIMD128ToTagged* result = new(zone()) LSIMD128ToTagged(value, temp); + return AssignPointerMap(Define(result, result_temp)); } UNREACHABLE(); return NULL; @@ -2124,7 +2155,8 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { : UseRegisterOrConstantAtStart(instr->key()); LLoadKeyed* result = NULL; - bool load_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind); + bool load_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind) && + !CPU::SupportsSIMD128InCrankshaft(); if (!instr->is_typed_elements()) { LOperand* obj = UseRegisterAtStart(instr->elements()); result = new(zone()) LLoadKeyed(obj, key, NULL); @@ -2134,8 +2166,14 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { !(IsDoubleOrFloatElementsKind(instr->elements_kind()))) || (instr->representation().IsDouble() && (IsDoubleOrFloatElementsKind(instr->elements_kind()))) || - (instr->representation().IsTagged() && - (IsSIMD128ElementsKind(instr->elements_kind())))); + (CPU::SupportsSIMD128InCrankshaft() + ? instr->representation().IsFloat32x4() + : instr->representation().IsTagged() && + (IsFloat32x4ElementsKind(instr->elements_kind()))) || + (CPU::SupportsSIMD128InCrankshaft() + ? instr->representation().IsInt32x4() + : instr->representation().IsTagged() && + (IsInt32x4ElementsKind(instr->elements_kind())))); LOperand* backing_store = UseRegister(instr->elements()); result = new(zone()) LLoadKeyed(backing_store, key, load_128bits_without_sse2 ? 
TempRegister() : NULL); @@ -2225,8 +2263,14 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { !IsDoubleOrFloatElementsKind(elements_kind)) || (instr->value()->representation().IsDouble() && IsDoubleOrFloatElementsKind(elements_kind)) || - (instr->value()->representation().IsTagged() && - IsSIMD128ElementsKind(elements_kind))); + (CPU::SupportsSIMD128InCrankshaft() + ? instr->value()->representation().IsFloat32x4() + : instr->value()->representation().IsTagged() && + IsFloat32x4ElementsKind(elements_kind)) || + (CPU::SupportsSIMD128InCrankshaft() + ? instr->value()->representation().IsInt32x4() + : instr->value()->representation().IsTagged() && + IsInt32x4ElementsKind(elements_kind))); ASSERT((instr->is_fixed_typed_array() && instr->elements()->representation().IsTagged()) || (instr->is_external() && @@ -2239,7 +2283,8 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { LOperand* key = clobbers_key ? UseTempRegister(instr->key()) : UseRegisterOrConstantAtStart(instr->key()); - bool store_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind); + bool store_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind) && + !CPU::SupportsSIMD128InCrankshaft(); LStoreKeyed* result = new(zone()) LStoreKeyed(backing_store, key, val, store_128bits_without_sse2 ? TempRegister() : NULL); diff --git a/src/ia32/lithium-ia32.h b/src/ia32/lithium-ia32.h index 7cb7da0364f..1dd54061f11 100644 --- a/src/ia32/lithium-ia32.h +++ b/src/ia32/lithium-ia32.h @@ -144,6 +144,8 @@ class LCodeGen; V(NumberTagI) \ V(NumberTagU) \ V(NumberUntagD) \ + V(SIMD128ToTagged) \ + V(TaggedToSIMD128) \ V(OsrEntry) \ V(OuterContext) \ V(Parameter) \ @@ -2076,6 +2078,21 @@ class LNumberTagD V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LSIMD128ToTagged V8_FINAL : public LTemplateInstruction<1, 1, 1> { + public: + explicit LSIMD128ToTagged(LOperand* value, LOperand* temp) { + inputs_[0] = value; + temps_[0] = temp; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + + DECLARE_CONCRETE_INSTRUCTION(SIMD128ToTagged, "simd128-tag") + DECLARE_HYDROGEN_ACCESSOR(Change) +}; + + // Sometimes truncating conversion from a tagged value to an int32. class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 1> { public: @@ -2152,6 +2169,25 @@ class LNumberUntagD V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LTaggedToSIMD128 V8_FINAL : public LTemplateInstruction<1, 1, 1> { + public: + explicit LTaggedToSIMD128(LOperand* value, LOperand* temp, + Representation representation) : representation_(representation) { + inputs_[0] = value; + temps_[0] = temp; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + Representation representation() const { return representation_; } + + DECLARE_CONCRETE_INSTRUCTION(TaggedToSIMD128, "simd128-untag") + DECLARE_HYDROGEN_ACCESSOR(Change); + private: + Representation representation_; +}; + + class LSmiUntag V8_FINAL : public LTemplateInstruction<1, 1, 0> { public: LSmiUntag(LOperand* value, bool needs_check) diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index edb68813ac5..b20db74dfad 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -1117,12 +1117,13 @@ void MacroAssembler::EnterExitFrameEpilogue(int argc, bool save_doubles) { // Optionally save all XMM registers. 
if (save_doubles) { CpuFeatureScope scope(this, SSE2); - int space = XMMRegister::kNumRegisters * kDoubleSize + argc * kPointerSize; + int space = XMMRegister::kNumRegisters * kSIMD128Size + + argc * kPointerSize; sub(esp, Immediate(space)); const int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(Operand(ebp, offset - ((i + 1) * kDoubleSize)), reg); + movups(Operand(ebp, offset - ((i + 1) * kSIMD128Size)), reg); } } else { sub(esp, Immediate(argc * kPointerSize)); @@ -1166,7 +1167,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles) { const int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(reg, Operand(ebp, offset - ((i + 1) * kDoubleSize))); + movups(reg, Operand(ebp, offset - ((i + 1) * kSIMD128Size))); } } diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index 102adb6a32e..522bd5eb553 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -651,6 +651,10 @@ class MacroAssembler: public Assembler { Register scratch1, Register scratch2, Label* gc_required); + + // Allocate a float32x4 or int32x4 object in new space with undefined value. + // Returns tagged pointer in result register, or jumps to gc_required if new + // space is full. void AllocateSIMDHeapObject(int size, Register result, Register scratch, diff --git a/src/lithium-allocator-inl.h b/src/lithium-allocator-inl.h index deee98877d6..9a6d08b1683 100644 --- a/src/lithium-allocator-inl.h +++ b/src/lithium-allocator-inl.h @@ -146,7 +146,8 @@ void UseIterator::Advance() { void LAllocator::SetLiveRangeAssignedRegister(LiveRange* range, int reg) { - if (range->Kind() == DOUBLE_REGISTERS) { + if (range->Kind() == DOUBLE_REGISTERS || + IsSIMD128RegisterKind(range->Kind())) { assigned_double_registers_->Add(reg); } else { ASSERT(range->Kind() == GENERAL_REGISTERS); diff --git a/src/lithium-allocator.cc b/src/lithium-allocator.cc index 48fa862c90d..fd8f847e0f4 100644 --- a/src/lithium-allocator.cc +++ b/src/lithium-allocator.cc @@ -238,6 +238,12 @@ LOperand* LiveRange::CreateAssignedOperand(Zone* zone) { case DOUBLE_REGISTERS: op = LDoubleRegister::Create(assigned_register(), zone); break; + case FLOAT32x4_REGISTERS: + op = LFloat32x4Register::Create(assigned_register(), zone); + break; + case INT32x4_REGISTERS: + op = LInt32x4Register::Create(assigned_register(), zone); + break; default: UNREACHABLE(); } @@ -488,7 +494,7 @@ void LiveRange::ConvertOperands(Zone* zone) { if (use_pos->HasOperand()) { ASSERT(op->IsRegister() || op->IsDoubleRegister() || - !use_pos->RequiresRegister()); + op->IsSIMD128Register() || !use_pos->RequiresRegister()); use_pos->operand()->ConvertTo(op->kind(), op->index()); } use_pos = use_pos->next(); @@ -554,6 +560,7 @@ LAllocator::LAllocator(int num_values, HGraph* graph) active_live_ranges_(8, zone()), inactive_live_ranges_(8, zone()), reusable_slots_(8, zone()), + reusable_simd128_slots_(8, zone()), next_virtual_register_(num_values), first_artificial_register_(num_values), mode_(UNALLOCATED_REGISTERS), @@ -873,6 +880,16 @@ void LAllocator::MeetConstraintsBetween(LInstruction* first, double_artificial_registers_.Add( cur_input->virtual_register() - first_artificial_register_, zone()); + } else if (RequiredRegisterKind(input_copy->virtual_register()) == + FLOAT32x4_REGISTERS) { + float32x4_artificial_registers_.Add( + cur_input->virtual_register() - 
first_artificial_register_, + zone()); + } else if (RequiredRegisterKind(input_copy->virtual_register()) == + INT32x4_REGISTERS) { + int32x4_artificial_registers_.Add( + cur_input->virtual_register() - first_artificial_register_, + zone()); } AddConstraintsGapMove(gap_index, input_copy, cur_input); @@ -1185,8 +1202,10 @@ void LAllocator::ResolveControlFlow(LiveRange* range, if (branch->HasPointerMap()) { if (HasTaggedValue(range->id())) { branch->pointer_map()->RecordPointer(cur_op, chunk()->zone()); - } else if (!cur_op->IsDoubleStackSlot() && - !cur_op->IsDoubleRegister()) { + } else if (!cur_op->IsDoubleStackSlot() && + !cur_op->IsDoubleRegister() && + !cur_op->IsSIMD128StackSlot() && + !cur_op->IsSIMD128Register()) { branch->pointer_map()->RemovePointer(cur_op); } } @@ -1512,6 +1531,9 @@ void LAllocator::AllocateRegisters() { if (live_ranges_[i] != NULL) { if (live_ranges_[i]->Kind() == mode_) { AddToUnhandledUnsorted(live_ranges_[i]); + } else if (mode_ == DOUBLE_REGISTERS && + IsSIMD128RegisterKind(live_ranges_[i]->Kind())) { + AddToUnhandledUnsorted(live_ranges_[i]); } } } @@ -1519,6 +1541,7 @@ ASSERT(UnhandledIsSorted()); ASSERT(reusable_slots_.is_empty()); + ASSERT(reusable_simd128_slots_.is_empty()); ASSERT(active_live_ranges_.is_empty()); ASSERT(inactive_live_ranges_.is_empty()); @@ -1610,6 +1633,7 @@ } reusable_slots_.Rewind(0); + reusable_simd128_slots_.Rewind(0); active_live_ranges_.Rewind(0); inactive_live_ranges_.Rewind(0); } @@ -1646,10 +1670,20 @@ RegisterKind LAllocator::RequiredRegisterKind(int virtual_register) const { HValue* value = graph_->LookupValue(virtual_register); if (value != NULL && value->representation().IsDouble()) { return DOUBLE_REGISTERS; + } else if (value != NULL && (value->representation().IsFloat32x4())) { + return FLOAT32x4_REGISTERS; + } else if (value != NULL && (value->representation().IsInt32x4())) { + return INT32x4_REGISTERS; } } else if (double_artificial_registers_.Contains( virtual_register - first_artificial_register_)) { return DOUBLE_REGISTERS; + } else if (float32x4_artificial_registers_.Contains( virtual_register - first_artificial_register_)) { return FLOAT32x4_REGISTERS; + } else if (int32x4_artificial_registers_.Contains( virtual_register - first_artificial_register_)) { return INT32x4_REGISTERS; } return GENERAL_REGISTERS; @@ -1732,19 +1766,26 @@ void LAllocator::FreeSpillSlot(LiveRange* range) { int index = range->TopLevel()->GetSpillOperand()->index(); if (index >= 0) { - reusable_slots_.Add(range, zone()); + if (IsSIMD128RegisterKind(range->Kind())) { + reusable_simd128_slots_.Add(range, zone()); + } else { + reusable_slots_.Add(range, zone()); + } } } LOperand* LAllocator::TryReuseSpillSlot(LiveRange* range) { - if (reusable_slots_.is_empty()) return NULL; - if (reusable_slots_.first()->End().Value() > + ZoneList<LiveRange*>* reusable_slots = IsSIMD128RegisterKind(range->Kind()) + ? 
&reusable_simd128_slots_ + : &reusable_slots_; + if (reusable_slots->is_empty()) return NULL; + if (reusable_slots->first()->End().Value() > range->TopLevel()->Start().Value()) { return NULL; } - LOperand* result = reusable_slots_.first()->TopLevel()->GetSpillOperand(); - reusable_slots_.Remove(0); + LOperand* result = reusable_slots->first()->TopLevel()->GetSpillOperand(); + reusable_slots->Remove(0); return result; } @@ -1811,7 +1852,8 @@ bool LAllocator::TryAllocateFreeReg(LiveRange* current) { } LOperand* hint = current->FirstHint(); - if (hint != NULL && (hint->IsRegister() || hint->IsDoubleRegister())) { + if (hint != NULL && (hint->IsRegister() || hint->IsDoubleRegister() || + hint->IsSIMD128Register())) { int register_index = hint->index(); TraceAlloc( "Found reg hint %s (free until [%d) for live range %d (end %d[).\n", @@ -2162,7 +2204,17 @@ void LAllocator::Spill(LiveRange* range) { if (!first->HasAllocatedSpillOperand()) { LOperand* op = TryReuseSpillSlot(range); - if (op == NULL) op = chunk_->GetNextSpillSlot(range->Kind()); + if (op == NULL) { + op = chunk_->GetNextSpillSlot(range->Kind()); + } else if (range->Kind() == FLOAT32x4_REGISTERS && + op->kind() != LOperand::FLOAT32x4_STACK_SLOT) { + // Convert to Float32x4StackSlot. + op = LFloat32x4StackSlot::Create(op->index(), zone()); + } else if (range->Kind() == INT32x4_REGISTERS && + op->kind() != LOperand::INT32x4_STACK_SLOT) { + // Convert to Int32x4StackSlot. + op = LInt32x4StackSlot::Create(op->index(), zone()); + } first->SetSpillOperand(op); } range->MakeSpilled(chunk()->zone()); diff --git a/src/lithium-allocator.h b/src/lithium-allocator.h index 9908ea823d3..d87b689f669 100644 --- a/src/lithium-allocator.h +++ b/src/lithium-allocator.h @@ -51,12 +51,9 @@ class LArgument; class LPlatformChunk; class LOperand; class LUnallocated; -class LConstantOperand; class LGap; class LParallelMove; class LPointerMap; -class LStackSlot; -class LRegister; // This class represents a single point of a LOperand's lifetime. @@ -148,10 +145,17 @@ class LifetimePosition { enum RegisterKind { UNALLOCATED_REGISTERS, GENERAL_REGISTERS, - DOUBLE_REGISTERS + DOUBLE_REGISTERS, + FLOAT32x4_REGISTERS, + INT32x4_REGISTERS }; +inline bool IsSIMD128RegisterKind(RegisterKind kind) { + return kind == FLOAT32x4_REGISTERS || kind == INT32x4_REGISTERS; +} + + // A register-allocator view of a Lithium instruction. It contains the id of // the output operand and a list of input operand uses. @@ -616,11 +620,15 @@ class LAllocator BASE_EMBEDDED { ZoneList<LiveRange*> active_live_ranges_; ZoneList<LiveRange*> inactive_live_ranges_; ZoneList<LiveRange*> reusable_slots_; + // Slots reusable for both float32x4 and int32x4 register spilling. + ZoneList<LiveRange*> reusable_simd128_slots_; // Next virtual register number to be assigned to temporaries.
int next_virtual_register_; int first_artificial_register_; GrowableBitVector double_artificial_registers_; + GrowableBitVector float32x4_artificial_registers_; + GrowableBitVector int32x4_artificial_registers_; RegisterKind mode_; int num_registers_; diff --git a/src/lithium.cc b/src/lithium.cc index b2fb4ead7cc..96b0eba359e 100644 --- a/src/lithium.cc +++ b/src/lithium.cc @@ -102,45 +102,63 @@ void LOperand::PrintTo(StringStream* stream) { case DOUBLE_STACK_SLOT: stream->Add("[double_stack:%d]", index()); break; + case FLOAT32x4_STACK_SLOT: + stream->Add("[float32x4_stack:%d]", index()); + break; + case INT32x4_STACK_SLOT: + stream->Add("[int32x4_stack:%d]", index()); + break; case REGISTER: stream->Add("[%s|R]", Register::AllocationIndexToString(index())); break; case DOUBLE_REGISTER: stream->Add("[%s|R]", DoubleRegister::AllocationIndexToString(index())); break; + case FLOAT32x4_REGISTER: + stream->Add("[%s|R]", + SIMD128Register::AllocationIndexToString(index())); + break; + case INT32x4_REGISTER: + stream->Add("[%s|R]", + SIMD128Register::AllocationIndexToString(index())); + break; case ARGUMENT: stream->Add("[arg:%d]", index()); break; } } -#define DEFINE_OPERAND_CACHE(name, type) \ - L##name* L##name::cache = NULL; \ - \ - void L##name::SetUpCache() { \ - if (cache) return; \ - cache = new L##name[kNumCachedOperands]; \ - for (int i = 0; i < kNumCachedOperands; i++) { \ - cache[i].ConvertTo(type, i); \ - } \ - } \ - \ - void L##name::TearDownCache() { \ - delete[] cache; \ + +template<LOperand::Kind kOperandKind, int kNumCachedOperands> +LSubKindOperand<kOperandKind, kNumCachedOperands>* +LSubKindOperand<kOperandKind, kNumCachedOperands>::cache = NULL; + + +template<LOperand::Kind kOperandKind, int kNumCachedOperands> +void LSubKindOperand<kOperandKind, kNumCachedOperands>::SetUpCache() { + if (cache) return; + cache = new LSubKindOperand[kNumCachedOperands]; + for (int i = 0; i < kNumCachedOperands; i++) { + cache[i].ConvertTo(kOperandKind, i); + } +} + + +template<LOperand::Kind kOperandKind, int kNumCachedOperands> +void LSubKindOperand<kOperandKind, kNumCachedOperands>::TearDownCache() { + delete[] cache; +} -LITHIUM_OPERAND_LIST(DEFINE_OPERAND_CACHE) -#undef DEFINE_OPERAND_CACHE void LOperand::SetUpCaches() { -#define LITHIUM_OPERAND_SETUP(name, type) L##name::SetUpCache(); +#define LITHIUM_OPERAND_SETUP(name, type, number) L##name::SetUpCache(); LITHIUM_OPERAND_LIST(LITHIUM_OPERAND_SETUP) #undef LITHIUM_OPERAND_SETUP } void LOperand::TearDownCaches() { -#define LITHIUM_OPERAND_TEARDOWN(name, type) L##name::TearDownCache(); +#define LITHIUM_OPERAND_TEARDOWN(name, type, number) L##name::TearDownCache(); LITHIUM_OPERAND_LIST(LITHIUM_OPERAND_TEARDOWN) #undef LITHIUM_OPERAND_TEARDOWN } @@ -197,7 +215,9 @@ void LEnvironment::PrintTo(StringStream* stream) { void LPointerMap::RecordPointer(LOperand* op, Zone* zone) { // Do not record arguments as pointers. if (op->IsStackSlot() && op->index() < 0) return; - ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); pointer_operands_.Add(op, zone); } @@ -205,7 +225,9 @@ void LPointerMap::RemovePointer(LOperand* op) { // Do not record arguments as pointers.
if (op->IsStackSlot() && op->index() < 0) return; - ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); for (int i = 0; i < pointer_operands_.length(); ++i) { if (pointer_operands_[i]->Equals(op)) { pointer_operands_.Remove(i); @@ -218,7 +240,9 @@ void LPointerMap::RemovePointer(LOperand* op) { void LPointerMap::RecordUntagged(LOperand* op, Zone* zone) { // Do not record arguments as pointers. if (op->IsStackSlot() && op->index() < 0) return; - ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + ASSERT(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); untagged_operands_.Add(op, zone); } diff --git a/src/lithium.h b/src/lithium.h index 754f88da821..f70a60e36dc 100644 --- a/src/lithium.h +++ b/src/lithium.h @@ -35,12 +35,16 @@ namespace v8 { namespace internal { -#define LITHIUM_OPERAND_LIST(V) \ - V(ConstantOperand, CONSTANT_OPERAND) \ - V(StackSlot, STACK_SLOT) \ - V(DoubleStackSlot, DOUBLE_STACK_SLOT) \ - V(Register, REGISTER) \ - V(DoubleRegister, DOUBLE_REGISTER) +#define LITHIUM_OPERAND_LIST(V) \ + V(ConstantOperand, CONSTANT_OPERAND, 128) \ + V(StackSlot, STACK_SLOT, 128) \ + V(DoubleStackSlot, DOUBLE_STACK_SLOT, 128) \ + V(Float32x4StackSlot, FLOAT32x4_STACK_SLOT, 128) \ + V(Int32x4StackSlot, INT32x4_STACK_SLOT, 128) \ + V(Register, REGISTER, 16) \ + V(DoubleRegister, DOUBLE_REGISTER, 16) \ + V(Float32x4Register, FLOAT32x4_REGISTER, 16) \ + V(Int32x4Register, INT32x4_REGISTER, 16) class LOperand : public ZoneObject { @@ -51,8 +55,12 @@ class LOperand : public ZoneObject { CONSTANT_OPERAND, STACK_SLOT, DOUBLE_STACK_SLOT, + FLOAT32x4_STACK_SLOT, + INT32x4_STACK_SLOT, REGISTER, DOUBLE_REGISTER, + FLOAT32x4_REGISTER, + INT32x4_REGISTER, ARGUMENT }; @@ -60,14 +68,24 @@ Kind kind() const { return KindField::decode(value_); } int index() const { return static_cast<int>(value_) >> kKindFieldWidth; } -#define LITHIUM_OPERAND_PREDICATE(name, type) \ +#define LITHIUM_OPERAND_PREDICATE(name, type, number) \ bool Is##name() const { return kind() == type; } LITHIUM_OPERAND_LIST(LITHIUM_OPERAND_PREDICATE) - LITHIUM_OPERAND_PREDICATE(Argument, ARGUMENT) - LITHIUM_OPERAND_PREDICATE(Unallocated, UNALLOCATED) - LITHIUM_OPERAND_PREDICATE(Ignored, INVALID) + LITHIUM_OPERAND_PREDICATE(Argument, ARGUMENT, 0) + LITHIUM_OPERAND_PREDICATE(Unallocated, UNALLOCATED, 0) + LITHIUM_OPERAND_PREDICATE(Ignored, INVALID, 0) #undef LITHIUM_OPERAND_PREDICATE - bool Equals(LOperand* other) const { return value_ == other->value_; } + bool IsSIMD128Register() const { + return kind() == FLOAT32x4_REGISTER || kind() == INT32x4_REGISTER; + } + bool IsSIMD128StackSlot() const { + return kind() == FLOAT32x4_STACK_SLOT || kind() == INT32x4_STACK_SLOT; + } + bool Equals(LOperand* other) const { + return value_ == other->value_ || (index() == other->index() && + ((IsSIMD128Register() && other->IsSIMD128Register()) || + (IsSIMD128StackSlot() && other->IsSIMD128StackSlot()))); + } void PrintTo(StringStream* stream); void ConvertTo(Kind kind, int index) { @@ -81,7 +99,7 @@ static void TearDownCaches(); protected: - static const int kKindFieldWidth = 3; + static const int kKindFieldWidth = 4; class KindField : public BitField<Kind, 0, kKindFieldWidth> { };
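+ // An operand packs (index << kKindFieldWidth) | kind into value_; the + // extra kind bit (3 -> 4) makes room for the four new SIMD128 kinds.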
LOperand(Kind kind, int index) { ConvertTo(kind, index); } @@ -165,32 +183,32 @@ class LUnallocated : public LOperand { // because it accommodates a larger pay-load. // // For FIXED_SLOT policy: - // +------------------------------------------+ - // | slot_index | vreg | 0 | 001 | - // +------------------------------------------+ + // +-------------------------------------------+ + // | slot_index | vreg | 0 | 0001 | + // +-------------------------------------------+ // // For all other (extended) policies: - // +------------------------------------------+ - // | reg_index | L | PPP | vreg | 1 | 001 | L ... Lifetime - // +------------------------------------------+ P ... Policy + // +-------------------------------------------+ + // | reg_index | L | PPP | vreg | 1 | 0001 | L ... Lifetime + // +-------------------------------------------+ P ... Policy // // The slot index is a signed value which requires us to decode it manually // instead of using the BitField utility class. // The superclass has a KindField. - STATIC_ASSERT(kKindFieldWidth == 3); + STATIC_ASSERT(kKindFieldWidth == 4); // BitFields for all unallocated operands. - class BasicPolicyField : public BitField<BasicPolicy, 3, 1> {}; - class VirtualRegisterField : public BitField<unsigned, 4, 18> {}; + class BasicPolicyField : public BitField<BasicPolicy, 4, 1> {}; + class VirtualRegisterField : public BitField<unsigned, 5, 18> {}; // BitFields specific to BasicPolicy::FIXED_SLOT. - class FixedSlotIndexField : public BitField<int, 22, 10> {}; + class FixedSlotIndexField : public BitField<int, 23, 9> {}; // BitFields specific to BasicPolicy::EXTENDED_POLICY. - class ExtendedPolicyField : public BitField<ExtendedPolicy, 22, 3> {}; - class LifetimeField : public BitField<Lifetime, 25, 1> {}; - class FixedRegisterField : public BitField<int, 26, 6> {}; + class ExtendedPolicyField : public BitField<ExtendedPolicy, 23, 3> {}; + class LifetimeField : public BitField<Lifetime, 26, 1> {}; + class FixedRegisterField : public BitField<int, 27, 5> {}; static const int kMaxVirtualRegisters = VirtualRegisterField::kMax + 1; static const int kFixedSlotIndexWidth = FixedSlotIndexField::kSize; @@ -317,31 +335,37 @@ class LMoveOperands V8_FINAL BASE_EMBEDDED { }; -class LConstantOperand V8_FINAL : public LOperand { +template<LOperand::Kind kOperandKind, int kNumCachedOperands> +class LSubKindOperand V8_FINAL : public LOperand { public: - static LConstantOperand* Create(int index, Zone* zone) { + static LSubKindOperand* Create(int index, Zone* zone) { ASSERT(index >= 0); if (index < kNumCachedOperands) return &cache[index]; - return new(zone) LConstantOperand(index); + return new(zone) LSubKindOperand(index); } - static LConstantOperand* cast(LOperand* op) { - ASSERT(op->IsConstantOperand()); - return reinterpret_cast<LConstantOperand*>(op); + static LSubKindOperand* cast(LOperand* op) { + ASSERT(op->kind() == kOperandKind); + return reinterpret_cast<LSubKindOperand*>(op); } static void SetUpCache(); static void TearDownCache(); private: - static const int kNumCachedOperands = 128; - static LConstantOperand* cache; + static LSubKindOperand* cache; - LConstantOperand() : LOperand() { } - explicit LConstantOperand(int index) : LOperand(CONSTANT_OPERAND, index) { } + LSubKindOperand() : LOperand() { } + explicit LSubKindOperand(int index) : LOperand(kOperandKind, index) { } }; +#define LITHIUM_TYPEDEF_SUBKIND_OPERAND_CLASS(name, type, number) \ +typedef LSubKindOperand<LOperand::type, number> L##name; +LITHIUM_OPERAND_LIST(LITHIUM_TYPEDEF_SUBKIND_OPERAND_CLASS) +#undef LITHIUM_TYPEDEF_SUBKIND_OPERAND_CLASS + + class LArgument V8_FINAL : public LOperand { public: explicit LArgument(int index) : LOperand(ARGUMENT, index) { } @@ -353,106 +377,6 @@ class LArgument V8_FINAL : public LOperand { }; -class LStackSlot V8_FINAL : public LOperand { - public: - static 
LStackSlot* Create(int index, Zone* zone) { - ASSERT(index >= 0); - if (index < kNumCachedOperands) return &cache[index]; - return new(zone) LStackSlot(index); - } - - static LStackSlot* cast(LOperand* op) { - ASSERT(op->IsStackSlot()); - return reinterpret_cast<LStackSlot*>(op); - } - - static void SetUpCache(); - static void TearDownCache(); - - private: - static const int kNumCachedOperands = 128; - static LStackSlot* cache; - - LStackSlot() : LOperand() { } - explicit LStackSlot(int index) : LOperand(STACK_SLOT, index) { } -}; - - -class LDoubleStackSlot V8_FINAL : public LOperand { - public: - static LDoubleStackSlot* Create(int index, Zone* zone) { - ASSERT(index >= 0); - if (index < kNumCachedOperands) return &cache[index]; - return new(zone) LDoubleStackSlot(index); - } - - static LDoubleStackSlot* cast(LOperand* op) { - ASSERT(op->IsStackSlot()); - return reinterpret_cast<LDoubleStackSlot*>(op); - } - - static void SetUpCache(); - static void TearDownCache(); - - private: - static const int kNumCachedOperands = 128; - static LDoubleStackSlot* cache; - - LDoubleStackSlot() : LOperand() { } - explicit LDoubleStackSlot(int index) : LOperand(DOUBLE_STACK_SLOT, index) { } -}; - - -class LRegister V8_FINAL : public LOperand { - public: - static LRegister* Create(int index, Zone* zone) { - ASSERT(index >= 0); - if (index < kNumCachedOperands) return &cache[index]; - return new(zone) LRegister(index); - } - - static LRegister* cast(LOperand* op) { - ASSERT(op->IsRegister()); - return reinterpret_cast<LRegister*>(op); - } - - static void SetUpCache(); - static void TearDownCache(); - - private: - static const int kNumCachedOperands = 16; - static LRegister* cache; - - LRegister() : LOperand() { } - explicit LRegister(int index) : LOperand(REGISTER, index) { } -}; - - -class LDoubleRegister V8_FINAL : public LOperand { - public: - static LDoubleRegister* Create(int index, Zone* zone) { - ASSERT(index >= 0); - if (index < kNumCachedOperands) return &cache[index]; - return new(zone) LDoubleRegister(index); - } - - static LDoubleRegister* cast(LOperand* op) { - ASSERT(op->IsDoubleRegister()); - return reinterpret_cast<LDoubleRegister*>(op); - } - - static void SetUpCache(); - static void TearDownCache(); - - private: - static const int kNumCachedOperands = 16; - static LDoubleRegister* cache; - - LDoubleRegister() : LOperand() { } - explicit LDoubleRegister(int index) : LOperand(DOUBLE_REGISTER, index) { } -}; - - class LParallelMove V8_FINAL : public ZoneObject { public: explicit LParallelMove(Zone* zone) : move_operands_(4, zone) { } diff --git a/src/objects-inl.h b/src/objects-inl.h index 8c919601b4b..f5ed7aafba7 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -1427,6 +1427,11 @@ int HeapNumber::get_sign() { } +const char* Float32x4::Name() { + return "float32x4"; +} + + int Float32x4::kRuntimeAllocatorId() { return Runtime::kAllocateFloat32x4; } @@ -1453,6 +1458,11 @@ float Float32x4::getAt(int index) { } +const char* Int32x4::Name() { + return "int32x4"; +} + + int Int32x4::kRuntimeAllocatorId() { return Runtime::kAllocateInt32x4; } diff --git a/src/objects.cc b/src/objects.cc index 1f08b31d489..6e8874db38e 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -2330,6 +2330,8 @@ const char* Representation::Mnemonic() const { case kTagged: return "t"; case kSmi: return "s"; case kDouble: return "d"; + case kFloat32x4: return "float32x4"; + case kInt32x4: return "int32x4"; case kInteger32: return "i"; case kHeapObject: return "h"; case kExternal: return "x"; @@ -11015,6 +11017,20 @@ void 
DeoptimizationInputData::DeoptimizationInputDataPrint(FILE* out) { break; } + case Translation::FLOAT32x4_REGISTER: { + int reg_code = iterator.Next(); + PrintF(out, "{input=%s}", + SIMD128Register::AllocationIndexToString(reg_code)); + break; + } + + case Translation::INT32x4_REGISTER: { + int reg_code = iterator.Next(); + PrintF(out, "{input=%s}", + SIMD128Register::AllocationIndexToString(reg_code)); + break; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator.Next(); PrintF(out, "{input=%d}", input_slot_index); @@ -11039,6 +11055,18 @@ void DeoptimizationInputData::DeoptimizationInputDataPrint(FILE* out) { break; } + case Translation::FLOAT32x4_STACK_SLOT: { + int input_slot_index = iterator.Next(); + PrintF(out, "{input=%d}", input_slot_index); + break; + } + + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator.Next(); + PrintF(out, "{input=%d}", input_slot_index); + break; + } + case Translation::LITERAL: { unsigned literal_index = iterator.Next(); PrintF(out, "{literal_id=%u}", literal_index); diff --git a/src/objects.h b/src/objects.h index 73923b9aca8..e518b3bfe3f 100644 --- a/src/objects.h +++ b/src/objects.h @@ -1972,6 +1972,7 @@ class Float32x4: public HeapObject { static const int kLanes = 4; static const int kValueSize = kFloat32x4Size; static const InstanceType kInstanceType = FLOAT32x4_TYPE; + static inline const char* Name(); static inline int kRuntimeAllocatorId(); static inline int kMapRootIndex(); @@ -2009,6 +2010,7 @@ class Int32x4: public HeapObject { typedef int32x4_value_t value_t; static const int kValueSize = kInt32x4Size; static const InstanceType kInstanceType = INT32x4_TYPE; + static inline const char* Name(); static inline int kRuntimeAllocatorId(); static inline int kMapRootIndex(); diff --git a/src/property-details.h b/src/property-details.h index 99dd1211b24..11d7063455b 100644 --- a/src/property-details.h +++ b/src/property-details.h @@ -148,6 +148,7 @@ class Representation { } if (kind_ == kUInteger8 && other.kind_ == kInteger8) return false; if (kind_ == kUInteger16 && other.kind_ == kInteger16) return false; + if (IsSIMD128() && other.IsSIMD128()) return false; return kind_ > other.kind_; } @@ -189,6 +190,7 @@ class Representation { bool IsDouble() const { return kind_ == kDouble; } bool IsFloat32x4() const { return kind_ == kFloat32x4; } bool IsInt32x4() const { return kind_ == kInt32x4; } + bool IsSIMD128() const { return IsFloat32x4() || IsInt32x4(); } bool IsHeapObject() const { return kind_ == kHeapObject; } bool IsExternal() const { return kind_ == kExternal; } bool IsSpecialization() const { diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index e7c20bb1508..bc6121a07ef 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -2739,6 +2739,24 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { } +void Assembler::movups(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x10); + emit_sse_operand(dst, src); +} + + +void Assembler::movups(const Operand& dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(src, dst); + emit(0x0F); + emit(0x11); + emit_sse_operand(src, dst); +} + + void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { ASSERT(is_uint8(imm8)); EnsureSpace ensure_space(this); diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index d4a51cd4dd9..c2411ba1477 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ 
-300,6 +300,7 @@ const XMMRegister xmm15 = { 15 }; typedef XMMRegister DoubleRegister; +typedef XMMRegister SIMD128Register; enum Condition { @@ -1359,6 +1360,8 @@ class Assembler : public AssemblerBase { // SSE instructions void movaps(XMMRegister dst, XMMRegister src); + void movups(XMMRegister dst, const Operand& src); + void movups(const Operand& dst, XMMRegister src); void movss(XMMRegister dst, const Operand& src); void movss(const Operand& dst, XMMRegister src); void shufps(XMMRegister dst, XMMRegister src, byte imm8); diff --git a/src/x64/cpu-x64.cc b/src/x64/cpu-x64.cc index 4fa290a8b5f..427d3b0aad8 100644 --- a/src/x64/cpu-x64.cc +++ b/src/x64/cpu-x64.cc @@ -51,6 +51,11 @@ bool CPU::SupportsCrankshaft() { } +bool CPU::SupportsSIMD128InCrankshaft() { + return true; // SSE2 is part of the x64 baseline. +} + + void CPU::FlushICache(void* start, size_t size) { // No need to flush the instruction cache on Intel. On Intel instruction // cache flushing is only necessary when multiple cores running the same diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index fd26cf5265e..8eadb6f4ae9 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -91,8 +91,9 @@ void Deoptimizer::FillInputFrame(Address tos, JavaScriptFrame* frame) { } input_->SetRegister(rsp.code(), reinterpret_cast<intptr_t>(frame->sp())); input_->SetRegister(rbp.code(), reinterpret_cast<intptr_t>(frame->fp())); + simd128_value_t zero = {{0.0, 0.0}}; for (int i = 0; i < DoubleRegister::NumAllocatableRegisters(); i++) { - input_->SetDoubleRegister(i, 0.0); + input_->SetSIMD128Register(i, zero); } // Fill the frame content from the actual data on the frame. @@ -112,10 +113,10 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { - double double_value = input_->GetDoubleRegister(i); - output_frame->SetDoubleRegister(i, double_value); + simd128_value_t xmm_value = input_->GetSIMD128Register(i); + output_frame->SetSIMD128Register(i, xmm_value); } } @@ -139,14 +140,14 @@ void Deoptimizer::EntryGenerator::Generate() { // Save all general purpose registers before messing with them. const int kNumberOfRegisters = Register::kNumRegisters; - const int kDoubleRegsSize = kDoubleSize * + const int kXMMRegsSize = kSIMD128Size * XMMRegister::NumAllocatableRegisters(); - __ subq(rsp, Immediate(kDoubleRegsSize)); + __ subq(rsp, Immediate(kXMMRegsSize)); for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int offset = i * kDoubleSize; - __ movsd(Operand(rsp, offset), xmm_reg); + int offset = i * kSIMD128Size; + __ movups(Operand(rsp, offset), xmm_reg); } // We push all registers onto the stack, even though we do not need @@ -157,7 +158,7 @@ } const int kSavedRegistersAreaSize = kNumberOfRegisters * kRegisterSize + - kDoubleRegsSize; + kXMMRegsSize; // We use this to keep the value of the fifth argument temporarily. // Unfortunately we can't store it directly in r8 (used for passing @@ -207,11 +208,13 @@ __ pop(Operand(rbx, offset)); } - // Fill in the double input registers. - int double_regs_offset = FrameDescription::double_registers_offset(); + // Fill in the xmm input registers.
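+ // Each register was pushed with a single 16-byte movups above; pop it back + // as two 8-byte words into the frame's simd128_registers_ entry.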
+ STATIC_ASSERT(kSIMD128Size == 2 * kDoubleSize); + int xmm_regs_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) { - int dst_offset = i * kDoubleSize + double_regs_offset; + int dst_offset = i * kSIMD128Size + xmm_regs_offset; __ pop(Operand(rbx, dst_offset)); + __ pop(Operand(rbx, dst_offset + kDoubleSize)); } // Remove the bailout id and return address from the stack. @@ -275,8 +278,8 @@ for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int src_offset = i * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(rbx, src_offset)); + int src_offset = i * kSIMD128Size + xmm_regs_offset; + __ movups(xmm_reg, Operand(rbx, src_offset)); } // Push state, pc, and continuation from the last output frame. @@ -335,6 +338,18 @@ void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n].d[0]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + ASSERT(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n].d[0] = value; +} + + #undef __ diff --git a/src/x64/disasm-x64.cc b/src/x64/disasm-x64.cc index 76b541c0100..0f6b9971c2f 100644 --- a/src/x64/disasm-x64.cc +++ b/src/x64/disasm-x64.cc @@ -1249,6 +1249,21 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { current += PrintRightXMMOperand(current); AppendToBuffer(",%s", NameOfXMMRegister(regop)); + } else if (opcode == 0x10) { + // movups xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, &regop, &rm); + AppendToBuffer("movups %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x11) { + // movups xmm/m128, xmm + int mod, regop, rm; + get_modrm(*current, &mod, &regop, &rm); + AppendToBuffer("movups "); + current += PrintRightXMMOperand(current); + AppendToBuffer(", %s", NameOfXMMRegister(regop)); + } else if (opcode == 0xA2) { // CPUID AppendToBuffer("%s", mnemonic); diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc index d25971af7ee..c0f14e1fff9 100644 --- a/src/x64/lithium-codegen-x64.cc +++ b/src/x64/lithium-codegen-x64.cc @@ -385,6 +385,11 @@ XMMRegister LCodeGen::ToDoubleRegister(int index) const { } +XMMRegister LCodeGen::ToSIMD128Register(int index) const { + return XMMRegister::FromAllocationIndex(index); +} + + Register LCodeGen::ToRegister(LOperand* op) const { ASSERT(op->IsRegister()); return ToRegister(op->index()); } @@ -397,6 +402,24 @@ XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { } +XMMRegister LCodeGen::ToFloat32x4Register(LOperand* op) const { + ASSERT(op->IsFloat32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToInt32x4Register(LOperand* op) const { + ASSERT(op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToSIMD128Register(LOperand* op) const { + ASSERT(op->IsFloat32x4Register() || op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + bool LCodeGen::IsInteger32Constant(LConstantOperand* op) const { return op->IsConstantOperand() && chunk_->LookupLiteralRepresentation(op).IsSmiOrInteger32(); @@ -457,7 +480,8 @@ static int ArgumentsOffsetWithoutFrame(int index) { Operand LCodeGen::ToOperand(LOperand* op) const { // Does not handle
registers. In X64 assembler, plain registers are not // representable as an Operand. - ASSERT(op->IsStackSlot() || op->IsDoubleStackSlot()); + ASSERT(op->IsStackSlot() || op->IsDoubleStackSlot() || + op->IsFloat32x4StackSlot() || op->IsInt32x4StackSlot()); if (NeedsEagerFrame()) { return Operand(rbp, StackSlotOffset(op->index())); } else { @@ -570,6 +594,12 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } } else if (op->IsDoubleStackSlot()) { translation->StoreDoubleStackSlot(op->index()); + } else if (op->IsFloat32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT32x4_STACK_SLOT); + } else if (op->IsInt32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::INT32x4_STACK_SLOT); } else if (op->IsArgument()) { ASSERT(is_tagged); int src_index = GetStackSlotCount() + op->index(); @@ -586,6 +616,12 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } else if (op->IsDoubleRegister()) { XMMRegister reg = ToDoubleRegister(op); translation->StoreDoubleRegister(reg); + } else if (op->IsFloat32x4Register()) { + XMMRegister reg = ToFloat32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT32x4_REGISTER); + } else if (op->IsInt32x4Register()) { + XMMRegister reg = ToInt32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::INT32x4_REGISTER); } else if (op->IsConstantOperand()) { HConstant* constant = chunk()->LookupConstant(LConstantOperand::cast(op)); int src_index = DefineDeoptimizationLiteral(constant->handle(isolate())); @@ -2936,26 +2972,6 @@ void LCodeGen::DoAccessArgumentsAt(LAccessArgumentsAt* instr) { } -void LCodeGen::DoDeferredSIMD128ToTagged(LInstruction* instr, - Runtime::FunctionId id) { - // TODO(3095996): Get rid of this. For now, we need to make the - // result register contain a valid pointer because it is already - // contained in the register pointer map. - Register reg = ToRegister(instr->result()); - __ Move(reg, Smi::FromInt(0)); - - { - PushSafepointRegistersScope scope(this); - __ movp(rsi, Operand(rbp, StandardFrameConstants::kContextOffset)); - __ CallRuntimeSaveDoubles(id); - RecordSafepointWithRegisters( - instr->pointer_map(), 0, Safepoint::kNoLazyDeopt); - __ movp(kScratchRegister, rax); - } - __ movp(reg, kScratchRegister); -} - - void LCodeGen::HandleExternalArrayOpRequiresPreScale( LOperand* key, ElementsKind elements_kind) { @@ -2968,62 +2984,6 @@ void LCodeGen::HandleExternalArrayOpRequiresPreScale( } -template -void LCodeGen::DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr) { - class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { - public: - DeferredSIMD128ToTagged(LCodeGen* codegen, - LInstruction* instr, - Runtime::FunctionId id) - : LDeferredCode(codegen), instr_(instr), id_(id) { } - virtual void Generate() V8_OVERRIDE { - codegen()->DoDeferredSIMD128ToTagged(instr_, id_); - } - virtual LInstruction* instr() V8_OVERRIDE { return instr_; } - private: - LInstruction* instr_; - Runtime::FunctionId id_; - }; - - // Pre scale key if necessary. - LOperand* key = instr->key(); - ElementsKind elements_kind = instr->elements_kind(); - if (!key->IsConstantOperand()) { - HandleExternalArrayOpRequiresPreScale(key, elements_kind); - } - - // Allocate a SIMD128 object on the heap. 
- Register reg = ToRegister(instr->result()); - Register tmp = ToRegister(instr->temp()); - DeferredSIMD128ToTagged* deferred = - new(zone()) DeferredSIMD128ToTagged(this, instr, - static_cast(T::kRuntimeAllocatorId())); - if (FLAG_inline_new) { - __ AllocateSIMDHeapObject(T::kSize, reg, tmp, deferred->entry(), - static_cast(T::kMapRootIndex())); - } else { - __ jmp(deferred->entry()); - } - __ bind(deferred->exit()); - - // Copy the SIMD128 value from the external array to the heap object. - STATIC_ASSERT(T::kValueSize % kPointerSize == 0); - int base_offset = instr->is_fixed_typed_array() - ? FixedTypedArrayBase::kDataOffset - kHeapObjectTag - : 0; - for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { - Operand operand(BuildFastArrayOperand( - instr->elements(), - key, - elements_kind, - base_offset + offset, - instr->additional_index())); - __ movp(tmp, operand); - __ movp(FieldOperand(reg, T::kValueOffset + offset), tmp); - } -} - - void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { ElementsKind elements_kind = instr->elements_kind(); LOperand* key = instr->key(); @@ -3039,6 +2999,8 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { // and the dehoisted address computation happens in 64 bits __ movsxlq(key_reg, key_reg); } + + HandleExternalArrayOpRequiresPreScale(key, elements_kind); } int base_offset = instr->is_fixed_typed_array() ? FixedTypedArrayBase::kDataOffset - kHeapObjectTag @@ -3058,10 +3020,8 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { __ movsd(ToDoubleRegister(instr->result()), operand); - } else if (IsFloat32x4ElementsKind(elements_kind)) { - DoLoadKeyedSIMD128ExternalArray(instr); - } else if (IsInt32x4ElementsKind(elements_kind)) { - DoLoadKeyedSIMD128ExternalArray(instr); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(ToSIMD128Register(instr->result()), operand); } else { Register result(ToRegister(instr->result())); switch (elements_kind) { @@ -3233,6 +3193,7 @@ Operand LCodeGen::BuildFastArrayOperand( if (constant_value & 0xF0000000) { Abort(kArrayIndexConstantValueTooBig); } + return Operand(elements_pointer_reg, ((constant_value + additional_index) << shift_size) + offset); @@ -4228,42 +4189,6 @@ void LCodeGen::DoBoundsCheck(LBoundsCheck* instr) { } -template -void LCodeGen::DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr) { - ASSERT(instr->value()->IsRegister()); - Register input_reg = ToRegister(instr->value()); - Condition cc = masm()->CheckSmi(input_reg); - DeoptimizeIf(cc, instr->environment()); - __ CompareRoot(FieldOperand(input_reg, HeapObject::kMapOffset), - static_cast(T::kMapRootIndex())); - DeoptimizeIf(not_equal, instr->environment()); - - // Pre scale key if necessary. - LOperand* key = instr->key(); - ElementsKind elements_kind = instr->elements_kind(); - if (!key->IsConstantOperand()) { - HandleExternalArrayOpRequiresPreScale(key, elements_kind); - } - - // Copy the SIMD128 value from the heap object to the external array. - STATIC_ASSERT(T::kValueSize % kPointerSize == 0); - int base_offset = instr->is_fixed_typed_array() - ? 
FixedTypedArrayBase::kDataOffset - kHeapObjectTag - : 0; - for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { - Operand operand(BuildFastArrayOperand( - instr->elements(), - key, - elements_kind, - base_offset + offset, - instr->additional_index())); - __ movp(kScratchRegister, - FieldOperand(input_reg, T::kValueOffset + offset)); - __ movp(operand, kScratchRegister); - } -} - - void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { ElementsKind elements_kind = instr->elements_kind(); LOperand* key = instr->key(); @@ -4279,6 +4204,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { // and the dehoisted address computation happens in 64 bits __ movsxlq(key_reg, key_reg); } + + HandleExternalArrayOpRequiresPreScale(key, elements_kind); } int base_offset = instr->is_fixed_typed_array() ? FixedTypedArrayBase::kDataOffset - kHeapObjectTag @@ -4298,10 +4225,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { __ movsd(operand, ToDoubleRegister(instr->value())); - } else if (IsFloat32x4ElementsKind(elements_kind)) { - DoStoreKeyedSIMD128ExternalArray(instr); - } else if (IsInt32x4ElementsKind(elements_kind)) { - DoStoreKeyedSIMD128ExternalArray(instr); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(operand, ToSIMD128Register(instr->value())); } else { Register value(ToRegister(instr->value())); switch (elements_kind) { @@ -4835,6 +4760,71 @@ void LCodeGen::DoDeferredNumberTagD(LNumberTagD* instr) { } +void LCodeGen::DoDeferredSIMD128ToTagged(LSIMD128ToTagged* instr, + Runtime::FunctionId id) { + // TODO(3095996): Get rid of this. For now, we need to make the + // result register contain a valid pointer because it is already + // contained in the register pointer map. 
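+ // A smi zero keeps the result register valid for the GC while the runtime + // call below allocates the actual SIMD128 heap object.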
+ Register reg = ToRegister(instr->result()); + __ Move(reg, Smi::FromInt(0)); + + { + PushSafepointRegistersScope scope(this); + __ movp(rsi, Operand(rbp, StandardFrameConstants::kContextOffset)); + __ CallRuntimeSaveDoubles(id); + RecordSafepointWithRegisters( + instr->pointer_map(), 0, Safepoint::kNoLazyDeopt); + __ movp(kScratchRegister, rax); + } + __ movp(reg, kScratchRegister); +} + + +template<class T> +void LCodeGen::HandleSIMD128ToTagged(LSIMD128ToTagged* instr) { + class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { + public: + DeferredSIMD128ToTagged(LCodeGen* codegen, + LSIMD128ToTagged* instr, + Runtime::FunctionId id) + : LDeferredCode(codegen), instr_(instr), id_(id) { } + virtual void Generate() V8_OVERRIDE { + codegen()->DoDeferredSIMD128ToTagged(instr_, id_); + } + virtual LInstruction* instr() V8_OVERRIDE { return instr_; } + private: + LSIMD128ToTagged* instr_; + Runtime::FunctionId id_; + }; + + XMMRegister input_reg = ToSIMD128Register(instr->value()); + Register reg = ToRegister(instr->result()); + Register tmp = ToRegister(instr->temp()); + + DeferredSIMD128ToTagged* deferred = + new(zone()) DeferredSIMD128ToTagged(this, instr, + static_cast<Runtime::FunctionId>(T::kRuntimeAllocatorId())); + if (FLAG_inline_new) { + __ AllocateSIMDHeapObject(T::kSize, reg, tmp, deferred->entry(), + static_cast<Heap::RootListIndex>(T::kMapRootIndex())); + } else { + __ jmp(deferred->entry()); + } + __ bind(deferred->exit()); + __ movups(FieldOperand(reg, T::kValueOffset), input_reg); +} + + +void LCodeGen::DoSIMD128ToTagged(LSIMD128ToTagged* instr) { + if (instr->value()->IsFloat32x4Register()) { + HandleSIMD128ToTagged<Float32x4>(instr); + } else { + ASSERT(instr->value()->IsInt32x4Register()); + HandleSIMD128ToTagged<Int32x4>(instr); + } +} + + void LCodeGen::DoSmiTag(LSmiTag* instr) { ASSERT(instr->value()->Equals(instr->result())); Register input = ToRegister(instr->value()); @@ -5012,6 +5002,35 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { } +template<class T> +void LCodeGen::HandleTaggedToSIMD128(LTaggedToSIMD128* instr) { + LOperand* input = instr->value(); + ASSERT(input->IsRegister()); + LOperand* result = instr->result(); + ASSERT(result->IsSIMD128Register()); + + Register input_reg = ToRegister(input); + XMMRegister result_reg = ToSIMD128Register(result); + + Condition cc = masm()->CheckSmi(input_reg); + DeoptimizeIf(cc, instr->environment()); + __ CompareRoot(FieldOperand(input_reg, HeapObject::kMapOffset), + static_cast<Heap::RootListIndex>(T::kMapRootIndex())); + DeoptimizeIf(not_equal, instr->environment()); + __ movups(result_reg, FieldOperand(input_reg, T::kValueOffset)); +} + + +void LCodeGen::DoTaggedToSIMD128(LTaggedToSIMD128* instr) { + if (instr->representation().IsFloat32x4()) { + HandleTaggedToSIMD128<Float32x4>(instr); + } else { + ASSERT(instr->representation().IsInt32x4()); + HandleTaggedToSIMD128<Int32x4>(instr); + } +} + + void LCodeGen::DoDoubleToI(LDoubleToI* instr) { LOperand* input = instr->value(); ASSERT(input->IsDoubleRegister()); diff --git a/src/x64/lithium-codegen-x64.h b/src/x64/lithium-codegen-x64.h index 0c20c4b2026..db2f15bf3c5 100644 --- a/src/x64/lithium-codegen-x64.h +++ b/src/x64/lithium-codegen-x64.h @@ -85,6 +85,9 @@ class LCodeGen: public LCodeGenBase { // Support for converting LOperands to assembler types.
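+ // The Float32x4/Int32x4 variants below only differ in the operand kind + // they assert; all three return the same underlying XMM register.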
Register ToRegister(LOperand* op) const; XMMRegister ToDoubleRegister(LOperand* op) const; + XMMRegister ToFloat32x4Register(LOperand* op) const; + XMMRegister ToInt32x4Register(LOperand* op) const; + XMMRegister ToSIMD128Register(LOperand* op) const; bool IsInteger32Constant(LConstantOperand* op) const; bool IsSmiConstant(LConstantOperand* op) const; int32_t ToInteger32(LConstantOperand* op) const; @@ -116,7 +119,13 @@ void DoDeferredInstanceOfKnownGlobal(LInstanceOfKnownGlobal* instr, Label* map_check); void DoDeferredInstanceMigration(LCheckMaps* instr, Register object); - void DoDeferredSIMD128ToTagged(LInstruction* instr, Runtime::FunctionId id); + void DoDeferredSIMD128ToTagged(LSIMD128ToTagged* instr, + Runtime::FunctionId id); + + template<class T> + void HandleTaggedToSIMD128(LTaggedToSIMD128* instr); + template<class T> + void HandleSIMD128ToTagged(LSIMD128ToTagged* instr); // Parallel move support. void DoParallelMove(LParallelMove* move); @@ -245,6 +254,7 @@ Register ToRegister(int index) const; XMMRegister ToDoubleRegister(int index) const; + XMMRegister ToSIMD128Register(int index) const; Operand BuildFastArrayOperand( LOperand* elements_pointer, LOperand* key, @@ -327,13 +337,9 @@ void DoLoadKeyedExternalArray(LLoadKeyed* instr); void HandleExternalArrayOpRequiresPreScale(LOperand* key, ElementsKind elements_kind); - template<class T> - void DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr); void DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr); void DoLoadKeyedFixedArray(LLoadKeyed* instr); void DoStoreKeyedExternalArray(LStoreKeyed* instr); - template<class T> - void DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr); void DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr); void DoStoreKeyedFixedArray(LStoreKeyed* instr); #ifdef _MSC_VER diff --git a/src/x64/lithium-gap-resolver-x64.cc b/src/x64/lithium-gap-resolver-x64.cc index 5b4e32d2c44..f1c112a699a 100644 --- a/src/x64/lithium-gap-resolver-x64.cc +++ b/src/x64/lithium-gap-resolver-x64.cc @@ -244,6 +244,23 @@ void LGapResolver::EmitMove(int index) { __ movsd(xmm0, src); __ movsd(cgen_->ToOperand(destination), xmm0); } + } else if (source->IsSIMD128Register()) { + XMMRegister src = cgen_->ToSIMD128Register(source); + if (destination->IsSIMD128Register()) { + __ movaps(cgen_->ToSIMD128Register(destination), src); + } else { + ASSERT(destination->IsSIMD128StackSlot()); + __ movups(cgen_->ToOperand(destination), src); + } + } else if (source->IsSIMD128StackSlot()) { + Operand src = cgen_->ToOperand(source); + if (destination->IsSIMD128Register()) { + __ movups(cgen_->ToSIMD128Register(destination), src); + } else { + ASSERT(destination->IsSIMD128StackSlot()); + __ movups(xmm0, src); + __ movups(cgen_->ToOperand(destination), xmm0); + } } else { UNREACHABLE(); } @@ -285,6 +302,19 @@ void LGapResolver::EmitSwap(int index) { __ movsd(dst, xmm0); __ movp(src, kScratchRegister); + } else if (source->IsSIMD128StackSlot() && + destination->IsSIMD128StackSlot()) { + // Swap two XMM stack slots.
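+ // xmm0 buffers one slot; the other 16 bytes go through kScratchRegister + // as two 8-byte words, so no second XMM scratch is needed.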
diff --git a/src/x64/lithium-gap-resolver-x64.cc b/src/x64/lithium-gap-resolver-x64.cc
index 5b4e32d2c44..f1c112a699a 100644
--- a/src/x64/lithium-gap-resolver-x64.cc
+++ b/src/x64/lithium-gap-resolver-x64.cc
@@ -244,6 +244,23 @@ void LGapResolver::EmitMove(int index) {
       __ movsd(xmm0, src);
       __ movsd(cgen_->ToOperand(destination), xmm0);
     }
+  } else if (source->IsSIMD128Register()) {
+    XMMRegister src = cgen_->ToSIMD128Register(source);
+    if (destination->IsSIMD128Register()) {
+      __ movaps(cgen_->ToSIMD128Register(destination), src);
+    } else {
+      ASSERT(destination->IsSIMD128StackSlot());
+      __ movups(cgen_->ToOperand(destination), src);
+    }
+  } else if (source->IsSIMD128StackSlot()) {
+    Operand src = cgen_->ToOperand(source);
+    if (destination->IsSIMD128Register()) {
+      __ movups(cgen_->ToSIMD128Register(destination), src);
+    } else {
+      ASSERT(destination->IsSIMD128StackSlot());
+      __ movups(xmm0, src);
+      __ movups(cgen_->ToOperand(destination), xmm0);
+    }
   } else {
     UNREACHABLE();
   }
@@ -285,6 +302,19 @@ void LGapResolver::EmitSwap(int index) {
     __ movsd(dst, xmm0);
     __ movp(src, kScratchRegister);
 
+  } else if (source->IsSIMD128StackSlot() &&
+             destination->IsSIMD128StackSlot()) {
+    // Swap two XMM stack slots.
+    STATIC_ASSERT(kSIMD128Size == 2 * kDoubleSize);
+    Operand src = cgen_->ToOperand(source);
+    Operand dst = cgen_->ToOperand(destination);
+    __ movups(xmm0, src);
+    __ movq(kScratchRegister, dst);
+    __ movq(src, kScratchRegister);
+    __ movq(kScratchRegister, Operand(dst, kDoubleSize));
+    __ movq(Operand(src, kDoubleSize), kScratchRegister);
+    __ movups(dst, xmm0);
+
   } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
     // Swap two double registers.
     XMMRegister source_reg = cgen_->ToDoubleRegister(source);
@@ -293,6 +323,14 @@ void LGapResolver::EmitSwap(int index) {
     __ movaps(source_reg, destination_reg);
     __ movaps(destination_reg, xmm0);
 
+  } else if (source->IsSIMD128Register() && destination->IsSIMD128Register()) {
+    // Swap two XMM registers.
+    XMMRegister source_reg = cgen_->ToSIMD128Register(source);
+    XMMRegister destination_reg = cgen_->ToSIMD128Register(destination);
+    __ movaps(xmm0, source_reg);
+    __ movaps(source_reg, destination_reg);
+    __ movaps(destination_reg, xmm0);
+
   } else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) {
     // Swap a double register and a double stack slot.
     ASSERT((source->IsDoubleRegister() && destination->IsDoubleStackSlot()) ||
@@ -307,6 +345,22 @@ void LGapResolver::EmitSwap(int index) {
     __ movsd(other_operand, reg);
     __ movaps(reg, xmm0);
 
+  } else if (source->IsSIMD128Register() || destination->IsSIMD128Register()) {
+    // Swap an XMM register and an XMM stack slot.
+    ASSERT((source->IsSIMD128Register() &&
+            destination->IsSIMD128StackSlot()) ||
+           (source->IsSIMD128StackSlot() &&
+            destination->IsSIMD128Register()));
+    XMMRegister reg = cgen_->ToSIMD128Register(
+        source->IsSIMD128Register() ? source : destination);
+    LOperand* other = source->IsSIMD128Register() ? destination : source;
+    ASSERT(other->IsSIMD128StackSlot());
+    Operand other_operand = cgen_->ToOperand(other);
+    __ movups(xmm0, other_operand);
+    __ movups(other_operand, reg);
+    __ movaps(reg, xmm0);
+
   } else {
     // No other combinations are possible.
     UNREACHABLE();
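Two details of the resolver code are easy to miss: register-to-register moves use movaps, which has no alignment constraint between registers, while every access to a spill slot uses movups, presumably because Lithium stack slots are only guaranteed pointer alignment; the slot-to-slot swap additionally parks one operand in xmm0 while shuttling the other through kScratchRegister eight bytes at a time. An intrinsics sketch of the unaligned-access rule (x86 only; the helper names are mine):

    #include <xmmintrin.h>

    // Spill slots are only 8-byte aligned, so 128-bit slot traffic must use
    // the unaligned forms; an aligned movaps could fault on such a slot.
    // _mm_loadu_ps/_mm_storeu_ps compile to exactly the movups used above.
    __m128 LoadSpillSlot(const float* slot) {
      return _mm_loadu_ps(slot);     // movups load
    }

    void StoreSpillSlot(float* slot, __m128 value) {
      _mm_storeu_ps(slot, value);    // movups store
    }

    int main() {
      float buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // buffer + 2 is 8-byte aligned at best: fine for movups, not movaps.
      __m128 v = LoadSpillSlot(buffer + 2);
      StoreSpillSlot(buffer + 4, v);
      return buffer[4] == 3.0f ? 0 : 1;
    }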
diff --git a/src/x64/lithium-x64.cc b/src/x64/lithium-x64.cc
index a0e94d1738e..ba5860c6379 100644
--- a/src/x64/lithium-x64.cc
+++ b/src/x64/lithium-x64.cc
@@ -344,6 +344,19 @@ void LAccessArgumentsAt::PrintDataTo(StringStream* stream) {
 
 
 int LPlatformChunk::GetNextSpillIndex(RegisterKind kind) {
+  switch (kind) {
+    case GENERAL_REGISTERS: return spill_slot_count_++;
+    case DOUBLE_REGISTERS: return spill_slot_count_++;
+    case FLOAT32x4_REGISTERS:
+    case INT32x4_REGISTERS: {
+      // 128-bit values take a pair of 64-bit spill slots.
+      spill_slot_count_++;
+      return spill_slot_count_++;
+    }
+    default:
+      UNREACHABLE();
+      return -1;
+  }
   return spill_slot_count_++;
 }
 
 
@@ -353,11 +366,14 @@ LOperand* LPlatformChunk::GetNextSpillSlot(RegisterKind kind) {
   // Alternatively, at some point, start using half-size
   // stack slots for int32 values.
   int index = GetNextSpillIndex(kind);
-  if (kind == DOUBLE_REGISTERS) {
-    return LDoubleStackSlot::Create(index, zone());
-  } else {
-    ASSERT(kind == GENERAL_REGISTERS);
-    return LStackSlot::Create(index, zone());
+  switch (kind) {
+    case GENERAL_REGISTERS: return LStackSlot::Create(index, zone());
+    case DOUBLE_REGISTERS: return LDoubleStackSlot::Create(index, zone());
+    case FLOAT32x4_REGISTERS:
+      return LFloat32x4StackSlot::Create(index, zone());
+    case INT32x4_REGISTERS: return LInt32x4StackSlot::Create(index, zone());
+    default:
+      UNREACHABLE();
+      return NULL;
   }
 }
 
@@ -1726,6 +1742,10 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
       LOperand* value = UseRegister(instr->value());
       LNumberUntagD* res = new(zone()) LNumberUntagD(value);
       return AssignEnvironment(DefineAsRegister(res));
+    } else if (to.IsSIMD128()) {
+      LOperand* value = UseRegister(instr->value());
+      LTaggedToSIMD128* res = new(zone()) LTaggedToSIMD128(value, to);
+      return AssignEnvironment(DefineAsRegister(res));
     } else if (to.IsSmi()) {
       HValue* val = instr->value();
       LOperand* value = UseRegister(val);
@@ -1809,6 +1829,16 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
         return DefineAsRegister(new(zone()) LInteger32ToDouble(value));
       }
     }
+  } else if (from.IsSIMD128()) {
+    ASSERT(to.IsTagged());
+    info()->MarkAsDeferredCalling();
+    LOperand* value = UseRegister(instr->value());
+    LOperand* temp = TempRegister();
+
+    // Make sure that temp and result_temp are different registers.
+    LUnallocated* result_temp = TempRegister();
+    LSIMD128ToTagged* result = new(zone()) LSIMD128ToTagged(value, temp);
+    return AssignPointerMap(Define(result, result_temp));
   }
   UNREACHABLE();
   return NULL;
@@ -2007,25 +2037,21 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) {
       : UseRegisterOrConstantAtStart(instr->key());
   LLoadKeyed* result = NULL;
 
-  bool load_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind);
   if (!instr->is_typed_elements()) {
     LOperand* obj = UseRegisterAtStart(instr->elements());
-    result = new(zone()) LLoadKeyed(obj, key, NULL);
+    result = new(zone()) LLoadKeyed(obj, key);
   } else {
     ASSERT(
         (instr->representation().IsInteger32() &&
         !(IsDoubleOrFloatElementsKind(instr->elements_kind()))) ||
         (instr->representation().IsDouble() &&
         (IsDoubleOrFloatElementsKind(instr->elements_kind()))) ||
-        (instr->representation().IsTagged() &&
-        (IsSIMD128ElementsKind(instr->elements_kind()))));
+        (instr->representation().IsFloat32x4() &&
+         IsFloat32x4ElementsKind(instr->elements_kind())) ||
+        (instr->representation().IsInt32x4() &&
+         IsInt32x4ElementsKind(instr->elements_kind())));
     LOperand* backing_store = UseRegister(instr->elements());
-    result = new(zone()) LLoadKeyed(backing_store, key,
-        load_128bits_without_sse2 ? TempRegister() : NULL);
-    if (load_128bits_without_sse2) {
-      info()->MarkAsDeferredCalling();
-      AssignPointerMap(result);
-    }
+    result = new(zone()) LLoadKeyed(backing_store, key);
   }
   DefineAsRegister(result);
@@ -2086,8 +2112,10 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) {
          !IsDoubleOrFloatElementsKind(elements_kind)) ||
         (instr->value()->representation().IsDouble() &&
          IsDoubleOrFloatElementsKind(elements_kind)) ||
-        (instr->value()->representation().IsTagged() &&
-         IsSIMD128ElementsKind(elements_kind)));
+        (instr->value()->representation().IsFloat32x4() &&
+         IsFloat32x4ElementsKind(elements_kind)) ||
+        (instr->value()->representation().IsInt32x4() &&
+         IsInt32x4ElementsKind(elements_kind)));
     ASSERT((instr->is_fixed_typed_array() &&
             instr->elements()->representation().IsTagged()) ||
            (instr->is_external() &&
@@ -2103,9 +2131,7 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) {
       ? UseTempRegisterOrConstant(instr->key())
       : UseRegisterOrConstantAtStart(instr->key());
   LOperand* backing_store = UseRegister(instr->elements());
-  LStoreKeyed* result = new(zone()) LStoreKeyed(backing_store, key, val);
-  bool store_128bits_without_sse2 = IsSIMD128ElementsKind(elements_kind);
-  return store_128bits_without_sse2 ? AssignEnvironment(result) : result;
+  return new(zone()) LStoreKeyed(backing_store, key, val);
 }
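GetNextSpillIndex consumes two consecutive 64-bit slot indices for a 128-bit value and returns the second, so the pair [index - 1, index] spans the full 16 bytes that LFloat32x4StackSlot/LInt32x4StackSlot address as one slot. A self-contained model of that arithmetic (the allocator class is my sketch, not v8's):

    #include <cassert>

    // Minimal model of LPlatformChunk's spill-index arithmetic.
    class SpillAllocatorSketch {
     public:
      int NextDouble() { return count_++; }   // one 8-byte slot
      int NextSIMD128() {                     // two adjacent 8-byte slots
        count_++;                             // burn one slot...
        return count_++;                      // ...and return the second
      }
     private:
      int count_ = 0;
    };

    int main() {
      SpillAllocatorSketch alloc;
      assert(alloc.NextDouble() == 0);
      int simd = alloc.NextSIMD128();         // consumes indices 1 and 2
      assert(simd == 2);
      assert(alloc.NextDouble() == 3);        // allocation resumes after pair
      return 0;
    }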
diff --git a/src/x64/lithium-x64.h b/src/x64/lithium-x64.h
index e628243e6fd..c084d06322d 100644
--- a/src/x64/lithium-x64.h
+++ b/src/x64/lithium-x64.h
@@ -139,9 +139,11 @@ class LCodeGen;
   V(ModI)                                      \
   V(MulI)                                      \
   V(NumberTagD)                                \
+  V(SIMD128ToTagged)                           \
   V(NumberTagI)                                \
   V(NumberTagU)                                \
   V(NumberUntagD)                              \
+  V(TaggedToSIMD128)                           \
   V(OsrEntry)                                  \
   V(OuterContext)                              \
   V(Parameter)                                 \
@@ -1526,12 +1528,11 @@ class LLoadExternalArrayPointer V8_FINAL
 
 
-class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 1> {
+class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 0> {
  public:
-  LLoadKeyed(LOperand* elements, LOperand* key, LOperand* temp) {
+  LLoadKeyed(LOperand* elements, LOperand* key) {
     inputs_[0] = elements;
     inputs_[1] = key;
-    temps_[0] = temp;
   }
 
   DECLARE_CONCRETE_INSTRUCTION(LoadKeyed, "load-keyed")
@@ -1548,7 +1549,6 @@ class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 1> {
   }
   LOperand* elements() { return inputs_[0]; }
   LOperand* key() { return inputs_[1]; }
-  LOperand* temp() { return temps_[0]; }
   virtual void PrintDataTo(StringStream* stream) V8_OVERRIDE;
   uint32_t additional_index() const { return hydrogen()->index_offset(); }
   ElementsKind elements_kind() const {
@@ -2011,6 +2011,21 @@ class LNumberTagD V8_FINAL : public LTemplateInstruction<1, 1, 1> {
 };
 
 
+class LSIMD128ToTagged V8_FINAL : public LTemplateInstruction<1, 1, 1> {
+ public:
+  LSIMD128ToTagged(LOperand* value, LOperand* temp) {
+    inputs_[0] = value;
+    temps_[0] = temp;
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp() { return temps_[0]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(SIMD128ToTagged, "simd128-tag")
+  DECLARE_HYDROGEN_ACCESSOR(Change)
+};
+
+
 // Sometimes truncating conversion from a tagged value to an int32.
 class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 0> {
  public:
@@ -2083,6 +2098,23 @@ class LNumberUntagD V8_FINAL : public LTemplateInstruction<1, 1, 0> {
 };
 
 
+class LTaggedToSIMD128 V8_FINAL : public LTemplateInstruction<1, 1, 0> {
+ public:
+  LTaggedToSIMD128(LOperand* value, Representation representation)
+      : representation_(representation) {
+    inputs_[0] = value;
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  Representation representation() const { return representation_; }
+
+  DECLARE_CONCRETE_INSTRUCTION(TaggedToSIMD128, "simd128-untag")
+  DECLARE_HYDROGEN_ACCESSOR(Change)
+
+ private:
+  Representation representation_;
+};
+
+
 class LSmiUntag V8_FINAL : public LTemplateInstruction<1, 1, 0> {
  public:
   LSmiUntag(LOperand* value, bool needs_check)
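The LTemplateInstruction parameters read as <results, inputs, temps>: LSIMD128ToTagged is <1, 1, 1> because tagging needs a scratch GPR for AllocateSIMDHeapObject, while LTaggedToSIMD128 is <1, 1, 0> since the map check and movups need no scratch. A stripped-down model of that convention (my reduction, not v8's template):

    // Sketch of the <R results, I inputs, T temps> convention behind
    // LTemplateInstruction; LOperandSketch stands in as an opaque type.
    struct LOperandSketch {};

    template <int R, int I, int T>
    struct LTemplateInstructionSketch {
      // Minimum array size of 1 keeps T == 0 legal ISO C++.
      LOperandSketch* results_[R == 0 ? 1 : R];
      LOperandSketch* inputs_[I == 0 ? 1 : I];
      LOperandSketch* temps_[T == 0 ? 1 : T];
    };

    // simd128-tag: one result, one input, one allocation scratch register.
    typedef LTemplateInstructionSketch<1, 1, 1> SIMD128ToTaggedShape;
    // simd128-untag: one result, one input, no temps.
    typedef LTemplateInstructionSketch<1, 1, 0> TaggedToSIMD128Shape;

    int main() {
      SIMD128ToTaggedShape tag_shape = SIMD128ToTaggedShape();
      TaggedToSIMD128Shape untag_shape = TaggedToSIMD128Shape();
      (void)tag_shape;
      (void)untag_shape;
      return 0;
    }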
diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc
index a11a6a5cb3f..66ef3f027d7 100644
--- a/src/x64/macro-assembler-x64.cc
+++ b/src/x64/macro-assembler-x64.cc
@@ -898,10 +898,10 @@ void MacroAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
   }
   // R12 to r15 are callee save on all platforms.
   if (fp_mode == kSaveFPRegs) {
-    subq(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters));
+    subq(rsp, Immediate(kSIMD128Size * XMMRegister::kMaxNumRegisters));
     for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) {
       XMMRegister reg = XMMRegister::from_code(i);
-      movsd(Operand(rsp, i * kDoubleSize), reg);
+      movups(Operand(rsp, i * kSIMD128Size), reg);
     }
   }
 }
@@ -914,9 +914,9 @@ void MacroAssembler::PopCallerSaved(SaveFPRegsMode fp_mode,
   if (fp_mode == kSaveFPRegs) {
     for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) {
       XMMRegister reg = XMMRegister::from_code(i);
-      movsd(reg, Operand(rsp, i * kDoubleSize));
+      movups(reg, Operand(rsp, i * kSIMD128Size));
     }
-    addq(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters));
+    addq(rsp, Immediate(kSIMD128Size * XMMRegister::kMaxNumRegisters));
   }
   for (int i = kNumberOfSavedRegs - 1; i >= 0; i--) {
     Register reg = saved_regs[i];
@@ -3715,13 +3715,13 @@ void MacroAssembler::EnterExitFrameEpilogue(int arg_stack_space,
 #endif
   // Optionally save all XMM registers.
   if (save_doubles) {
-    int space = XMMRegister::kMaxNumAllocatableRegisters * kDoubleSize +
+    int space = XMMRegister::kMaxNumAllocatableRegisters * kSIMD128Size +
         arg_stack_space * kRegisterSize;
     subq(rsp, Immediate(space));
     int offset = -2 * kPointerSize;
     for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) {
       XMMRegister reg = XMMRegister::FromAllocationIndex(i);
-      movsd(Operand(rbp, offset - ((i + 1) * kDoubleSize)), reg);
+      movups(Operand(rbp, offset - ((i + 1) * kSIMD128Size)), reg);
     }
   } else if (arg_stack_space > 0) {
     subq(rsp, Immediate(arg_stack_space * kRegisterSize));
@@ -3765,7 +3765,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles) {
     int offset = -2 * kPointerSize;
     for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) {
       XMMRegister reg = XMMRegister::FromAllocationIndex(i);
-      movsd(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize)));
+      movups(reg, Operand(rbp, offset - ((i + 1) * kSIMD128Size)));
     }
   }
   // Get the return address from the stack and restore the frame pointer.
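The PushCallerSaved/PopCallerSaved change widens each register's save slot from kDoubleSize to kSIMD128Size, doubling the reserved block from 128 to 256 bytes for xmm0 through xmm15. The arithmetic, checked in a small sketch (the constant values are v8's actual sizes; the helper function is mine):

    #include <cassert>

    const int kDoubleSize = 8;
    const int kSIMD128Size = 16;      // == 2 * kDoubleSize, per STATIC_ASSERT
    const int kMaxNumRegisters = 16;  // xmm0..xmm15 on x64

    // Bytes reserved below rsp for the caller-saved XMM block.
    int SaveAreaBytes(bool full_simd) {
      return kMaxNumRegisters * (full_simd ? kSIMD128Size : kDoubleSize);
    }

    int main() {
      assert(SaveAreaBytes(false) == 128);  // old: 16 low double halves
      assert(SaveAreaBytes(true) == 256);   // new: 16 full XMM registers
      // After the subq, register i lives at [rsp + i * kSIMD128Size].
      return 0;
    }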
diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h
index eafeba6cc51..2fb86216d85 100644
--- a/src/x64/macro-assembler-x64.h
+++ b/src/x64/macro-assembler-x64.h
@@ -1146,6 +1146,10 @@ class MacroAssembler: public Assembler {
   void AllocateHeapNumber(Register result,
                           Register scratch,
                           Label* gc_required);
+
+  // Allocate a float32x4 or int32x4 object in new space with undefined value.
+  // Returns tagged pointer in result register, or jumps to gc_required if new
+  // space is full.
   void AllocateSIMDHeapObject(int size,
                               Register result,
                               Register scratch,