From e138668779b7e868044b5be8dc316d752e5ce566 Mon Sep 17 00:00:00 2001 From: Han Date: Wed, 18 Dec 2019 18:19:30 -0500 Subject: [PATCH] Added interface in Net.cc to configure NIC from application (Config), starting to add perf and other counters such as RAPL for counting power usage. --- src/native/IxgbeDriver.cc | 532 ++++++++++++++++++++++++-------------- src/native/IxgbeDriver.h | 61 +++-- src/native/Msr.h | 2 +- src/native/Net.cc | 8 + src/native/Net.h | 10 +- src/native/NetIcmp.cc | 6 +- src/native/Rapl.cc | 11 + src/native/Rapl.h | 127 +++++++++ src/native/VirtioNet.cc | 2 + src/native/VirtioNet.h | 1 + 10 files changed, 538 insertions(+), 222 deletions(-) create mode 100644 src/native/Rapl.cc create mode 100644 src/native/Rapl.h diff --git a/src/native/IxgbeDriver.cc b/src/native/IxgbeDriver.cc index 961c985a..39ab6f4f 100644 --- a/src/native/IxgbeDriver.cc +++ b/src/native/IxgbeDriver.cc @@ -59,7 +59,7 @@ void ebbrt::IxgbeDriver::Create(pci::Device& dev) { // TODO remove? ebbrt::clock::SleepMilli(200); - ebbrt::kprintf("intel 82599 card initialzed\n"); + ebbrt::kprintf("82599 initialze complete\n"); } const ebbrt::EthernetAddress& ebbrt::IxgbeDriver::GetMacAddress() { @@ -67,50 +67,174 @@ const ebbrt::EthernetAddress& ebbrt::IxgbeDriver::GetMacAddress() { } void ebbrt::IxgbeDriver::DumpStats() { - for (size_t i = 0; i < Cpu::Count(); i++) { - ebbrt::kprintf("Core %d STATS:\n", (int)i); - ebbrt::kprintf("\t num_recv_itrs:%lld\n", ixgmq[i]->stat_num_itr); - ebbrt::kprintf("\t num_send:%lld\n", ixgmq[i]->stat_num_send); - ebbrt::kprintf("\t num_rx_desc_proc:%lld\n", ixgmq[i]->stat_num_rx); - ebbrt::kprintf("\t num_tx_desc_proc:%lld\n", ixgmq[i]->stat_num_tx); - - // reset to 0 - ixgmq[i]->stat_num_itr = 0; - ixgmq[i]->stat_num_send = 0; - ixgmq[i]->stat_num_rx = 0; - ixgmq[i]->stat_num_tx = 0; - - /*if(ixgmq[i]->stat_perf == false) { - ixgmq[i]->perfCycles = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::cycles); - ixgmq[i]->perfCycles.Start(); - ixgmq[i]->perfInst = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::instructions); - ixgmq[i]->perfInst.Start(); - ixgmq[i]->stat_perf =true; + bool printout = false; + uint64_t tins, tcycs, tllc, numr, nums, numtxbytes, numrxbytes; + double ttime, tnrg, twatts; + + tins = tcycs = tllc = numr = nums = numtxbytes = numrxbytes = 0; + ttime = tnrg = twatts = 0.0; + + for(size_t i = 0; i < Cpu::Count(); i++) { + if(ixgmq[i]->stat_init == false) { + ixgmq[i]->stat_init = true; } else { + ixgmq[i]->stat_init = false; + printout = true; + + tcycs += ixgmq[i]->totalCycles; + tins += ixgmq[i]->totalIns; + tllc += ixgmq[i]->totalLLCmisses; + + numr += ixgmq[i]->stat_num_recv; + nums += ixgmq[i]->stat_num_send; + numrxbytes += ixgmq[i]->stat_num_rx_bytes; + numtxbytes += ixgmq[i]->stat_num_tx_bytes; + + //ebbrt::kprintf("DumpStats() Core %u \t cycles:%llu \n", i, ixgmq[i]->totalCycles); + //ebbrt::kprintf("\t instructions:%llu\n", ixgmq[i]->totalIns); + //ebbrt::kprintf("\t llc_misses:%llu\n", ixgmq[i]->totalLLCmisses); + //ebbrt::kprintf("\t num_recv:%lld num_send:%lld num_rx_bytes:%lld num_tx_bytes=%lld\n", i, ixgmq[i]->stat_num_recv, ixgmq[i]->stat_num_send, ixgmq[i]->stat_num_rx_bytes, ixgmq[i]->stat_num_tx_bytes); + + if(i == 0) { + ttime = ixgmq[i]->totalTime; + tnrg = ixgmq[i]->totalNrg; + twatts = tnrg / ttime; + + //ebbrt::kprintf("\t Total Time (s): %.2llf\n", ixgmq[i]->totalTime); + //ebbrt::kprintf("\t Total Energy (j): %.2llf\n", ixgmq[i]->totalNrg); + //ebbrt::kprintf("\t Power (Watts): %.2llf\n", ixgmq[i]->totalNrg/ixgmq[i]->totalTime); + } + ixgmq[i]->perfCycles.Stop(); ixgmq[i]->perfInst.Stop(); - double cyc = static_cast(ixgmq[i]->perfCycles.Read()); - double inst = static_cast(ixgmq[i]->perfInst.Read()); - - ebbrt::kprintf("Core %d PMC:\n", (int)i); - ebbrt::kprintf("\t cycles:%llf \n", cyc); - ebbrt::kprintf("\t instructions:%llf\n", inst); - ebbrt::kprintf("\t ipc: %llf\n", inst/cyc); - ixgmq[i]->stat_perf = false; - }*/ - } + ixgmq[i]->perfLLC_miss.Stop(); + + ixgmq[i]->stat_num_recv = 0; + ixgmq[i]->stat_num_send = 0; + ixgmq[i]->stat_num_rx_bytes = 0; + ixgmq[i]->stat_num_tx_bytes = 0; + + // accumulate counters + /*ixgmq[i]->totalCycles += static_cast(ixgmq[i]->perfCycles.Read()); + ixgmq[i]->totalIns += static_cast(ixgmq[i]->perfInst.Read()); + ixgmq[i]->totalLLCmisses += static_cast(ixgmq[i]->perfLLC_miss.Read()); + + ebbrt::kprintf("DumpStats() Core %u \t cycles:%llu \n", i, ixgmq[i]->totalCycles); + ebbrt::kprintf("\t instructions:%llu\n", ixgmq[i]->totalIns); + ebbrt::kprintf("\t llc_misses:%llu\n", ixgmq[i]->totalLLCmisses); + ebbrt::kprintf("\t num_recv:%lld num_send:%lld num_rx_bytes:%lld num_tx_bytes=%lld\n", i, ixgmq[i]->stat_num_recv, ixgmq[i]->stat_num_send, ixgmq[i]->stat_num_rx_bytes, ixgmq[i]->stat_num_tx_bytes); + + // clear + ixgmq[i]->perfCycles.Clear(); + ixgmq[i]->perfInst.Clear(); + ixgmq[i]->perfLLC_miss.Clear();*/ + } + } + + if(printout) { + ebbrt::kprintf("\t cycles:%llu\n", tcycs); + ebbrt::kprintf("\t instructions:%llu\n", tins); + ebbrt::kprintf("\t IPC:%.2llf\n", (double)tins/tcycs); + ebbrt::kprintf("\t llc_misses:%llu\n", tllc); + ebbrt::kprintf("\t num_recv:%llu\n", numr); + ebbrt::kprintf("\t num_send:%llu\n", nums); + ebbrt::kprintf("\t num_rx_bytes:%llu\n", numrxbytes); + ebbrt::kprintf("\t num_tx_bytes:%lld\n", numtxbytes); + ebbrt::kprintf("\t total_time:%.2llf\n", ttime); + ebbrt::kprintf("\t total_energy:%.2llf\n", tnrg); + ebbrt::kprintf("\t power:%.2llf\n", twatts); + } + + /*uint32_t i = static_cast(Cpu::GetMine()); + + if(ixgmq[i]->stat_perf == false) { + ixgmq[i]->perfCycles = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::cycles); + ixgmq[i]->perfCycles.Start(); + ixgmq[i]->perfInst = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::instructions); + ixgmq[i]->perfInst.Start(); + ixgmq[i]->perfLLC_miss = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::llc_misses); + ixgmq[i]->perfLLC_miss.Start(); + + if(i == 0) { + ixgmq[i]->powerMeter = ebbrt::rapl::RaplCounter(); + ixgmq[i]->powerMeter.Start(); + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + ixgmq[i]->time_us = std::chrono::duration_cast(d).count(); + } + //ebbrt::kprintf("\t Start Time (us): %llu\n", ixgmq[i]->time_us); + + ixgmq[i]->stat_perf =true; + } + else { + ixgmq[i]->perfCycles.Stop(); + ixgmq[i]->perfInst.Stop(); + ixgmq[i]->perfLLC_miss.Stop(); + + uint64_t cyc = static_cast(ixgmq[i]->perfCycles.Read()); + uint64_t inst = static_cast(ixgmq[i]->perfInst.Read()); + uint64_t llc = static_cast(ixgmq[i]->perfLLC_miss.Read()); + + ebbrt::kprintf("Core %u STATS: num_recv:%lld num_send:%lld num_rx_bytes:%lld num_tx_bytes=%lld\n", i, ixgmq[i]->stat_num_recv, ixgmq[i]->stat_num_send, ixgmq[i]->stat_num_rx_bytes, ixgmq[i]->stat_num_tx_bytes); + ebbrt::kprintf("\t cycles:%llu \n", cyc); + ebbrt::kprintf("\t instructions:%llu\n", inst); + ebbrt::kprintf("\t llc_misses:%llu\n", llc); + ebbrt::kprintf("\t ipc: %.2llf\n", (double)inst/cyc); + + if(i == 0) { + ixgmq[i]->powerMeter = ebbrt::rapl::RaplCounter(); + ixgmq[i]->powerMeter.Stop(); + double energyj = ixgmq[i]->powerMeter.Read(); + ebbrt::kprintf("\t Energy (j): %.2llf\n", energyj); + + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + uint64_t endt = std::chrono::duration_cast(d).count(); + double totaltime = (double)(endt - (ixgmq[i]->time_us)) / 1000000.0; + + //ebbrt::kprintf("\t End (us): %llu\n", endt); + ebbrt::kprintf("\t TotalTime (s): %.2llf\n", totaltime); + ebbrt::kprintf("\t Power (Watts): %.2llf\n", energyj/totaltime); + } + ixgmq[i]->stat_perf = false; + }*/ +} + +void ebbrt::IxgbeDriver::Config(std::string s, uint32_t v) { + uint32_t i = static_cast(Cpu::GetMine()); + if(s == "rx_usecs") { + ebbrt::kprintf_force("rx-usecs = %u\n", v); + WriteEitr(i, (v << 3) | IXGBE_EITR_CNT_WDIS); + + } else if(s == "start_perf") { + ebb_->StartTimer(); + + } else if(s == "stop_perf") { + ebb_->StopTimer(); + + } else if(s == "print") { + ebbrt::kprintf_force("num_recvs=%u totalt(us) = %u\n", ixgmq[i]->stat_num_recv, ixgmq[i]->ttotalt); + + } else if(s == "clear") { + ixgmq[i]->stat_num_recv = 0; + ixgmq[i]->time_us = 0; + ixgmq[i]->ttotalt = 0; + + } + else { + + ebbrt::kprintf_force("%s Unknown command: %s\n", __PRETTY_FUNCTION__, s); + } } void ebbrt::IxgbeDriver::Send(std::unique_ptr buf, PacketInfo pinfo) { -#ifdef STATS_EN +/*#ifdef STATS_EN if(pinfo.get_stats) { DumpStats(); } -#endif + #endif*/ ebb_->Send(std::move(buf), std::move(pinfo)); } -void ebbrt::IxgbeDriver::Run() { ebb_->Run(); } +//void ebbrt::IxgbeDriver::Run() { ebb_->StartTimer(); } // After packet transmission, need to mark bit in // tx queue so that it can be used again @@ -237,9 +361,9 @@ void ebbrt::IxgbeDriverRep::AddTx(uint64_t pa, uint64_t len, ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; ixgmq_.tx_tail_ = (tail + 1) % ixgmq_.tx_size_; -#ifdef STATS_EN +/*#ifdef STATS_EN ixgmq_.stat_num_tx ++; -#endif + #endif*/ } void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { @@ -248,14 +372,102 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { tdesc_advance_tx_rf_t* arfx; tdesc_advance_ctxt_wb_t* actx; uint32_t mcore = static_cast(Cpu::GetMine()); + //uint32_t free_desc = 0; //int i; - + // On TSO, the maximum PAYLEN can be up to 2^18 - 1 len = buf->ComputeChainDataLength(); if (len > 262144) { ebbrt::kprintf_force("\t kabort Send() len=%u greater than TSO limit of 262144 bytes\n", len); return; } + +/*#ifdef STATS_EN + // counter initialization, only need to do once + if(ixgmq_.stat_init == true && ixgmq_.stat_start == false) { + ixgmq_.perfCycles = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::cycles); + ixgmq_.perfInst = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::instructions); + ixgmq_.perfLLC_miss = ebbrt::perf::PerfCounter(ebbrt::perf::PerfEvent::llc_misses); + ixgmq_.perfCycles.Start(); + ixgmq_.perfInst.Start(); + ixgmq_.perfLLC_miss.Start(); + + if(mcore == 0) { + ixgmq_.powerMeter = ebbrt::rapl::RaplCounter(); + ixgmq_.powerMeter.Start(); + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + ixgmq_.time_us = std::chrono::duration_cast(d).count(); + } + + ixgmq_.stat_start = true; + + // every 10000 sends + } else if (ixgmq_.stat_init == true && ixgmq_.stat_start == true && ixgmq_.stat_num_send % 10000 == 0) { + //stop counters + ixgmq_.perfCycles.Stop(); + ixgmq_.perfInst.Stop(); + ixgmq_.perfLLC_miss.Stop(); + ixgmq_.powerMeter.Stop(); + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + uint64_t endt = std::chrono::duration_cast(d).count(); + + // accumulate counters + ixgmq_.totalCycles += static_cast(ixgmq_.perfCycles.Read()); + ixgmq_.totalIns += static_cast(ixgmq_.perfInst.Read()); + ixgmq_.totalLLCmisses += static_cast(ixgmq_.perfLLC_miss.Read()); + ixgmq_.totalTime += ((double)(endt - (ixgmq_.time_us)) / 1000000.0); + ixgmq_.totalNrg += ixgmq_.powerMeter.Read(); + + // clear + ixgmq_.perfCycles.Clear(); + ixgmq_.perfInst.Clear(); + ixgmq_.perfLLC_miss.Clear(); + + // restart again + ixgmq_.perfCycles.Start(); + ixgmq_.perfInst.Start(); + ixgmq_.perfLLC_miss.Start(); + ixgmq_.powerMeter.Start(); + auto dd = ebbrt::clock::Wall::Now().time_since_epoch(); + ixgmq_.time_us = std::chrono::duration_cast(dd).count(); + } + + ixgmq_.stat_num_send ++; + ixgmq_.stat_num_tx_bytes += len; + #endif*/ + + /*if(ixgmq_.tx_tail_ > ixgmq_.tx_head_) { + free_desc = IxgbeDriver::NTXDESCS - (ixgmq_.tx_tail_ - ixgmq_.tx_head_); + } else if(ixgmq_.tx_tail_ < ixgmq_.tx_head_){ + free_desc = IxgbeDriver::NTXDESCS - ((ixgmq_.tx_tail_+IxgbeDriver::NTXDESCS) - ixgmq_.tx_head_); + } else { + free_desc = IxgbeDriver::NTXDESCS; + } + + // 40 descriptors is theoretical limit of how many descriptors can be used at once + if(free_desc < 60) { + + // from first sent descriptor + for (auto rit = ixgmq_.send_to_watch.begin(); rit != ixgmq_.send_to_watch.end(); ++rit) { + arfx = reinterpret_cast(&(ixgmq_.tx_ring_[*rit])); + + // Force memory writes to complete before letting h/w know there + // are new descriptors to fetch. (Only applicable for weak-ordered + // memory model archs, such as IA-64). + asm volatile("sfence" ::: "memory"); + + // wait until its sent + while(arfx->dd == 0) { + // makes sure all reads are finished before checking again + asm volatile("lfence":::"memory"); + } + + // increment head ptr + ixgmq_.tx_head_ = (*rit + 1) % ixgmq_.tx_size_; + //ebbrt::kprintf("\t core=%u Reclaimed *rit=%u head=%u\n", mcore, *rit, ixgmq_.tx_head_); + } + ixgmq_.send_to_watch.clear(); + }*/ if(buf->IsChained()) { b = MakeUniqueIOBuf(len); @@ -296,9 +508,10 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { arfx->dext = 1; //ebbrt::kprintf("Send mcore=%u tail=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + //ixgmq_.send_to_watch.emplace_back(ixgmq_.tx_tail_); ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; - } - else { + } else { if(len > IXGBE_MAX_DATA_PER_TXD) { actx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); actx->raw_1 = 0x0; @@ -326,7 +539,6 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { actx->l4len = pinfo.tcp_hdr_len; //ebbrt::kprintf("Send mcore=%u tail=%u tx_adv_ctxt_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, actx->raw_1, (uint32_t)(actx->raw_2 & 0xFFFFFFFF), (uint32_t)((actx->raw_2 >> 32) & 0xFFFFFFFF)); ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; - //first descriptor arfx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); @@ -345,6 +557,7 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { // In Tcp Segmentation Mode (TSE), PAYLEN defines the TCP/UDP payload length, so no header length arfx->paylen = pinfo.tcp_len; //ebbrt::kprintf("Send() first descriptor mcore=%u tail=%u dtalen=%u paylen=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, IXGBE_MAX_DATA_PER_TXD, pinfo.tcp_len, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; tsodata = data; @@ -366,6 +579,7 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { if(tsolen > IXGBE_MAX_DATA_PER_TXD) { arfx->dtalen = IXGBE_MAX_DATA_PER_TXD; //ebbrt::kprintf("Send() middle descriptor(s) mcore=%u tail=%u dtalen=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, IXGBE_MAX_DATA_PER_TXD, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; } else { // last descriptor @@ -374,10 +588,11 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { arfx->rs = 1; //ebbrt::kprintf("Send() last descriptor mcore=%u tail=%u dtalen=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, tsolen, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + //ixgmq_.send_to_watch.emplace_back(ixgmq_.tx_tail_); ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; } - } - + } } else if(len > 1490 && len < IXGBE_MAX_DATA_PER_TXD) { actx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); @@ -427,6 +642,8 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { // In Tcp Segmentation Mode (TSE), PAYLEN defines the TCP/UDP payload size arfx->paylen = pinfo.tcp_len; //ebbrt::kprintf("Send mcore=%u tail=%u dtalen=%u paylen=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, len, pinfo.tcp_len, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + //ixgmq_.send_to_watch.emplace_back(ixgmq_.tx_tail_); ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; } else { actx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); @@ -468,10 +685,9 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { arfx->dtyp = 0x3; arfx->eop = 1; - arfx->ifcs = 1; - arfx->rs = 1; - + arfx->ifcs = 1; + arfx->dext = 1; arfx->tse = 0; @@ -482,101 +698,27 @@ void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { arfx->txsm = 1; //} //ebbrt::kprintf("Send mcore=%u tail=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); + //ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + //ixgmq_.send_to_watch.emplace_back(ixgmq_.tx_tail_); ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; } } - /*if(len > 1448) { - //dumpPacketContents(reinterpret_cast(data), len); - //ebbrt::kabort("kabort Send()\n"); - actx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); - actx->raw_1 = 0x0; - actx->raw_2 = 0x0; - actx->iplen = IPHDR_LEN; - actx->maclen = ETHHDR_LEN; - // ip packet type = ipv4: 01 - actx->ipv4 = 1; - // l4type = tcp: 01 - actx->l4t = 1; - // for context descriptor 0b0010 - actx->dtyp = 0x2; - // descriptor extension, one for advanced mode - actx->dext = 1; - // from Linux - actx->mss = 1448; - // TCP header length, with no tcp options == 20 - actx->l4len = pinfo.tcp_hdr_len; - - ebbrt::kprintf("Send mcore=%u tail=%u tx_adv_ctxt_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, actx->raw_1, (uint32_t)(actx->raw_2 & 0xFFFFFFFF), (uint32_t)((actx->raw_2 >> 32) & 0xFFFFFFFF)); - - ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; - - arfx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); - arfx->raw[0] = 0x0; - arfx->raw[1] = 0x0; - arfx->address = data; - - // Holds length in bytes of data buffer at the address pointed to by this specific descriptor. - // Max length is 15.5 KB - arfx->dtalen = len; - arfx->dtyp = 0x3; - arfx->eop = 1; - arfx->rs = 1; - arfx->ifcs = 1; - arfx->dext = 1; - arfx->tse = 1; - - arfx->ixsm = 1; - arfx->txsm = 1; - // In Tcp Segmentation Mode (TSE), PAYLEN defines the TCP/UDP payload size - arfx->paylen = pinfo.tcp_len; - - ebbrt::kprintf("Send mcore=%u tail=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); - ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; - //ebbrt::kabort("Exiting\n"); - - } else { - arfx = reinterpret_cast(&(ixgmq_.tx_ring_[ixgmq_.tx_tail_])); - arfx->raw[0] = 0x0; - arfx->raw[1] = 0x0; - - arfx->address = data; - - // Holds length in bytes of data buffer at the address pointed to by this specific descriptor. - // Max length is 15.5 KB - arfx->dtalen = len; - - // In a single-send packet, PAYLEN defines the entire packet size fetched from host memory. - arfx->paylen = len; - - // crc checksum - arfx->ifcs = 1; - - // rs bit should only be set when eop is set - arfx->eop = 1; - arfx->rs = 1; - - // type is advanced - arfx->dtyp = 0x3; - arfx->dext = 1; - - ebbrt::kprintf("Send mcore=%u tail=%u tcp_hdr_len=%u tcp_len=%u tx_adv_rd_desc = 0x%llX 0x%X 0x%X\n", mcore, ixgmq_.tx_tail_, pinfo.tcp_hdr_len, pinfo.tcp_len, arfx->raw[0], (uint32_t)(arfx->raw[1] & 0xFFFFFFFF), (uint32_t)((arfx->raw[1] >> 32) & 0xFFFFFFFF)); - ixgmq_.tx_tail_ = (ixgmq_.tx_tail_ + 1) % ixgmq_.tx_size_; - }*/ + //ebbrt::kprintf("\t Send() core=%u head=%u tail=%u free_desc=%u\n", mcore, ixgmq_.tx_head_, ixgmq_.tx_tail_, free_desc); - // Force memory writes to complete before letting h/w know there - // are new descriptors to fetch. (Only applicable for weak-ordered - // memory model archs, such as IA-64). asm volatile("sfence" ::: "memory"); + //ebbrt::kprintf("\t Send() core=%u head=%u last_tail=%u tail=%u free_desc=%u\n", mcore, ixgmq_.tx_head_, ixgmq_.tx_last_tail_, ixgmq_.tx_tail_, free_desc); WriteTdt_1(mcore, ixgmq_.tx_tail_); - // keep looping until processed while(arfx->dd == 0) { // makes sure all reads are finished before checking again asm volatile("lfence":::"memory"); } + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + ixgmq_.time_us = std::chrono::duration_cast(d).count(); + //rtdh = ReadTdh_1(mcore); //rtdt = ReadTdt_1(mcore); //ebbrt::kprintf("\t Send() core=%u After len=%d rtdh=%u %rtdt=%u tail=%u\n\n", mcore, len, rtdh, rtdt, ixgmq_.tx_tail_); @@ -2012,21 +2154,18 @@ void ebbrt::IxgbeDriver::Init() { // Fill in RSS redirection table (128 entries), sets which core the lowest 7 bits of hashed output goes to // hacky atm for (auto i = 0; i < 32; i += 4) { - if(ncore > 0) { - //WriteReta(i, 0x0000000); - //WriteReta(i+1, 0x0000000); - //WriteReta(i+2, 0x0000000); - //WriteReta(i+3, 0x0000000); + /*if(ncore > 0) { + WriteReta(i, 0x0000000); + WriteReta(i+1, 0x0000000); + WriteReta(i+2, 0x0000000); + WriteReta(i+3, 0x0000000); WriteReta(i, 0x03020100); WriteReta(i+1, 0x07060504); WriteReta(i+2, 0x0B0A0908); WriteReta(i+3, 0x0F0E0D0C); - } - /*if(ncore > 0) { - WriteReta(i, 0x01010101); }*/ // all route to core 0 - /*if(ncore == 1) { + if(ncore == 1) { WriteReta(i, 0x0000000); WriteReta(i+1, 0x0000000); WriteReta(i+2, 0x0000000); @@ -2046,49 +2185,15 @@ void ebbrt::IxgbeDriver::Init() { WriteReta(i+1, 0x7060504); WriteReta(i+2, 0x3020100); WriteReta(i+3, 0x7060504); + } else if(ncore == 16){ + WriteReta(i, 0x03020100); + WriteReta(i+1, 0x07060504); + WriteReta(i+2, 0x0B0A0908); + WriteReta(i+3, 0x0F0E0D0C); } else { - WriteReta(i, 0x3020100); //8 - WriteReta(i+1, 0x7060504); //8 - WriteReta(i+2, 0xB0A0908); // 8 - //WriteReta(i+3, 0x3020100); - //WriteReta(i+2, 0xB0A0908); - //WriteReta(i+3, 0xF0E0D0C); - }*/ + ebbrt::kabort("%s: Can only redirect interrupts to 16 cores\n", __FUNCTION__); + } } - - //temp -/* WriteReta(3, 0x3020100); - WriteReta(7, 0x7060504); - WriteReta(11, 0xB0A0908); - WriteReta(15, 0x3020100); - WriteReta(19, 0x7060504); - WriteReta(23, 0xB0A0908); - WriteReta(27, 0x3020100); - WriteReta(31, 0x7060504); - WriteReta(35, 0xB0A0908); - WriteReta(39, 0x3020100); - WriteReta(43, 0x7060504); - WriteReta(47, 0xB0A0908); - WriteReta(51, 0x3020100); - WriteReta(55, 0x7060504); - WriteReta(59, 0xB0A0908); - WriteReta(63, 0x3020100); - WriteReta(67, 0x7060504); - WriteReta(71, 0xB0A0908); - WriteReta(75, 0x3020100); - WriteReta(79, 0x7060504); - WriteReta(83, 0xB0A0908); - WriteReta(87, 0x3020100); - WriteReta(91, 0x7060504); - WriteReta(95, 0xB0A0908); - WriteReta(99, 0x3020100); - WriteReta(103, 0x7060504); - WriteReta(107, 0xB0A0908); - WriteReta(111, 0x3020100); - WriteReta(115, 0x7060504); - WriteReta(119, 0xB0A0908); - WriteReta(123, 0x3020100); - WriteReta(127, 0x3080400);*/ for (auto i = 0; i < 128; i++) { WriteFtqf(i, 0x0); @@ -2380,7 +2485,7 @@ void ebbrt::IxgbeDriver::SetupMultiQueue(uint32_t i) { WriteTdt(i, 0x0); ixgmq[i]->tx_tail_=0; - + // TODO: set up dca txctrl FreeBSD? // clear TXdescWBROen //WriteDcaTxctrlTxdescWbro(i, ~(0x1 << 11)); @@ -2414,9 +2519,9 @@ uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, rdesc_adv_wb_t* tmp; tmp = reinterpret_cast(&(ixgmq_.rx_ring_[ixgmq_.rx_head_])); - // if rx packet not ready + // if no rx packets ready if (!(tmp->dd)) { - return 1; + return 0; } auto rsccnt = tmp->rsccnt; @@ -2458,8 +2563,8 @@ uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, // bump head ptr ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; - - return 0; + + return 1; } // not sure what case this is, no context started, eop is set but rsccnt > 0 else if (rsccnt > 0 && tmp->eop && !(ixgmq_.rsc_used)) { @@ -2492,7 +2597,7 @@ uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, // bump head ptr ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; - return 0; + return 1; } // START NEW RSC CONTEXT else if (rsccnt > 0 && !(tmp->eop) && !(ixgmq_.rsc_used)) { @@ -2550,7 +2655,7 @@ uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, *process_rsc = true; - return 0; + return 1; } else { // shouldn't hit here ebbrt::kabort("%s Not sure what state\n", __FUNCTION__); @@ -2594,7 +2699,7 @@ uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, } #endif*/ - return 1; + return 0; } void ebbrt::IxgbeDriverRep::ReceivePoll() { @@ -2606,16 +2711,30 @@ void ebbrt::IxgbeDriverRep::ReceivePoll() { uint32_t rnt; uint32_t rxhead; process_rsc = false; -#ifdef STATS_EN - ixgmq_.stat_num_itr ++; -#endif rxflag = 0; count = 0; rnt = 0; uint32_t mcore = static_cast(Cpu::GetMine()); +#ifdef STATS_EN + ixgmq_.stat_num_recv ++; +#endif - // get address of buffer with data - while (GetRxBuf(&len, &bAddr, &rxflag, &process_rsc, &rnt, &rxhead) == 0) { + if(ixgmq_.time_us == 0) { + //auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + //ixgmq_.time_us = std::chrono::duration_cast(d).count(); + } else { + auto d = ebbrt::clock::Wall::Now().time_since_epoch(); + uint64_t endt = std::chrono::duration_cast(d).count(); + ixgmq_.ttotalt += (endt - ixgmq_.time_us); + + //ebbrt::kprintf("Core %u: time elapsed (us): %llu\n", mcore, endt - ixgmq_.time_us); + + //auto dd = ebbrt::clock::Wall::Now().time_since_epoch(); + //ixgmq_.time_us = std::chrono::duration_cast(dd).count(); + } + + // while there are still packets received + while (GetRxBuf(&len, &bAddr, &rxflag, &process_rsc, &rnt, &rxhead) == 1) { // hit last rsc context, start to process all buffers if (process_rsc) { process_rsc = false; @@ -2650,15 +2769,20 @@ void ebbrt::IxgbeDriverRep::ReceivePoll() { }*/ root_.itf_.Receive(std::move(b), rxflag); } else { - //count ++; + count ++; //ebbrt::kprintf("Core: %d ReceivePoll() len=%d rxhead=%d\n", mcore, len, rxhead); + +#ifdef STATS_EN + ixgmq_.stat_num_rx_bytes += len; +#endif ixgmq_.circ_buffer_[rxhead]->SetLength(len); auto b = std::move(ixgmq_.circ_buffer_[rxhead]); // bump tail ptr ixgmq_.rx_tail_ = (ixgmq_.rx_tail_ + 1) % ixgmq_.rx_size_; + /*if (len > 60) { ebbrt::kprintf("Core: %d ReceivePoll() len=%d rxhead=%d START\n", mcore, len, rxhead); @@ -2690,13 +2814,8 @@ void ebbrt::IxgbeDriverRep::ReceivePoll() { reinterpret_cast((ixgmq_.circ_buffer_[rxhead])->MutData()); // update buffer with new adder ixgmq_.rx_ring_[rxhead].buffer_address = rxphys; - - // TODO: Update tail register here or above? - //if (count > 0) { - // update reg WriteRdt_1(mcore, ixgmq_.rx_tail_); - //} - + /*// done with buffer addr above, now to reuse it auto tail = ixgmq_.rx_tail_; @@ -2741,6 +2860,12 @@ void ebbrt::IxgbeDriverRep::ReceivePoll() { }*/ } } + + // TODO: Update tail register here or above? +// if (count > 0) { + // update reg + // WriteRdt_1(mcore, ixgmq_.rx_tail_); + //} } ebbrt::IxgbeDriverRep::IxgbeDriverRep(const IxgbeDriver& root) @@ -2748,6 +2873,23 @@ ebbrt::IxgbeDriverRep::IxgbeDriverRep(const IxgbeDriver& root) ixgmq_(root.GetMultiQueue(Cpu::GetMine())), receive_callback_([this]() { ReceivePoll(); }) { //this->ReceivePoll(); + /*auto timeout = + std::chrono::seconds(1); + timer->Start(*this, timeout,true);*/ +} + +void ebbrt::IxgbeDriverRep::IxgbeDriverRep::StartTimer() { + auto timeout = std::chrono::seconds(1); + timer->Start(*this, timeout, true); +} + +void ebbrt::IxgbeDriverRep::IxgbeDriverRep::StopTimer() { + timer->Stop(*this); +} + +void ebbrt::IxgbeDriverRep::IxgbeDriverRep::Fire() { + uint32_t mcore = static_cast(Cpu::GetMine()); + ebbrt::kprintf_force("Core %u: Fire()\n", mcore); } uint16_t ebbrt::IxgbeDriverRep::ReadRdh_1(uint32_t n) { diff --git a/src/native/IxgbeDriver.h b/src/native/IxgbeDriver.h index 468b9f79..6bcfcd97 100644 --- a/src/native/IxgbeDriver.h +++ b/src/native/IxgbeDriver.h @@ -19,6 +19,7 @@ #include "Pfn.h" #include "SlabAllocator.h" #include "Perf.h" +#include "Rapl.h" // Receive Side Scaling (RSC) enabled //#define RSC_EN @@ -28,11 +29,12 @@ //#define TX_HEAD_WB // Collect Statistics Flag -//#define STATS_EN +#define STATS_EN //#define MAX_DESC -namespace ebbrt { +namespace ebbrt { + // Per-core receive and transmit queue typedef struct { rdesc_legacy_t* rx_ring; @@ -67,7 +69,7 @@ class IxgbeDriver : public EthernetDevice { // each core gets a queue struct ixgmq.resize(Cpu::Count()); } - + static void Create(pci::Device& dev); static bool Probe(pci::Device& dev) { if (dev.GetVendorId() == kIxgbeVendorId && @@ -77,12 +79,14 @@ class IxgbeDriver : public EthernetDevice { } return false; } - - void Run(); + + //void Run(); void Send(std::unique_ptr buf, PacketInfo pinfo) override; + void Config(std::string s, uint32_t v) override; const EthernetAddress& GetMacAddress() override; protected: + static const constexpr uint16_t kIxgbeVendorId = 0x8086; static const constexpr uint16_t kIxgbeDeviceId = 0x10FB; @@ -101,10 +105,8 @@ class IxgbeDriver : public EthernetDevice { static const constexpr uint32_t NTXDESCS = 8192; static const constexpr uint32_t NRXDESCS = 8192; #else - //static const constexpr uint32_t NTXDESCS = 512; - //static const constexpr uint32_t NRXDESCS = 512; - static const constexpr uint32_t NTXDESCS = 128; - static const constexpr uint32_t NRXDESCS = 128; + static const constexpr uint32_t NTXDESCS = 64; + static const constexpr uint32_t NRXDESCS = 64; #endif // Linux Defaults @@ -114,7 +116,7 @@ class IxgbeDriver : public EthernetDevice { //static const constexpr uint32_t RXBUFSZ = 4096; //static const constexpr uint32_t RXBUFSZ = 16384; - static const constexpr uint8_t ITR_INTERVAL = 6; + static const constexpr uint8_t ITR_INTERVAL = 200; // 3 bits only (0 - 7) in (RSC_DELAY + 1) * 4 us static const constexpr uint8_t RSC_DELAY = 1; @@ -142,6 +144,11 @@ class IxgbeDriver : public EthernetDevice { // TODO: should be optimized rsc_chain_.reserve(NRXDESCS+1); + // keeps a log of descriptors where eop == 1 + // used to coalesce reclaiming of tx descriptors + // once the threshold of some limit is hit + send_to_watch.reserve(NRXDESCS); + // RX ring buffer allocation auto sz = align::Up(sizeof(rdesc_legacy_t) * NRXDESCS, 4096); auto order = Fls(sz - 1) - pmem::kPageShift + 1; @@ -198,7 +205,7 @@ class IxgbeDriver : public EthernetDevice { ebbrt::kbugon((tx_size_bytes_ & 0x7F) != 0, "tx_size_bytes_ not 128 byte aligned\n"); } - + size_t rx_head_; size_t rx_tail_; size_t rx_size_; @@ -215,7 +222,8 @@ class IxgbeDriver : public EthernetDevice { std::vector> circ_buffer_; std::vector> rsc_chain_; - + std::vector send_to_watch; + rdesc_legacy_t* rx_ring_; tdesc_legacy_t* tx_ring_; bool* tx_isctx_; @@ -228,18 +236,28 @@ class IxgbeDriver : public EthernetDevice { #endif // stats - uint64_t stat_num_itr{0}; + uint64_t stat_num_recv{0}; uint64_t stat_num_send{0}; - uint64_t stat_num_rx{0}; - uint64_t stat_num_tx{0}; - - bool stat_perf{false}; + uint64_t stat_num_rx_bytes{0}; + uint64_t stat_num_tx_bytes{0}; + uint64_t time_us{0}; + uint64_t ttotalt{0}; + uint64_t totalCycles{0}; + uint64_t totalIns{0}; + uint64_t totalLLCmisses{0}; + double totalNrg{0.0}; + double totalTime{0.0}; + + bool stat_start{false}; + bool stat_init{false}; ebbrt::perf::PerfCounter perfCycles; ebbrt::perf::PerfCounter perfInst; ebbrt::perf::PerfCounter perfLLC_ref; ebbrt::perf::PerfCounter perfLLC_miss; ebbrt::perf::PerfCounter perfTLB_store_miss; ebbrt::perf::PerfCounter perfTLB_load_miss; + ebbrt::rapl::RaplCounter powerMeter; + }; private: @@ -483,7 +501,7 @@ class IxgbeDriver : public EthernetDevice { friend class IxgbeDriverRep; }; // class IxgbeDriver -class IxgbeDriverRep : public MulticoreEbb { +class IxgbeDriverRep : public MulticoreEbb, Timer::Hook { public: explicit IxgbeDriverRep(const IxgbeDriver& root); void Run(); @@ -495,7 +513,9 @@ class IxgbeDriverRep : public MulticoreEbb { enum l4_type l4type); void AddTx(uint64_t pa, uint64_t len, uint64_t totallen, bool first, bool last, uint8_t ctx, bool ip_cksum, bool tcpudp_cksum, bool tse, int hdr_len); - + void StartTimer(); + void StopTimer(); + private: uint16_t ReadRdh_1(uint32_t n); uint16_t ReadRdt_1(uint32_t n); @@ -509,7 +529,8 @@ class IxgbeDriverRep : public MulticoreEbb { uint32_t ReadTdt_1(uint32_t n); uint32_t GetRxBuf(uint32_t* len, uint64_t* bAddr, uint64_t* rxflag, bool* process_rsc, uint32_t* rnt, uint32_t* rxhead); - + void Fire() override; + const IxgbeDriver& root_; e10k_queue_t& ixgq_; IxgbeDriver::e10Kq& ixgmq_; diff --git a/src/native/Msr.h b/src/native/Msr.h index 9adc0699..a5358f4a 100644 --- a/src/native/Msr.h +++ b/src/native/Msr.h @@ -29,7 +29,7 @@ inline uint64_t Read(uint32_t index) { inline void Write(uint32_t index, uint64_t data) { uint32_t low = data; - uint32_t high = data >> 32; + uint32_t high = (data >> 32) & 0xFFFFFFFF; #ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ // TODO - correct fix is here? diff --git a/src/native/Net.cc b/src/native/Net.cc index f65bfe47..c3f63e4a 100644 --- a/src/native/Net.cc +++ b/src/native/Net.cc @@ -59,3 +59,11 @@ void ebbrt::NetworkManager::Interface::Send(std::unique_ptr b, PacketInfo pinfo) { ether_dev_.Send(std::move(b), std::move(pinfo)); } + +void ebbrt::NetworkManager::Config(std::string s, uint32_t v) { + interface_->Config(std::move(s), v); +} + +void ebbrt::NetworkManager::Interface::Config(std::string s, uint32_t v) { + ether_dev_.Config(std::move(s), v); +} diff --git a/src/native/Net.h b/src/native/Net.h index 65ad3595..4125f2f2 100644 --- a/src/native/Net.h +++ b/src/native/Net.h @@ -55,6 +55,7 @@ class EthernetDevice { public: virtual void Send(std::unique_ptr buf, PacketInfo pinfo = PacketInfo()) = 0; + virtual void Config(std::string s, uint32_t v) = 0; virtual const EthernetAddress& GetMacAddress() = 0; virtual ~EthernetDevice() {} }; @@ -246,6 +247,8 @@ class NetworkManager : public StaticSharedEbb { std::unique_ptr buf); void SendIp(std::unique_ptr buf, Ipv4Address src, Ipv4Address dst, uint8_t proto, PacketInfo pinfo = PacketInfo()); + void Config(std::string s, uint32_t v); + const EthernetAddress& MacAddress(); const ItfAddress* Address() const { return address_.get(); } void SetAddress(std::unique_ptr address) { @@ -256,7 +259,7 @@ class NetworkManager : public StaticSharedEbb { private: struct DhcpPcb : public CacheAligned, public Timer::Hook { void Fire() override; - + UdpPcb udp_pcb; DhcpMessage last_offer; enum State { kInactive, kSelecting, kRequesting, kBound } state; @@ -311,9 +314,10 @@ class NetworkManager : public StaticSharedEbb { Interface& NewInterface(EthernetDevice& ether_dev); Ipv4Address IpAddress(); - + void Config(std::string s, uint32_t v); + private: - Future StartDhcp(); + Future StartDhcp(); void SendIp(std::unique_ptr buf, Ipv4Address src, Ipv4Address dst, uint8_t proto, PacketInfo = PacketInfo()); void TcpReset(bool ack, uint32_t seqno, uint32_t ackno, diff --git a/src/native/NetIcmp.cc b/src/native/NetIcmp.cc index a03c616e..623e7684 100644 --- a/src/native/NetIcmp.cc +++ b/src/native/NetIcmp.cc @@ -12,7 +12,7 @@ void ebbrt::NetworkManager::Interface::ReceiveIcmp( EthernetHeader& eth_header, Ipv4Header& ip_header, std::unique_ptr buf) { auto packet_len = buf->ComputeChainDataLength(); - ebbrt::kprintf("ReceiveIcmp() packet_len=%u\n", packet_len); + //ebbrt::kprintf("ReceiveIcmp() packet_len=%u\n", packet_len); if (unlikely(packet_len < sizeof(IcmpHeader))) return; @@ -20,7 +20,7 @@ void ebbrt::NetworkManager::Interface::ReceiveIcmp( auto dp = buf->GetMutDataPointer(); auto& icmp_header = dp.Get(); - ebbrt::kprintf("ReceiveIcmp() packet_len=%u\n", packet_len); + //ebbrt::kprintf("ReceiveIcmp() packet_len=%u\n", packet_len); #ifndef __EBBRT_ENABLE_BAREMETAL_NIC__ // software checksum if (IpCsum(*buf)) @@ -51,7 +51,7 @@ void ebbrt::NetworkManager::Interface::ReceiveIcmp( PacketInfo pinfo; pinfo.flags = 0; // hijacking ping to dump ixgbe statistics - pinfo.get_stats = false; + pinfo.get_stats = true; //#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ // hardware ip checksum offload // pinfo.flags |= PacketInfo::kNeedsIpCsum; diff --git a/src/native/Rapl.cc b/src/native/Rapl.cc new file mode 100644 index 00000000..a1438b9d --- /dev/null +++ b/src/native/Rapl.cc @@ -0,0 +1,11 @@ +#include "Debug.h" +//#include "Msr.h" +#include "Rapl.h" + +ebbrt::rapl::RaplCounter::~RaplCounter() { + return; +} + +double ebbrt::rapl::RaplCounter::Read() { + return counter_offset; +} diff --git a/src/native/Rapl.h b/src/native/Rapl.h new file mode 100644 index 00000000..5bd2ca8d --- /dev/null +++ b/src/native/Rapl.h @@ -0,0 +1,127 @@ +// Copyright Boston University SESA Group 2013 - 2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +#ifndef BAREMETAL_SRC_INCLUDE_EBBRT_RAPL_H_ +#define BAREMETAL_SRC_INCLUDE_EBBRT_RAPL_H_ +#include +#include + +#include "Debug.h" +#include "Msr.h" +#include "Clock.h" + +namespace ebbrt { +namespace rapl { + const constexpr uint32_t kMsrIntelRaplPowerUnit = 0x606; + + /* Package RAPL Domain */ + const constexpr uint32_t kMsrPkgRaplPowerLimit = 0x610; + const constexpr uint32_t kMsrIntelPkgEnergyStatus = 0x611; + const constexpr uint32_t kMsrPkgPerfStatus = 0x613; + const constexpr uint32_t kMsrPkgPowerInfo = 0x614; + + /* PP0 RAPL Domain */ + const constexpr uint32_t kMsrPp0PowerLimit = 0x638; + const constexpr uint32_t kMsrIntelPp0EnergyStatus = 0x639; + const constexpr uint32_t kMsrPp0Policy = 0x63A; + const constexpr uint32_t kMsrPp0PerfStatus = 0x63B; + + /* PP1 RAPL Domain, may reflect to uncore devices */ + const constexpr uint32_t kMsrPp1PowerLimit = 0x640; + const constexpr uint32_t kMsrPp1EnergyStatus = 0x641; + const constexpr uint32_t kMsrPp1Polcy = 0x642; + + /* DRAM RAPL Domain */ + const constexpr uint32_t kMsrDramPowerLimit = 0x618; + const constexpr uint32_t kMsrDramEnergyStatus = 0x619; + const constexpr uint32_t kMsrDramPerfStatus = 0x61B; + const constexpr uint32_t kMsrDramPowerInfo = 0x61C; + + /* PSYS RAPL Domain */ + const constexpr uint32_t kMsrPlatformEnergyStatus = 0x64d; + + /* RAPL UNIT BITMASK */ + const constexpr uint32_t POWER_UNIT_OFFSET = 0; + const constexpr uint32_t POWER_UNIT_MASK = 0x0F; + + const constexpr uint32_t ENERGY_UNIT_OFFSET = 0x08; + const constexpr uint32_t ENERGY_UNIT_MASK = 0x1F00; + + const constexpr uint32_t TIME_UNIT_OFFSET = 0x10; + const constexpr uint32_t TIME_UNIT_MASK = 0xF000; + + class RaplCounter { //: public Timer::Hook { + public: + RaplCounter() { + uint64_t res = ebbrt::msr::Read(kMsrIntelRaplPowerUnit); + rapl_power_units = pow(0.5,(double)(res&0xf)); + rapl_cpu_energy_units = pow(0.5,(double)((res>>8)&0x1f)); + rapl_time_units = pow(0.5,(double)((res>>16)&0xf)); + rapl_dram_energy_units = rapl_cpu_energy_units; + }; + // move constructors + RaplCounter(RaplCounter&& other) { + rapl_power_units = other.rapl_power_units; + rapl_cpu_energy_units = other.rapl_cpu_energy_units; + rapl_time_units = other.rapl_time_units; + rapl_dram_energy_units = other.rapl_dram_energy_units; + + other.rapl_power_units = 0.0; + other.rapl_cpu_energy_units = 0.0; + other.rapl_time_units = 0.0; + other.rapl_dram_energy_units = 0.0; + }; + RaplCounter& operator=(RaplCounter&& other) { + rapl_power_units = other.rapl_power_units; + rapl_cpu_energy_units = other.rapl_cpu_energy_units; + rapl_time_units = other.rapl_time_units; + rapl_dram_energy_units = other.rapl_dram_energy_units; + + other.rapl_power_units = 0.0; + other.rapl_cpu_energy_units = 0.0; + other.rapl_time_units = 0.0; + other.rapl_dram_energy_units = 0.0; + + return *this; + }; + + // delete implicitly created copy constructor + RaplCounter(const RaplCounter& other) = delete; + RaplCounter& operator=(const RaplCounter& other) = delete; + + ~RaplCounter(); + void Start() { + uint64_t res = ebbrt::msr::Read(kMsrIntelPkgEnergyStatus); + counter_offset = (double)res*rapl_cpu_energy_units; + + /*ebbrt::kprintf("\t\tPower units = %.3fW\n",rapl_power_units); + ebbrt::kprintf("\t\tCPU Energy units = %.8fJ\n",rapl_cpu_energy_units); + ebbrt::kprintf("\t\tDRAM Energy units = %.8fJ\n",rapl_dram_energy_units); + ebbrt::kprintf("\t\tTime units = %.8fs\n",rapl_time_units); */ + //ebbrt::kprintf("Package Energy before: %.6fJ\n", counter_offset); + } + + void Stop() { + uint64_t res = ebbrt::msr::Read(kMsrIntelPkgEnergyStatus); + double after = (double)res*rapl_cpu_energy_units; + //ebbrt::kprintf("Package Energy after: %.6fJ\n", after); + counter_offset = after - counter_offset; + //ebbrt::kprintf("Total Package Energy used: %.6fJ\n", after - counter_offset); + } + + double Read(); + private: + double rapl_power_units{0.0}; + double rapl_cpu_energy_units{0.0}; + double rapl_time_units{0.0}; + double rapl_dram_energy_units{0.0}; + double counter_offset{0.0}; + //void Fire() override; + }; + +} // namespace rapl +} // namespace ebbrt + +#endif diff --git a/src/native/VirtioNet.cc b/src/native/VirtioNet.cc index 61064a2f..07c982e5 100644 --- a/src/native/VirtioNet.cc +++ b/src/native/VirtioNet.cc @@ -164,6 +164,8 @@ ebbrt::VirtioNetRep::VirtioNetRep(const VirtioNetDriver& root) receive_callback_([this]() { ReceivePoll(); }), circ_buffer_head_(0), circ_buffer_tail_(0) {} +void ebbrt::VirtioNetDriver::Config(std::string s, uint32_t v) {} + void ebbrt::VirtioNetDriver::Send(std::unique_ptr buf, PacketInfo pinfo) { ebb_->Send(std::move(buf), std::move(pinfo)); diff --git a/src/native/VirtioNet.h b/src/native/VirtioNet.h index 63b616b5..dac8b043 100644 --- a/src/native/VirtioNet.h +++ b/src/native/VirtioNet.h @@ -24,6 +24,7 @@ class VirtioNetDriver : public VirtioDriver, static void Create(pci::Device& dev); static uint32_t GetDriverFeatures(); void Send(std::unique_ptr buf, PacketInfo pinfo) override; + void Config(std::string s, uint32_t v) override; const EthernetAddress& GetMacAddress() override; private: