Commit f0b01553 authored by Peter Steinbach, committed by GitHub

Merge pull request #17 from psteinb/amd_zen_support

Amd zen support
parents bd2c9bc4 b4f8217e
......@@ -5,11 +5,12 @@
#include "detail/ct/detect_compiler.hpp"
#include "detail/ct/detect_arch.hpp"
#include "detail/rt/x86_cpuid.hpp"
#include "detail/tags.hpp"
#include "detail/bit_view.hpp"
#include "detail/definitions.hpp"
#include "detail/rt/x86_cpuid.hpp"
#include "detail/rt/x86_meta.hpp"
#include "detail/rt/x86_sizes.hpp"
#include <iostream>
......@@ -37,108 +38,6 @@ namespace compass {
}
/**
 * Read the CPU vendor identification string via CPUID leaf 0.
 *
 * The 12 characters are assembled from the raw bytes of the EBX, EDX and ECX
 * registers, in that order (4 bytes each).
 *
 * @return the 12-character vendor string
 */
static std::string vendor(ct::x86_tag) {
    std::array<std::uint32_t,4> cpuid_regs = rt::cpuid(0);
    std::string vendor_name = "";

    // A std::array of four elements is never empty; the guard is kept as-is.
    if(cpuid_regs.empty())
        return vendor_name;

    vendor_name.resize(3*4);
    auto out = vendor_name.begin();

    // Append the four raw bytes of one register and advance the write position.
    auto append_register = [&out](std::uint32_t& reg){
        char* first = reinterpret_cast<char*>(&reg);
        out = std::copy(first, first + 4, out);
    };

    append_register(cpuid_regs[ct::ebx]);
    append_register(cpuid_regs[ct::edx]);
    append_register(cpuid_regs[ct::ecx]);

    return vendor_name;
}
//for details, see https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
/**
 * Read the processor brand string via CPUID leaves 0x80000002..0x80000004.
 *
 * Each of the three leaves contributes the raw bytes of its four registers
 * (index 0..3 of the returned register set), 16 bytes per leaf.
 *
 * @return a string of exactly 48 characters holding the bytes as reported by
 *         the CPU (any padding/terminator bytes are kept), or an empty string
 *         if the highest extended leaf reported by CPUID 0x80000000 is below
 *         0x80000004.
 */
static std::string brand(ct::x86_tag) {
    std::string value = "";

    auto regs = rt::cpuid(0x80000000);
    if(regs[ct::eax] < 0x80000004)
        return value;

    value.resize(48);
    auto out = value.begin();

    for(std::uint32_t i = 2; i<5;++i){
        auto ret = rt::cpuid(0x80000000 + i);

        for(std::uint32_t r = 0; r<4;++r){
            // Copy byte-wise instead of storing through a std::uint32_t* that
            // points into the char buffer: that store violated strict aliasing
            // and relied on the buffer being suitably aligned.
            const std::uint32_t word = ret[r];
            const char* first = reinterpret_cast<const char*>(&word);
            out = std::copy(first, first + sizeof(word), out);
        }
    }

    return value;
}
/**
 * Derive a short model name from the CPUID brand and vendor strings.
 *
 * The vendor string is reduced by stripping "Genuine" (host is treated as
 * Intel) or "Authentic" (host is treated as AMD). An empty string is returned
 * if the reduced vendor string does not occur in the brand string, or if the
 * host is neither of the two.
 *
 * Intel: the text between the last ")" and the last "@" of the brand string,
 *        with up to two " CPU " tokens and all whitespace removed,
 *        e.g. "Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz" -> "i7-3520M".
 * AMD:   the text that follows the vendor token plus one separator character,
 *        up to (not including) the last "Processor",
 *        e.g. "AMD EPYC 7401P 24-Core Processor" -> "EPYC 7401P 24-Core "
 *        (the trailing blank is kept). Without "Processor" the remainder of
 *        the brand string is returned.
 *
 * @return the model name, or an empty string if none could be derived
 */
static std::string device_name(ct::x86_tag) {
    std::string brand_str = compass::runtime::detail::brand(ct::x86_tag());
    std::string vendor = compass::runtime::detail::vendor(ct::x86_tag());

    std::size_t find_pos = 0;
    bool is_intel = false;
    bool is_amd = false;

    //remove Genuine in Vendor string if present, Intel host
    if((find_pos = vendor.find("Genuine"))!=std::string::npos){
        vendor.erase(find_pos,7);
        is_intel = true;
    }

    //remove Authentic in Vendor string if present, AMD host, e.g. AMD EPYC 7401P 24-Core Processor
    if((find_pos = vendor.find("Authentic"))!=std::string::npos){
        vendor.erase(find_pos,9);
        is_amd = true;
    }

    std::string value = "";

    // Keep the vendor position in its own variable: find_pos is reused below.
    const std::size_t vendor_pos = brand_str.find(vendor);
    if(vendor_pos == std::string::npos)
        return value;

    if(is_intel){
        //based on the Intel chip test strings that are known
        auto second_bracket_itr = brand_str.rfind(")");
        auto last_at_itr = brand_str.rfind("@");
        // If ")" is missing, npos+1 wraps to 0 (start of string); if "@" is
        // missing, the count is huge and substr clamps to the end.
        value = brand_str.substr(second_bracket_itr+1,last_at_itr-(second_bracket_itr+1));

        //TODO: why run this 2 times?
        for(int pass = 0; pass < 2; ++pass){
            if((find_pos = value.find(" CPU "))!=std::string::npos){
                value.erase(find_pos,5);
            }
        }

        // isspace() is only defined for values representable as unsigned char
        // (or EOF), so convert before classifying.
        value.erase(std::remove_if(value.begin(), value.end(),
                                   [](char c){ return isspace(static_cast<unsigned char>(c)) != 0; }),
                    value.end());
    }

    if(is_amd){
        // Skip the vendor token and the separator behind it; clamp so that
        // substr() cannot throw if the token sits at the very end.
        const std::size_t model_begin = std::min(vendor_pos + 4, brand_str.size());
        const std::size_t end_itr = brand_str.rfind("Processor");

        // substr() takes a length, not an end position: measure it from
        // model_begin. (The previous "end_itr-4" was only right when the
        // vendor token started at offset 0.)
        const std::size_t model_len =
            (end_itr != std::string::npos && end_itr >= model_begin)
            ? end_itr - model_begin
            : std::string::npos;

        value = brand_str.substr(model_begin, model_len);
    }

    return value;
}
// //too difficult for now
// //https://stackoverflow.com/questions/2901694/programmatically-detect-number-of-physical-processors-cores-or-if-hyper-threadin
// static std::uint32_t physical_threads(ct::x86_tag) {
......
#ifndef COMPASS_RT_X86_META_H_
#define COMPASS_RT_X86_META_H_
#include "detail/ct/detect_os.hpp"
#include "detail/ct/detect_compiler.hpp"
#include "detail/ct/detect_arch.hpp"
#include "detail/rt/x86_cpuid.hpp"
#include <iostream>
#include <string>
#include <algorithm>
#include <thread>
namespace compass {
namespace runtime {
namespace detail {
/**
 * Read the CPU vendor identification string via CPUID leaf 0.
 *
 * The 12 characters are assembled from the raw bytes of the EBX, EDX and ECX
 * registers, in that order (4 bytes each).
 *
 * @return the 12-character vendor string
 */
static std::string vendor(ct::x86_tag) {
    std::array<std::uint32_t,4> cpuid_regs = rt::cpuid(0);
    std::string vendor_name = "";

    // A std::array of four elements is never empty; the guard is kept as-is.
    if(cpuid_regs.empty())
        return vendor_name;

    vendor_name.resize(3*4);
    auto out = vendor_name.begin();

    // Append the four raw bytes of one register and advance the write position.
    auto append_register = [&out](std::uint32_t& reg){
        char* first = reinterpret_cast<char*>(&reg);
        out = std::copy(first, first + 4, out);
    };

    append_register(cpuid_regs[ct::ebx]);
    append_register(cpuid_regs[ct::edx]);
    append_register(cpuid_regs[ct::ecx]);

    return vendor_name;
}
//for details, see https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
/**
 * Read the processor brand string via CPUID leaves 0x80000002..0x80000004.
 *
 * Each of the three leaves contributes the raw bytes of its four registers
 * (index 0..3 of the returned register set), 16 bytes per leaf.
 *
 * @return a string of exactly 48 characters holding the bytes as reported by
 *         the CPU (any padding/terminator bytes are kept), or an empty string
 *         if the highest extended leaf reported by CPUID 0x80000000 is below
 *         0x80000004.
 */
static std::string brand(ct::x86_tag) {
    std::string value = "";

    auto regs = rt::cpuid(0x80000000);
    if(regs[ct::eax] < 0x80000004)
        return value;

    value.resize(48);
    auto out = value.begin();

    for(std::uint32_t i = 2; i<5;++i){
        auto ret = rt::cpuid(0x80000000 + i);

        for(std::uint32_t r = 0; r<4;++r){
            // Copy byte-wise instead of storing through a std::uint32_t* that
            // points into the char buffer: that store violated strict aliasing
            // and relied on the buffer being suitably aligned.
            const std::uint32_t word = ret[r];
            const char* first = reinterpret_cast<const char*>(&word);
            out = std::copy(first, first + sizeof(word), out);
        }
    }

    return value;
}
/**
 * Derive a short model name from the CPUID brand and vendor strings.
 *
 * The vendor string is reduced by stripping "Genuine" (host is treated as
 * Intel) or "Authentic" (host is treated as AMD). An empty string is returned
 * if the reduced vendor string does not occur in the brand string, or if the
 * host is neither of the two.
 *
 * Intel: the text between the last ")" and the last "@" of the brand string,
 *        with up to two " CPU " tokens and all whitespace removed,
 *        e.g. "Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz" -> "i7-3520M".
 * AMD:   the text that follows the vendor token plus one separator character,
 *        up to (not including) the last "Processor",
 *        e.g. "AMD EPYC 7401P 24-Core Processor" -> "EPYC 7401P 24-Core "
 *        (the trailing blank is kept). Without "Processor" the remainder of
 *        the brand string is returned.
 *
 * @return the model name, or an empty string if none could be derived
 */
static std::string device_name(ct::x86_tag) {
    std::string brand_str = compass::runtime::detail::brand(ct::x86_tag());
    std::string vendor = compass::runtime::detail::vendor(ct::x86_tag());

    std::size_t find_pos = 0;
    bool is_intel = false;
    bool is_amd = false;

    //remove Genuine in Vendor string if present, Intel host
    if((find_pos = vendor.find("Genuine"))!=std::string::npos){
        vendor.erase(find_pos,7);
        is_intel = true;
    }

    //remove Authentic in Vendor string if present, AMD host, e.g. AMD EPYC 7401P 24-Core Processor
    if((find_pos = vendor.find("Authentic"))!=std::string::npos){
        vendor.erase(find_pos,9);
        is_amd = true;
    }

    std::string value = "";

    // Keep the vendor position in its own variable: find_pos is reused below.
    const std::size_t vendor_pos = brand_str.find(vendor);
    if(vendor_pos == std::string::npos)
        return value;

    if(is_intel){
        //based on the Intel chip test strings that are known
        auto second_bracket_itr = brand_str.rfind(")");
        auto last_at_itr = brand_str.rfind("@");
        // If ")" is missing, npos+1 wraps to 0 (start of string); if "@" is
        // missing, the count is huge and substr clamps to the end.
        value = brand_str.substr(second_bracket_itr+1,last_at_itr-(second_bracket_itr+1));

        //TODO: why run this 2 times?
        for(int pass = 0; pass < 2; ++pass){
            if((find_pos = value.find(" CPU "))!=std::string::npos){
                value.erase(find_pos,5);
            }
        }

        // isspace() is only defined for values representable as unsigned char
        // (or EOF), so convert before classifying.
        value.erase(std::remove_if(value.begin(), value.end(),
                                   [](char c){ return isspace(static_cast<unsigned char>(c)) != 0; }),
                    value.end());
    }

    if(is_amd){
        // Skip the vendor token and the separator behind it; clamp so that
        // substr() cannot throw if the token sits at the very end.
        const std::size_t model_begin = std::min(vendor_pos + 4, brand_str.size());
        const std::size_t end_itr = brand_str.rfind("Processor");

        // substr() takes a length, not an end position: measure it from
        // model_begin. (The previous "end_itr-4" was only right when the
        // vendor token started at offset 0.)
        const std::size_t model_len =
            (end_itr != std::string::npos && end_itr >= model_begin)
            ? end_itr - model_begin
            : std::string::npos;

        value = brand_str.substr(model_begin, model_len);
    }

    return value;
}
};//detail
};//runtime
};//compass
#endif /* COMPASS_RT_X86_META_H_ */
......@@ -5,6 +5,7 @@
#include "detail/ct/detect_compiler.hpp"
#include "detail/ct/detect_arch.hpp"
#include "detail/rt/x86_meta.hpp"
#include "detail/rt/x86_cpuid.hpp"
#include "detail/tags.hpp"
#include "detail/bit_view.hpp"
......@@ -22,6 +23,7 @@ namespace compass {
namespace detail {
using bitview = compass::utility::bit_view<std::uint32_t>;
using current_arch_t = ct::arch::type;
namespace size{
......@@ -29,16 +31,14 @@ namespace compass {
class cacheline
{
std::vector<std::uint32_t> ebx_data_;
std::vector<std::uint32_t> sizes_in_bytes_;
cacheline():
ebx_data_()
{
void on_intel(){
ebx_data_.reserve(3);
std::uint32_t maxlevel = 8;//maximum - 1 that can be mapped to 3 bits in eax[7:5]
std::uint32_t maxlevel = 8;//maximum - 1 that can be mapped to 3 bits in eax[7:5]
std::uint32_t eax = 0;
sizes_in_bytes_.reserve(maxlevel);
for(std::uint32_t l = 0;l<maxlevel;++l)
{
auto regs = cpuid(0x04,0,l);
......@@ -53,9 +53,57 @@ namespace compass {
if(truelevel != l)//this is the wrong level
continue;
ebx_data_.push_back(regs[ct::ebx]);
std::uint32_t value = bitview(regs[ct::ebx]).range(0,11);
sizes_in_bytes_.push_back(value);
}
}
/**
 * Collect the cache line sizes for the AMD code path.
 *
 * Reads CPUID leaf 0x80000005 (ECX, L1 data cache) and leaf 0x80000006
 * (ECX for L2, EDX for L3) and appends bit_view range(0,7) of each register
 * to sizes_in_bytes_, in the order L1, L2, L3.
 * If the L1 value is 0, nothing is stored at all.
 */
void on_amd(){
    sizes_in_bytes_.reserve(3);

    // L1 data cache
    auto l1_regs = cpuid(0x80000005);
    std::uint32_t l1_ecx = l1_regs[ct::ecx];
    auto l1_view = bitview(l1_ecx);
    std::uint32_t l1_linesize = l1_view.range(0,7);
    if(l1_linesize == 0)//this is not a data cache, as the L1 cacheline size is 0
        return;
    sizes_in_bytes_.push_back(l1_linesize);

    auto l23_regs = cpuid(0x80000006);

    // L2 cache
    std::uint32_t l2_ecx = l23_regs[ct::ecx];
    auto l2_view = bitview(l2_ecx);
    std::uint32_t l2_linesize = l2_view.range(0,7);
    sizes_in_bytes_.push_back(l2_linesize);

    // L3 cache
    auto l3_view = bitview(l23_regs[ct::edx]);
    std::uint32_t l3_linesize = l3_view.range(0,7);
    sizes_in_bytes_.push_back(l3_linesize);
}
/**
 * Fill sizes_in_bytes_ according to the CPU vendor string: on_amd() runs if
 * the string contains "AMD", on_intel() runs if it contains "Intel".
 * If neither matches, sizes_in_bytes_ stays empty.
 */
cacheline():
    sizes_in_bytes_()
{
    const auto vendor_id = compass::runtime::detail::vendor( current_arch_t() );

    const bool matches_amd = vendor_id.find("AMD") != std::string::npos;
    const bool matches_intel = vendor_id.find("Intel") != std::string::npos;

    if(matches_amd)
        on_amd();

    if(matches_intel)
        on_intel();
}
public:
......@@ -66,36 +114,29 @@ namespace compass {
}
static std::uint32_t levels_available(ct::x86_tag){
return cacheline::get().ebx_data_.size();
return cacheline::get().sizes_in_bytes_.size();
}
static std::uint32_t level(int _lvl, ct::x86_tag){
auto reg = cacheline::get().ebx_data_.at(_lvl-1);
std::uint32_t value = bitview(reg).range(0,11);
auto value = cacheline::get().sizes_in_bytes_.at(_lvl-1);
return value + 1;
return value;
}
};
class cache
{
std::vector<std::uint32_t> ebx_data_;
std::vector<std::uint32_t> ecx_data_;
cache():
ebx_data_(),
ecx_data_()
{
ebx_data_.reserve(3);
ecx_data_.reserve(3);
std::uint32_t maxlevel = 8;//maximum - 1 that can be mapped to 3 bits in eax[7:5]
std::vector<std::uint32_t> sizes_in_bytes_;
//TODO: refactor this sooner than later
void on_intel() {
std::uint32_t eax = 0;
std::uint32_t maxlevel = 8;//maximum - 1 that can be mapped to 3 bits in eax[7:5]
sizes_in_bytes_.reserve(8);
for(std::uint32_t l = 0;l<maxlevel;++l)
{
auto regs = cpuid(0x04,0,l);
......@@ -110,11 +151,67 @@ namespace compass {
if(truelevel != l)//this is the wrong level
continue;
ebx_data_.push_back(regs[ct::ebx]);
ecx_data_.push_back(regs[ct::ecx]);
std::uint32_t ebx = regs[ct::ebx];
const bitview bv_ebx = bitview(ebx);
const std::uint32_t ecx = regs[ct::ecx];
std::uint32_t ways = 1 + bv_ebx.range(22,31);
std::uint32_t partitions = 1 + bv_ebx.range(12,21);
std::uint32_t line_size = 1 + bv_ebx.range(0,11);
std::uint32_t sets = 1 + ecx;
std::uint32_t value = ways*partitions*line_size*sets;
sizes_in_bytes_.push_back(value);
}
}
/**
 * Collect the cache sizes (in bytes) for the AMD code path.
 *
 * Reads CPUID leaf 0x80000005 (ECX, L1 data cache) and leaf 0x80000006
 * (ECX for L2, EDX for L3) and appends one entry per level to
 * sizes_in_bytes_, in the order L1, L2, L3:
 *   L1: range(24,31) of ECX, multiplied by 1024
 *   L2: range(16,31) of ECX, masked to 16 bits, multiplied by 1024
 *   L3: range(19,31) of EDX, multiplied by 512*1024
 * If range(0,7) of the L1 register is 0, nothing is stored at all.
 */
void on_amd(){
    sizes_in_bytes_.reserve(3);

    // L1 data cache
    auto l1_regs = cpuid(0x80000005);
    std::uint32_t l1_ecx = l1_regs[ct::ecx];
    auto l1_view = bitview(l1_ecx);
    std::uint32_t l1_linesize = l1_view.range(0,7);
    if(l1_linesize == 0)//this is not a data cache, as the L1 cacheline size is 0
        return;
    sizes_in_bytes_.push_back(l1_view.range(24,31)*1024);//AMD puts the numbers in kB

    auto l23_regs = cpuid(0x80000006);

    // L2 cache
    std::uint32_t l2_ecx = l23_regs[ct::ecx];
    auto l2_view = bitview(l2_ecx);
    auto l2_kilobytes = l2_view.range(16,31);
    l2_kilobytes &= 0xffff;
    sizes_in_bytes_.push_back(l2_kilobytes*1024);//AMD puts the numbers in kB

    // L3 cache
    auto l3_view = bitview(l23_regs[ct::edx]);
    //AMD manual says bits [18,31], experiments on a Ryzen Threadripper 1900X showed that [19,31] gives the right result
    auto l3_bytes = l3_view.range(19,31);
    l3_bytes *= 512*1024;//unlike L1/L2, this field is scaled by 512 kB, not 1 kB
    sizes_in_bytes_.push_back(l3_bytes);
}
/**
 * Fill sizes_in_bytes_ according to the CPU vendor string: on_amd() runs if
 * the string contains "AMD", on_intel() runs if it contains "Intel".
 * If neither matches, sizes_in_bytes_ stays empty.
 */
cache():
    sizes_in_bytes_()
{
    const auto vendor_id = compass::runtime::detail::vendor( current_arch_t() );

    const bool matches_amd = vendor_id.find("AMD") != std::string::npos;
    const bool matches_intel = vendor_id.find("Intel") != std::string::npos;

    if(matches_amd)
        on_amd();

    if(matches_intel)
        on_intel();
}
......@@ -126,38 +223,26 @@ namespace compass {
}
static std::uint32_t levels_available(ct::x86_tag){
return cache::get().ebx_data_.size();
return cache::get().sizes_in_bytes_.size();
}
static std::uint32_t level(int _lvl, ct::x86_tag){
if(_lvl <= 0){
std::cerr << "compass::size::cache requested invalid cache level (received: "<<
_lvl << ", found on this host: [1," << cache::get().ebx_data_.size() + 1 << "]\n";
_lvl << ", found on this host: [1," << cache::get().sizes_in_bytes_.size() + 1 << "]\n";
return 0;
}
std::uint32_t index = _lvl - 1;
if(!(index < cache::get().ebx_data_.size())){
if(!(index < cache::get().sizes_in_bytes_.size())){
std::cerr << "compass::size::cache requested invalid cache level (received: "<<
_lvl << ", found on this host: [1," << cache::get().ebx_data_.size() + 1 << "]\n";
_lvl << ", found on this host: [1," << cache::get().sizes_in_bytes_.size() + 1 << "]\n";
return 0;
}
std::uint32_t ebx = cache::get().ebx_data_[index];
const bitview bv_ebx = bitview(ebx);
const std::uint32_t ecx = cache::get().ecx_data_[index];
std::uint32_t ways = 1 + bv_ebx.range(22,31);
std::uint32_t partitions = 1 + bv_ebx.range(12,21);
std::uint32_t line_size = 1 + bv_ebx.range(0,11);
std::uint32_t sets = 1 + ecx;
std::uint32_t value = ways*partitions*line_size*sets;
return value;
return cache::get().sizes_in_bytes_[index];
}
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment