diff --git a/Cargo.lock b/Cargo.lock index 78a73c8d..8b26b3c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -748,6 +748,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.5.1" @@ -897,6 +906,10 @@ dependencies = [ "cuda-config", ] +[[package]] +name = "cudart" +version = "0.1.0" + [[package]] name = "cxx" version = "1.0.116" @@ -953,6 +966,17 @@ dependencies = [ "ordered-float", ] +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "dialoguer" version = "0.11.0" @@ -1228,6 +1252,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "flume" version = "0.11.0" @@ -1270,6 +1304,17 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "from-pest" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3380d8b4f459e3bb35904036044393332e71d5316be9061d9b545c44b6064db" +dependencies = [ + "log", + "pest", + "void", +] + [[package]] name = "funty" version = "2.0.0" @@ -2108,6 +2153,31 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.5" @@ -2126,6 +2196,29 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.18" @@ -2215,7 +2308,9 @@ version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ + "flate2", "memchr", + "ruzstd", ] [[package]] @@ -2409,6 +2504,18 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "pest-ast" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b5ac58ac48a503d1efdcf0ff044b442c07ac4645d179c62d4af79db89f9cda" +dependencies = [ + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "pest_derive" version = "2.7.7" @@ -2688,6 +2795,22 @@ dependencies = [ "which", ] +[[package]] +name = "ptx" +version = "0.1.0" +dependencies = [ + "bytes", + "color-eyre", + "from-pest", + "itertools 0.10.5", + "num", + "object", + "pest", + "pest-ast", + "pest_derive", + "thiserror", +] + [[package]] name = "pyo3" version = "0.20.2" @@ -3104,6 +3227,17 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +[[package]] +name = "ruzstd" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c4eb8a81997cf040a091d1f7e1938aeab6749d3a0dfa73af43cdc32393483d" +dependencies = [ + "byteorder", + "derive_more", + "twox-hash", +] + [[package]] name = "ryu" version = "1.0.16" @@ -3119,6 +3253,10 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sass" +version = "0.1.0" + [[package]] name = "schannel" version = "0.1.23" @@ -3402,6 +3540,12 @@ dependencies = [ "parking_lot 0.11.2", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stats" version = "0.1.0" @@ -3835,6 +3979,16 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "494f2baf446447eb9b49ece9bbc391b8b251ceb4778f7362ef09dd9eadec390f" +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" @@ -4028,6 +4182,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + [[package]] name = "vte" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index 3a30aad6..9e8eecfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,9 @@ members = [ "validate", "validate/remote", + "sass", + "ptx", + "cuda/cudart", "profile", "playground", "playground/sys", diff --git a/cuda/cudart/Cargo.toml b/cuda/cudart/Cargo.toml new file mode 100644 index 00000000..5d49a068 --- /dev/null +++ b/cuda/cudart/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "cudart" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/cuda/cudart/src/lib.rs b/cuda/cudart/src/lib.rs new file mode 100644 index 00000000..7d12d9af --- /dev/null +++ b/cuda/cudart/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/ptx/Cargo.toml b/ptx/Cargo.toml new file mode 100644 index 00000000..686f1f2f --- /dev/null +++ b/ptx/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "ptx" +version = "0.1.0" +edition = "2021" + +[dependencies] +# anyhow = "1" +color-eyre = "0" +thiserror = "1" +pest = "2" +pest_derive = "2" +pest-ast = "0" +from-pest = "0" +num = "0" +itertools = "0" + +object = "0" +bytes = "1" + +[lib] +crate-type = ["cdylib", "rlib"] diff --git a/ptx/README.md b/ptx/README.md new file mode 100644 index 00000000..b47c45a0 --- /dev/null +++ b/ptx/README.md @@ -0,0 +1,11 @@ +## gpucachesim PTX + +Custom (non-LLVM) PTX frontend used by gpucachesim for functional simulation. + +The PTX (Parallel Thread eXecution) assembly language is ... + +The provided libraries may in the future be used for + +- static analysis +- PTX synthesis +- functional simulation diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs new file mode 100644 index 00000000..dc19b14d --- /dev/null +++ b/ptx/src/ast.rs @@ -0,0 +1,105 @@ +use super::ptx::Rule; +use pest::Span; +use std::path::PathBuf; +use thiserror::Error; + +fn span_into_str(span: Span) -> &str { + span.as_str() +} + +// #[derive(Debug)] +// pub enum Directive { +// VariableDecl, +// Function, +// Version { version: f64, newer: bool }, +// AddressSize, +// Target, +// File, +// Loc, +// } + +#[derive(PartialEq, Debug)] +pub enum FunctionDeclHeader { + Entry, + VisibleEntry, + WeakEntry, + Func, + VisibleFunc, + WeakFunc, + ExternFunc, +} + +#[derive(PartialEq, Debug)] +pub enum ASTNode<'a> { + // Directive(Directive), + FunctionDefn{ + // name: &'a str, + // body: Vec>, + }, + FunctionDecl{ + name: &'a str, + // params: &'a str, + }, + FunctionDeclHeader(FunctionDeclHeader), + VariableDeclDirective, + FunctionDirective, + VersionDirective { version: f64, newer: bool }, + AddressSizeDirective(u32), + TargetDirective(Vec<&'a str>), + FileDirective{ + id: usize, + path: PathBuf, + size: Option, + lines: Option, + }, + LocDirective, + Double(f64), + SignedInt(i64), + UnsignedInt(u64), + Str(&'a str), + Identifier(&'a str), + EOI, +} + +// #[derive(Debug)] +// pub struct AST { +// pub nodes: Vec, +// } + +#[derive(Error, Debug)] +pub enum ParseError<'a> { + #[error("failed to parse {rule:?}")] + Rule { rule: Rule }, + + #[error("failed to parse: {0}")] + Unexpected(&'a str), +} + +// impl TryFrom for Program { +// type Error = ParseError; + +// fn try_from(program: Rule) -> Result { +// Ok(Self { +// statements: Vec::new(), +// }) +// } +// } + +// pub struct Double(f64); + +// impl From> for Double { +// fn from(pair: Pair) -> Self { +// Self(0.0) +// } +// } + +// #[derive(Debug, FromPest)] +// #[pest_ast(rule(Rule::field))] +// pub struct Field { +// #[pest_ast(outer(with(span_into_str), with(str::parse), with(Result::unwrap)))] +// pub value: f64, +// } + +#[derive(Debug, FromPest)] +#[pest_ast(rule(Rule::EOI))] +struct EOI; diff --git a/ptx/src/lib.rs b/ptx/src/lib.rs new file mode 100644 index 00000000..6925b766 --- /dev/null +++ b/ptx/src/lib.rs @@ -0,0 +1,898 @@ +#![allow(dead_code)] + +#[macro_use] +extern crate pest_derive; +#[macro_use] +extern crate pest_ast; +#[macro_use] +extern crate pest; + +mod ast; +mod ptx; + +use crate::ptx::Rule; +use ast::{ASTNode, FunctionDeclHeader, ParseError}; +use color_eyre::eyre; +use pest::iterators::Pair; +use pest::Parser; +use std::fs; +use std::path::{Path, PathBuf}; + +fn walk(pair: Pair) -> eyre::Result { + match pair.as_rule() { + Rule::function_defn => { + let inner = pair.into_inner().map(|p| walk(p)); + println!("{:?}", inner.collect::>>()); + // Ok(ASTNode::FunctionDefn { name: "test" }) + Ok(ASTNode::FunctionDefn {}) + } + Rule::function_decl => { + let inner = pair.into_inner().map(|p| walk(p)); + println!("{:?}", inner.collect::>>()); + Ok(ASTNode::FunctionDecl { name: "test" }) + } + Rule::function_ident_param => { + // extract identifier and param_list + let inner = pair.into_inner().map(|p| walk(p)); + println!("{:?}", inner.collect::>>()); + Ok(ASTNode::EOI) + } + Rule::function_decl_header => { + let header = match pair.into_inner().next().map(|p| p.as_rule()) { + Some(Rule::function_decl_header_entry) => Ok(FunctionDeclHeader::Entry), + Some(Rule::function_decl_header_visible_entry) => { + Ok(FunctionDeclHeader::VisibleEntry) + } + Some(Rule::function_decl_header_weak_entry) => Ok(FunctionDeclHeader::WeakEntry), + Some(Rule::function_decl_header_func) => Ok(FunctionDeclHeader::Func), + Some(Rule::function_decl_header_visible_func) => { + Ok(FunctionDeclHeader::VisibleFunc) + } + Some(Rule::function_decl_header_weak_func) => Ok(FunctionDeclHeader::WeakFunc), + Some(Rule::function_decl_header_extern_func) => Ok(FunctionDeclHeader::ExternFunc), + _ => Err(ParseError::Unexpected( + "expected valid function decl header", + )), + }?; + Ok(ASTNode::FunctionDeclHeader(header)) + } + Rule::statement_block => { + let inner = pair.into_inner().map(|p| walk(p)); + println!("{:?}", inner.collect::>>()); + Ok(ASTNode::EOI) + } + Rule::version_directive => { + let mut iter = pair.into_inner(); + let double = iter.next().map(|p| walk(p)).unwrap(); + let newer = iter.next().map(|v| v.as_str() == "+").unwrap_or(false); + + match double { + Ok(ASTNode::Double(version)) => Ok(ASTNode::VersionDirective { version, newer }), + _ => unreachable!(), + } + } + Rule::target_directive => { + let identifiers: Vec<&str> = pair + .into_inner() + .flat_map(|id| match id.as_rule() { + Rule::identifier => Some(id.as_str()), + _ => None, + }) + .collect(); + Ok(ASTNode::TargetDirective(identifiers)) + } + Rule::address_size_directive => { + let size: u32 = pair + .into_inner() + .next() + .and_then(|s| s.as_str().parse().ok()) + .unwrap(); + Ok(ASTNode::AddressSizeDirective(size)) + } + Rule::file_directive => { + let mut inner = pair.into_inner().map(|p| walk(p)); + let id: usize = match inner.next() { + Some(Ok(ASTNode::SignedInt(value))) => Ok(value.try_into()?), + Some(Ok(ASTNode::UnsignedInt(value))) => Ok(value.try_into()?), + _ => Err(ParseError::Unexpected("expected id")), + }?; + let path: PathBuf = match inner.next() { + Some(Ok(ASTNode::Str(value))) => Ok(value.into()), + _ => Err(ParseError::Unexpected("expected file path")), + }?; + let size: Option = match inner.next() { + Some(Ok(ASTNode::SignedInt(value))) => Some(value.try_into()?), + Some(Ok(ASTNode::UnsignedInt(value))) => Some(value.try_into()?), + _ => None, + }; + let lines: Option = match inner.next() { + Some(Ok(ASTNode::SignedInt(value))) => Some(value.try_into()?), + Some(Ok(ASTNode::UnsignedInt(value))) => Some(value.try_into()?), + _ => None, + }; + Ok(ASTNode::FileDirective { + id, + path, + size, + lines, + }) + } + Rule::identifier => Ok(ASTNode::Identifier(pair.as_str())), + Rule::string => Ok(ASTNode::Str(pair.as_str())), + Rule::double => { + // let value = pair.as_str(); + // todo + Ok(ASTNode::Double(0f64)) + } + Rule::integer => { + let value = pair.as_str(); + let unsigned = value.ends_with("U"); + if value.starts_with("0b") || value.starts_with("0B") { + // binary + return if unsigned { + Ok(ASTNode::UnsignedInt(u64::from_str_radix( + &value[2..value.len() - 1], + 2, + )?)) + } else { + Ok(ASTNode::SignedInt(i64::from_str_radix(&value[2..], 2)?)) + }; + } + if value.ends_with("U") { + Ok(ASTNode::UnsignedInt( + value[..value.len() - 1].parse::()?, + )) + } else { + Ok(ASTNode::SignedInt(value.parse::()?)) + } + // let decimal = ; + // hex: sscanf(yytext,"%x", &yylval->int_value + // decimal: atoi(yytext) + } + Rule::EOI => Ok(ASTNode::EOI), + other => { + eprintln!("unhandled rule: {:?}", other); + Ok(ASTNode::EOI) + } // Rule::number => str::parse(pair.as_str()).unwrap(), + // Rule::sum => { + // let mut pairs = pair.into_inner(); + + // let num1 = pairs.next().unwrap(); + // let num2 = pairs.next().unwrap(); + + // process(num1) + process(num2) + // } + } +} + +pub fn gpgpu_ptx_sim_load_ptx_from_filename(path: &Path) -> eyre::Result { + let source = fs::read_to_string(path)?; + // let source = String::from_utf8(fs::read(path)?)?; + let parse_tree = ptx::Parser::parse(ptx::Rule::program, &source)?; + + // let ast: Program = parse_tree.try_into()?; + // Program::from(&parse_tree); + let ast = parse_tree + // .iter() + // .flat_map(|pair| walk(pair)) + .map(|pair| walk(pair)) + // match pair.as_rule() { + // Rule::version_directive => { + // println!("{:?}", pair); + // // let mut pairs = pair.into_inner(); + // let version = 0.1f64; + // let newer = false; + // // let version = pairs.next().unwrap(); + // // let newer = pairs.next().ok(); + // Some(Statement::Directive(Directive::Version { version, newer })) + // } + // Rule::EOI => None, + // other => { + // eprintln!("unhandled rule: {:?}", other); + // None + // } + // ) + .collect::>>()?; + println!("ast = {:#?}", ast); + + // for record in parse_tree { + // println!("{:?}", record.as_rule()); + // match record.as_rule() { + // Rule::directive => { + // record_count += 1; + + // for field in record.into_inner() { + // field_sum += field.as_str().parse::().unwrap(); + // } + // } + // Rule::EOI => (), + // other => panic!("unhandled rule: {}", other), + // } + // } + + // println!("parse tree = {:#?}", parse_tree); + // let ast: Program = File::from_pest(&mut parse_tree).expect("infallible"); + // println!("syntax tree = {:#?}", syntax_tree); + // println!(); + Ok(0) +} + +#[cfg(test)] +mod tests { + use super::*; + use color_eyre::eyre; + use std::path::PathBuf; + + macro_rules! ast_tests { + ($($name:ident: $value:expr,)*) => { + $( + #[test] + fn $name() -> eyre::Result<()> { + let (rule, source, expected) = $value; + let nodes = ptx::Parser::parse(rule, &source)? + .map(|p| walk(p)) + .collect::>>()?; + assert_eq!(Some(expected), nodes.into_iter().next()); + Ok(()) + } + )* + } +} + + ast_tests! { + ast_integer_decimal_0: (ptx::Rule::integer, "0", ASTNode::SignedInt(0)), + ast_integer_decimal_1: (ptx::Rule::integer, "-12", ASTNode::SignedInt(-12)), + ast_integer_decimal_2: (ptx::Rule::integer, "12U", ASTNode::UnsignedInt(12)), + ast_integer_decimal_3: (ptx::Rule::integer, "01110011001", ASTNode::SignedInt(1110011001)), + ast_integer_binary_0: (ptx::Rule::integer, "0b01110011001", ASTNode::SignedInt(921)), + ast_integer_binary_1: (ptx::Rule::integer, "0b01110011001U", ASTNode::UnsignedInt(921)), + } + + #[test] + fn build_ast() -> eyre::Result<()> { + let ptx_file = PathBuf::from("../kernels/mm/small.ptx"); + gpgpu_ptx_sim_load_ptx_from_filename(&ptx_file)?; + Ok(()) + } + + // kernelData * getKernelFunctionHelper(const char * bytes, unsigned int size, char * kernel, int kernelNumber) { + // char* target = 0; + // int index = 0; + // kernelData* kern = 0; + // unsigned long long maxLoc = size; + // + // if(kernel) { + // target = (char*) malloc(strlen(kernel)+1); + // strcpy(target, kernel); + // } + // + // //Search each ELF for target kernel: + // unsigned long long loc = 0x0; + // while(!kern && loc < size) { + // //Deal with padding and etc. + // if(loc % 4 != 0) { + // loc += 4 - (loc % 4); + // } + // while(true) { + // if(loc >= size || *((unsigned int*)(bytes + loc)) == 0xba55ed50) { + // break; + // } else { + // loc += 4; + // } + // } + // if(loc >= size) { + // break; + // } + // + // fatHeader * head = (fatHeader*) &bytes[loc]; + // int headloc = loc; + // + // //Check magic number: + // if(head->magic != 0xba55ed50 || head->unknown != 0x00100001) { + // if(!loc) { + // fprintf(stderr, "WARNING: unrecognized magic number for CUDA fatbin\n"); + // } + // else { + // fprintf(stderr, "SANITY CHECK ERROR ~1052: no magic number; possible misaligned fatbin.\n"); + // } + // } + // + // loc += 0x10; + // if(loc >= maxLoc) { + // fprintf(stderr, "SANITY CHECK ERROR ~1044: fatbin is located out of bounds.\n"); + // return 0; + // } + // while(!kern && loc < head->size + headloc + 0x10) { + // unsigned int * type = (unsigned int*) &bytes[loc]; + // unsigned int * offset = (unsigned int*) &bytes[loc + 4]; + // unsigned long long * size = (unsigned long long*) &bytes[loc + 8]; + // + // int architecture = bytes[loc + 28]; + // + // if(*size > headloc + head->size || loc + *offset + *size > headloc + 0x10 + head->size) { + // fprintf(stderr, "SANITY CHECK FAILED ~1053: fatbin values out of bounds.\n"); + // return 0; + // } + // + // loc += *offset; + // if(loc >= maxLoc) { + // fprintf(stderr, "SANITY CHECK FAILED ~1060: fatbin loc out of bounds.\n"); + // return 0; + // } + // + // if((*type & 0xffff) == 0x2) {//this part of the fatbin contains an ELF + // //Parse raw ELF data: + // ELF * elf = bytes2ELF(bytes + loc); + // loc += *size; + // if(!elf) { + // cerr << "SANITY CHECK ERROR em~885: unable to parse ELF.\n"; + // return 0; + // } + // if(loc > maxLoc) { + // fprintf(stderr, "SANITY CHECK ERROR em~889: fatbin loc out of bounds.\n"); + // return 0; + // } + // + // //Look for kernel code section: + // int scnIndex = 0; + // int numSections = getNumSections(elf); + // for(int x = 0; x < numSections; x++) { + // ELF_Section section = getSection(elf, x); + // const ELF_SHeader shdr = getHeader(elf, section); + // const char * name = getName(elf, section); + // scnIndex = x; + // + // if(!target) { + // bool containsKernel = !strncmp(name, ".text.", 6); + // if(containsKernel) { + // if(index == kernelNumber) { + // target = (char*) malloc(strlen(name) + 2); //note: the extra malloc'd space avoids string errors elsewhere + // strcpy(target, name + 6); + // } + // else { + // index++; + // } + // } + // } + // + // if(target && strlen(name) > 6 && !strcmp(name + 6, target)) {//this section contains the kernel function + // //Copy data into a single char array: + // char * bytes = (char*) malloc(shdr.sh_size); + // memcpy(bytes, getSectionData(elf, section), shdr.sh_size); + // + // //Prepare return value: + // kern = (kernelData*) malloc(sizeof(kernelData)); + // kern->sharedMemory = 0; + // kern->min_stack_size = -1; + // kern->max_stack_size = -1; + // kern->frame_size = -1; + // kern->bytes = bytes; + // kern->name = target; + // kern->arch = architecture; + // kern->functionNames = 0; + // kern->numBytes = shdr.sh_size; + // kern->numRegisters = shdr.sh_info >> 24; + // kern->symIndex = shdr.sh_info & 0xff; + // + // break; + // } + // } + // + // if(kern) { + // for(int x = 0; x < numSections; x++) { + // ELF_Section section = getSection(elf, x); + // const ELF_SHeader shdr = getHeader(elf, section); + // const char * name = getName(elf, section); + // + // //If section contains shared memory data, note size of shared memory + // if(!strncmp(".nv.shared.", name, 11) && !strcmp(name + 11, kern->name)) { + // kern->sharedMemory = shdr.sh_size; + // } + // + // //Elseif symbol table, get subroutine names (if we can) + // else if(shdr.sh_type == SHT_SYMTAB) { + // if(shdr.sh_size % shdr.sh_entsize) { + // cerr << "SANITY CHECK ERROR em~956: fractional symbol count.\n"; + // } + // + // //Find & change appropriate values in this symbol table: + // int numSyms = shdr.sh_size / shdr.sh_entsize; + // for(int y = 0; y < numSyms; y++) { + // const ELF_Sym sym = getSymbol(elf, section, y); + // if(sym.st_info == 0x22 && sym.st_shndx == scnIndex) { + // const char * symName = getName(elf, shdr, sym); + // char * copy = (char*) malloc(strlen(symName) + 1); + // strcpy(copy, symName); + // addLast(&kern->functionNames, copy); + // } + // } + // } + // + // //Elseif .nv.info section, get local memory metadata + // else if(!strcmp(".nv.info", name)) { + // const char * bytes = getSectionData(elf, section); + // + // //Find appropriate values in section data: + // for(unsigned int x = 0; x < shdr.sh_size;) { + // CUDA_INFO * ci = (CUDA_INFO*)(bytes+x); + // + // if(ci->format > maxFormat || ci->format < minFormat) { + // cerr << "ERROR: EIFMT type (0x" << std::hex << (int)ci->format << std::dec << ") out of range.\n"; + // } + // if(ci->attribute > maxAttribute || ci->attribute < minAttribute) { + // //cerr << "ERROR: EIATTR type (0x" << std::hex << (int)ci->attribute << std::dec << ") out of range.\n"; + // } + // + // int datasize = 0; + // if(ci->format == EIFMT_NVAL) { + // datasize = 0; + // } else if(ci->format == EIFMT_BVAL) { + // datasize = 1; + // } else if(ci->format == EIFMT_HVAL) { + // datasize = 2; + // } else if(ci->format == EIFMT_SVAL) { + // //TODO I don't know if this is correct for all attribute types: + // datasize = 2; + // short * sdata = (short*)ci->data; + // datasize += sdata[0]; + // } + // + // if(ci->attribute == EIATTR_MIN_STACK_SIZE) { + // if(ci->format == EIFMT_SVAL) { + // if(datasize == 10) { + // int * temp = (int*) (ci->data + 2); + // int funcid = temp[0]; + // if(funcid == kern->symIndex) { + // kern->min_stack_size = temp[1]; + // } + // } else { + // cerr << "ERROR: Unexpected datasize (" << datasize << ") for min_stack_size.\n"; + // } + // } else { + // cerr << "ERROR: Unexpected format for min_stack_size.\n"; + // } + // } + // if(ci->attribute == EIATTR_MAX_STACK_SIZE) { + // if(ci->format == EIFMT_SVAL) { + // if(datasize == 10) { + // int * temp = (int*) (ci->data + 2); + // int funcid = temp[0]; + // if(funcid == kern->symIndex) { + // kern->max_stack_size = temp[1]; + // } + // } else { + // cerr << "ERROR: Unexpected datasize (" << datasize << ") for max_stack_size.\n"; + // } + // } else { + // cerr << "ERROR: Unexpected format for max_stack_size.\n"; + // } + // } else if(ci->attribute == EIATTR_FRAME_SIZE) { + // if(ci->format == EIFMT_SVAL) { + // if(datasize == 10) { + // int * temp = (int*) (ci->data + 2); + // int funcid = temp[0]; + // if(funcid == kern->symIndex) { + // kern->frame_size = temp[1]; + // } + // } else { + // cerr << "ERROR: Unexpected datasize (" << datasize << ") for frame_size.\n"; + // } + // } else { + // cerr << "ERROR: Unexpected format for frame_size.\n"; + // } + // } + // + // x += datasize + 2; + // } + // } + // } + // } + // + // cleanELF(elf); + // } + // else {//not an ELF, ignore it + // loc += *size; + // if(loc > maxLoc) { + // fprintf(stderr, "SANITY CHECK FAILED em~1058: fatbin loc out of bounds.\n"); + // return 0; + // } + // } + // } + // } + // + // //Cleanup & return: + // if(kern && kern->bytes) { + // return kern; + // } + // else { + // if(target) { + // free(target); + // } + // return 0; + // } + // } + // + + #[repr(usize)] + #[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub enum RegionKind { + PTX = 1, + ELF = 2, + } + + #[test] + /// see https://pdfs.semanticscholar.org/5096/25785304410039297b741ad2007e7ce0636b.pdf + /// see https://dl.acm.org/doi/abs/10.5555/3314872.3314900 + /// see https://github.com/daadaada/turingas/blob/master/turingas/cubin.py + fn read_ptx_section() -> eyre::Result<()> { + // use object::{FileHeader, Object, ObjectSection}; + use object::read::elf::FileHeader; + use object::read::elf::{SectionHeader, SectionTable}; + // use object::read::Object; + // use object::read::elf::{ElfSection64, FileHeader, Rela, SectionHeader, SectionTable}; + // use object::read::ObjectSection; + use object::Endianness; + // use object::ReadRef; + // use object::{elf, ReadRef, SectionIndex}; + + let binary = fs::read("/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_disabled")?; + let kind = object::FileKind::parse(&*binary)?; + assert_eq!(kind, object::FileKind::Elf64); + + let elf = object::elf::FileHeader64::::parse(&*binary)?; + let endianness = elf.endian()?; + dbg!(&endianness); + let e_machine = elf.e_machine(endianness); + dbg!(&e_machine); + // elf::FileHeader64 < Endianness >> (data); + // + // let out_data = match kind { + // object::FileKind::Elf32 => copy_file::>(in_data).unwrap(), + // object::FileKind::Elf64 => copy_file::>(in_data).unwrap(), + // _ => { + // eprintln!("Not an ELF file"); + // process::exit(1); + // } + // }; + // + // let file = object::File::parse(&*binary_data)?; + // let sections: SectionTable<'_, _> = file.sections(endianness, &*binary)?; + + //Locate fatbin section on CUDA 4.0 + // https://github.com/decodecudabinary/Decoding-CUDA-Binary/blob/master/tools/src/elfmanip.cpp#L1026 + // target = ".rodata"; + // for(int x = 0; x < numSections; x++) { + // ELF_Section section = getSection(elf, x); + // const ELF_SHeader shdr = getHeader(elf, section); + // const char * name = getName(elf, section); + // + // //Check if section is .nv_fatbin: + // if(name && !strcmp(target, name)) { + // const char * bytes = getSectionData(elf, section); + // kernelData* answer = getKernelFunctionHelper(bytes, shdr.sh_size, kernel, kernelNumber); + // cleanELF(elf); + // return answer; + // } + // } + + let sections: SectionTable<_> = elf.sections(endianness, &*binary)?; + for section in sections.iter() { + // let string_table = section.strings(endianness, &*binary)?.unwrap_or_default(); + // dbg!(string_table); + // let name = section.sh_name(endianness); + let section_name = sections.section_name(endianness, section)?; + // let name = section.name(endianness, string_table)?; + // let Ok(name) = section.name(endianness, string_table) else { + // continue; + // }; + // let section_name = String::from_utf8_lossy(section_name)?; + let section_name = std::str::from_utf8(section_name)?; + println!("{}", section_name); + match section_name { + ".nv_fatbin" | ".nvFatBinSegment" => {} // | ".text" => {} + _ => continue, + }; + // ".text.func" is the kernel name? + // e_machine + // e_machine(&self, endian: Self::Endian) -> u16 + println!("========"); + // the first 8 bytes are the .nv_fatbin magic number, + // and the remaining eight bytes contain the size of the + // rest of the region. + // The rest of the region alternates between detailed + // headers and the embedded file (ELF, PTX, or cubin) which + // the detailed header describes. + // In the detailed header, the first 4-byte word contains the + // embedded file’s type and ptxas flags; the lower two bytes + // have a value of 2 for GPU ELF files. + // The second word is the offset of the embedded file, + // relative to the start of this detailed header. + // The dword comprising the third and fourth words holds + // the size of the embedded file. + // The seventh word is the code version, + // which is dependent on the compiler. + // The eighth word contains the target architecture - a value + // of 20 for compute capability 2.0, a value of 35 for compute + // capability 3.5, etcetera. + // + // The rest of the detailed header contains less important + // metadata, such as the operating system or the source code’s + // filename. + // Another section of the CPU ELF that is unique to CUDA + // programs is called .nvFatBinSegment. + // It contains metadata about the .nv_fatbin section, + // such as the starting addresses of its regions. + // Its size is a multiple of six words (24 bytes), + // where the third word in each group of six is an address + // inside of the .nv_fatbin section. + // If we modify the .nv_fatbin, then these addresses need to + // be changed to match it. + // let mut data = section.data(endianness, &*binary)?.to_owned(); + let data = section.data(endianness, &*binary)?; + // let data = object::Bytes(data); + // let data = object::Bytes(data); + use bytes::{Buf, BufMut, Bytes, BytesMut}; + use object::ReadRef; + // let mut buf = BytesMut::from(&mut *data); + // let mut buf = Bytes::from(&*data); + + if section_name == ".nv_fatbin" { + // let buf = Bytes::from(data).into_buf(); + + // parse the large header + // let mut nv_fatbin_magic_number = [0; 8]; + // buf.copy_to_slice(&mut nv_fatbin_magic_number); + // let flag = data.read::>(4)?.get(endianness); + let mut pos = 0; + loop { + let nv_fatbin_magic_number = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read nv fatbin magic number"))? + .get(endianness); + assert_eq!(nv_fatbin_magic_number, 0x00100001ba55ed50); + + let region_size = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read main header segment size"))? + .get(endianness); + + // let nv_fatbin_magic_number = data.read_slice::>(8); + + // .get(endianness); + // let mut segment_size = [0; 8]; + // buf.copy_to_slice(&mut segment_size); + // println!("total size in bytes: {} {:x}", data.len(), data.len(),); + println!( + "\n\t => REGION SIZE: {:b} {:x}\n\n", + // u64::from_le_bytes(segment_size), + region_size, + region_size, + ); + // println!( + // "magic number: {:b} {:x}", + // nv_fatbin_magic_number, nv_fatbin_magic_number, + // ); + // assert_eq!(segment_size, 0x480); + + // // let mut skip = [0; 4]; + // // buf.copy_to_slice(&mut skip); + // // println!("skipped: {:?}", &skip); + // + + let offset_start = pos; + let typ = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read typ"))? + .get(endianness); + + // assert_eq!(typ, 2); // 2 is ELF + let flags = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read flags"))? + .get(endianness); + + let offset = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read offset"))? + .get(endianness); + + let size = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read size"))? + .get(endianness); + + // skip 3 words + dbg!(String::from_utf8_lossy( + &data[pos as usize..pos as usize + 12] + )); + pos += 12; + // let _skipped = data + // .read_bytes(12) + // .map_err(|_| eyre::eyre!("failed to skip ahead"))?; + + let code_version = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read code version"))? + .get(endianness); + + let architecture = data + .read::>(&mut pos) + .map_err(|_| eyre::eyre!("failed to read architecture"))? + .get(endianness); + + dbg!(typ, flags, offset, size, code_version, architecture); + + //this part of the fatbin contains an ELF + // ELF magic number: 0x464c457f + + // let start = nt_size as usize; + // println!("STARTING FROM SEGMENT SIZE"); + // dbg!(&start); + // for i in 0..8 { + // println!("{:02X?}", &data[(start + (i * 32))..(start + (i + 1) * 32)]); + // } + // println!("STARTING FROM OFFSET"); + // // let start = (offset - (pos - offset_start) as u32) as usize; + // let start = (offset_start + offset as u64) as usize; + // // let start = offset as usize; + // // let start = pos as usize; + // dbg!(&start); + // for i in 0..8 { + // println!("{:02X?}", &data[(start + (i * 32))..(start + (i + 1) * 32)]); + // } + + if typ == RegionKind::ELF as u16 { + let fatbin_elf_start = (offset_start + offset as u64) as usize; + let fatbin_elf_end = fatbin_elf_start + size as usize; + + dbg!(fatbin_elf_start, fatbin_elf_end); + let fatbin_elf_data = &data[fatbin_elf_start..fatbin_elf_end]; + let inner_kind = object::FileKind::parse(fatbin_elf_data)?; + assert_eq!(inner_kind, object::FileKind::Elf64); + + let fatbin_elf = + object::elf::FileHeader64::::parse(fatbin_elf_data)?; + let fatbin_endianness = fatbin_elf.endian()?; + dbg!(&fatbin_endianness); + let fatbin_e_machine = fatbin_elf.e_machine(fatbin_endianness); + dbg!(&fatbin_e_machine); + + assert_eq!(fatbin_e_machine, 190, "fatbin ELF is CUDA"); + + // iterate over kernel sections in ELF + let kernel_sections = fatbin_elf.sections(endianness, fatbin_elf_data)?; + + // find the kernels in this region + use itertools::Itertools; + let kernel_names: Vec<_> = kernel_sections + .iter() + .filter_map(|sec| { + let name = kernel_sections.section_name(endianness, sec).ok()?; + let name = std::str::from_utf8(name).ok()?; + name.strip_prefix(".text.") + // if name.starts_with(".text.") { + // // found a kernel code section + // } else { + // None + // } + }) + .dedup() + .sorted() + .collect(); + + dbg!(&kernel_names); + + for kernel_name in &kernel_names { + // get shared memory data, note size of shared memory + let shmem = kernel_sections.section_by_name( + endianness, + format!(".nv.shared.{}", kernel_name).as_bytes(), + ); + if let Some((_, shmem_section)) = shmem { + let shared_mem_bytes = shmem_section.sh_size(endianness); + dbg!(shared_mem_bytes); + } + + // get shared memory data, note size of shared memory + let info = kernel_sections.section_by_name( + endianness, + format!(".nv.info.{}", kernel_name).as_bytes(), + ); + if let Some((_, info_section)) = info { + let info_data = info_section.data(endianness, &*fatbin_elf_data); + // let shared_mem_bytes = info_section.sh_size(endianness); + // dbg!(shared_mem_bytes); + } + } + for (kernel_section_idx, kernel_section) in + kernel_sections.iter().enumerate() + { + // kernel_section.section + let kernel_section_name = + kernel_sections.section_name(endianness, kernel_section)?; + let kernel_section_name = std::str::from_utf8(kernel_section_name)?; + println!("======================="); + println!("{:>3} => {}", kernel_section_idx, kernel_section_name); + + let kernel_section_data = + kernel_section.data(endianness, fatbin_elf_data)?; + + // so turns out, the text sections are actually the + // SASS assembly instructions. + // This can be verified with usign cuobjdump -sass + println!("{}", String::from_utf8_lossy(kernel_section_data)); + println!("{:02X?}", &kernel_section_data); + println!("======================="); + + let section_type = kernel_section.sh_type(endianness); + let section_size = kernel_section.sh_size(endianness); + let section_entry_size = kernel_section.sh_entsize(endianness); + if section_type == object::elf::SHT_SYMTAB { + assert_eq!( + section_size % section_entry_size, + 0, + "fractional symbol count" + ); + } + } + // let fatbin_data = section.data(endianness, &*binary)?; + + pos = fatbin_elf_end as u64; + } else if typ == RegionKind::PTX as u16 { + let ptx_start = (offset_start + offset as u64) as usize; + let ptx_end = ptx_start + size as usize; + + dbg!(ptx_start, ptx_end); + let ptx_data = &data[ptx_start..ptx_end]; + + println!("{}", String::from_utf8_lossy(&ptx_data)); + println!("{:02X?}", &ptx_data); + println!("======================="); + } + // break; + + // let typ = buf.get_u16_le(); + // assert_eq!(typ, 2); // 2 is ELF + // let flags = buf.get_u16_le(); + // let offset = buf.get_u32_le(); + // let size = buf.get_u64_le(); // 4th + // 5,6,7 + // let _ = buf.get_u32_le(); + // let _ = buf.get_u32_le(); + // let _ = buf.get_u32_le(); + // let code_version = buf.get_u32_le(); + // let architecture = buf.get_u32_le(); + // let mut skip = [0; 20]; + // buf.copy_to_slice(&mut skip); + // println!("skipped: {:?}", &skip); + // dbg!(architecture); + } + } else if section_name == ".nvFatBinSegment" { + dbg!(data.len()); + let num_regions = data.len() / (6 * 4); + dbg!(&num_regions); + for region in 0..num_regions { + // read the third word in group of 6 + let word = region * (6 * 4) + (3 * 4); + dbg!(&word); + + let region_starting_address = data + .read_at::>(word as u64) + .map_err(|_| { + eyre::eyre!("failed to read starting address for region {}", region) + })? + .get(endianness); + dbg!(®ion_starting_address); + } + } + + let data = String::from_utf8_lossy(&data); + // println!("{}", data); + println!("========"); + println!("\n\n\n"); + // println!("{:?}", section.data()?); + // println!("{}", String::from_utf8_lossy(section.data()?)); + } + Ok(()) + } +} diff --git a/ptx/src/ptx.pest b/ptx/src/ptx.pest new file mode 100644 index 00000000..508873aa --- /dev/null +++ b/ptx/src/ptx.pest @@ -0,0 +1,404 @@ +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#syntax +program = _{ SOI ~ stmt* ~ EOI } +stmt = _{ + directive_statement + | function_defn +} +// | function_decl + +directive_statement = _{ + variable_declaration ~ ";" + | function_decl ~ ";" + | version_directive + | address_size_directive + | target_directive + | file_directive + | loc_directive +} + +variable_declaration = { + ".constptr" ~ identifier ~ "," ~ identifier ~ "," ~ integer + | variable_spec ~ identifier_spec ~ "=" ~ literal_operand + | variable_spec ~ identifier_spec ~ "=" ~ initializer_list + | variable_spec ~ identifier_list +} +initializer_list = { "{" ~ initializer_list ~ "}" | "{" ~ literal_list ~ "}" } +literal_list = { literal_operand ~ "," ~ literal_list | literal_operand } +identifier_list = { identifier_spec ~ "," ~ identifier_list | identifier_spec } + +function_defn = { + function_decl ~ block_spec_list ~ statement_block + | function_decl ~ statement_block +} + +statement_block = { "{" ~ statement_list? ~ "}" } +statement_list = _{ + statement_block ~ statement_list + | instruction_statement ~ statement_list + | directive_statement ~ statement_list + | prototype_block ~ statement_list + | statement_block + | instruction_statement + | directive_statement +} + +instruction_statement = { + pred_spec ~ instruction ~ ";" + | instruction ~ ";" + | identifier ~ ":" +} +pred_spec = { + "@!" ~ identifier + | "@" ~ identifier ~ ( + // note: equ must precede eq + ".lt" | ".equ" | ".eq" | ".le" | ".ne" | ".ge" + | ".gtu" | ".neu" | ".cf" | ".sf" | ".nsf" + )? +} +instruction = { + opcode_spec ~ operand_list + | opcode_spec +} +operand_list = _{ operand ~ "," ~ operand_list | operand } +operand = { + // starts with % or WARP_SZ + builtin_operand + // starts with \d 0x 0b 0\octets (int) 0f (float) 0d \d+.\d+ (double) + | literal_operand + // tex_operand precedes due to "[" ... ~ "," ~ "{" ... "}" "]" + // precedes memory_operand, which is just "[" x | x+1 | 1 "]" + | tex_operand + // memory_operand precedes tex_operand due to "[" x | x+1 | 1 "]" + | memory_operand + // vector precedes due to "{" ... "}" + | "-"? ~ vector_operand + // binary expression of identifiers always precede unary + | "-"? ~ identifier ~ ("+" | "|" | "\\") ~ integer ~ lohi_option? + | ("!" | "-")? ~ identifier +} +vector_operand = { + "{" ~ identifier ~ ("," ~ identifier)* ~ "}" +} + +tex_operand = { "[" ~ identifier ~ "," ~ vector_operand ~ "]" } + +builtin_operand = { special_register ~ dimension_modifier | special_register } + +memory_operand = { + "[" ~ address_expression ~ "]" + // todo: check if the order is brackets is fine + | identifier ~ "[" ~ (twin_operand | address_expression | literal_operand) ~ "]" + | "-" ~ memory_operand +} +twin_operand = { + // todo: are we missing something here? + identifier ~ "+=" ~ identifier ~ lohi_option? + | identifier ~ "+=" ~ integer + | identifier ~ "+" ~ identifier ~ lohi_option? +} + +address_expression = { + // precedence because of the plus sign + identifier ~ "+" ~ integer + | identifier ~ lohi_option? + | integer +} + +opcode_spec = { + opcode ~ option* +} +option_list = { option ~ option_list | option } + +function_decl = { + function_decl_header ~ "(" ~ param_entry ~ ")" ~ function_ident_param + | function_decl_header ~ function_ident_param + | function_decl_header +} + +function_ident_param = { + identifier ~ "(" ~ param_list? ~ ")" + | identifier +} + +param_list = _{ + param_entry ~ "," ~ param_list + | param_entry +} + +special_register = { + "%clock" + | "%halfclock" + | "%clock64" + | "%ctaid" + | "%envreg" ~ ASCII_DIGIT+ + | "%gridid" + | "%laneid" + | "%lanemask_eq" + | "%lanemask_le" + | "%lanemask_lt" + | "%lanemask_ge" + | "%lanemask_gt" + | "%nctaid" + | "%ntid" + | "%nsmid" + | "%nwarpid" + | "%pm" ~ ('0'..'3') + | "%smid" + | "%tid" + | "%warpid" + | "WARP_SZ" +} + +dimension_modifier = { ".0" | ".1" | ".2" | ".x" | ".y" | ".z" } + +function_decl_header = { + function_decl_header_entry + | function_decl_header_visible_entry + | function_decl_header_weak_entry + | function_decl_header_func + | function_decl_header_visible_func + | function_decl_header_weak_func + | function_decl_header_extern_func +} +function_decl_header_entry = { ".entry" } +function_decl_header_visible_entry = { ".visible" ~ ".entry" } +function_decl_header_weak_entry = { ".weak" ~ ".entry" } +function_decl_header_func = { ".func" } +function_decl_header_visible_func = { ".visible" ~ ".func" } +function_decl_header_weak_func = { ".weak" ~ ".func" } +function_decl_header_extern_func = { ".extern" ~ ".func" } + +param_entry = { + ".param" ~ variable_spec ~ ptr_spec? ~ identifier_spec + | ".reg" ~ variable_spec ~ identifier_spec +} +identifier_spec = { + identifier ~ "<" ~ integer ~ ">" + | identifier ~ "[" ~ "]" + | identifier ~ "[" ~ integer ~ "]" + | identifier +} + +address_size_directive = { ".address_size" ~ integer } +version_directive = { ".version" ~ double ~ "+"? } +target_directive = { ".target" ~ identifier ~ ("," ~ identifier){0,2} } +file_directive = { ".file" ~ integer ~ quoted ~ (("," ~ integer){2})? } +loc_directive = { ".loc" ~ integer ~ integer ~ integer } + +block_spec = { + ".maxntid" ~ integer ~ "," ~ integer ~ "," ~ integer + | ".minnctapersm" ~ integer + | ".maxnctapersm" ~ integer +} +block_spec_list = { block_spec ~ block_spec_list | block_spec } + +ptr_spec = { + ".ptr" ~ ptr_space_spec ~ ptr_align_spec + | ".ptr" ~ ptr_align_spec +} +ptr_space_spec = { ".global" | ".local" | ".shared" | ".const" } +ptr_align_spec = { ".align" ~ integer } +align_spec = { ".align" ~ integer } + +var_spec = { + space_spec | type_spec | align_spec + | ".visible" | ".extern" | ".weak" +} +variable_spec = { var_spec ~ variable_spec | var_spec } + +option = { + type_spec + | compare_spec + | addressable_spec + | rounding_mode + | wmma_spec + | prmt_spec + | atomic_operation_spec + | ".sync" + | ".arrive" + | ".red" + | ".uni" + | ".wide" + | ".any" + | ".all" + | ".ballot" + | ".global" + | ".cta" + | ".sys" + | ".1d" + | ".2d" + | ".3d" + | ".sat" + | ".ftz" + | ".neg" + | ".approx" + | ".full" + | ".exit" + | ".abs" + | ".to" + | ".half" + | ".extp" + | ".ca" + | ".cg" + | ".cs" + | ".lu" + | ".cv" + | ".wb" + | ".wt" + | ".nc" + | ".up" + | ".down" + | ".bfly" + | ".idx" +} + +atomic_operation_spec = { + ".and" + | ".popc" + | ".or" + | ".xor" + | ".cas" + | ".exch" + | ".add" + | ".inc" + | ".dec" + | ".min" + | ".max" +} +rounding_mode = { floating_point_rounding_mode | integer_rounding_mode } +floating_point_rounding_mode = { ".rn" | ".rz" | ".rm" | ".rp" } +integer_rounding_mode = { ".rni" | ".rzi" | ".rmi" | ".rpi" } + +compare_spec = { + ".eq" | ".ne" | ".lt" | ".le" | ".gt" | ".ge" | ".lo" | ".ls" + | ".hi" | ".hs" | ".equ" | ".neu" | ".ltu" | ".leu" | ".gtu" + | ".geu" | ".num" | ".nan" +} +prmt_spec = { ".f4e" | ".b4e" | ".rc8" | ".rc16" | ".ecl" | ".ecr" } +wmma_spec = { + wmma_directive ~ layout ~ configuration + | wmma_directive ~ layout ~ layout ~ configuration +} +wmma_directive = { ".a.sync" | ".b.sync" | ".c.sync" | ".d.sync" | ".mma.sync" } +layout = { ".row" | ".col" } +configuration = { ".m16n16k16" | ".m32n8k16" | ".m8n32k16" } + +prototype_block = { prototype_decl ~ prototype_call } +prototype_decl = { + identifier ~ ":" + ~ ".callprototype" ~ "(" ~ prototype_param? ~ ")" + ~ identifier ~ "(" ~ prototype_param? ~ ")" ~ ";" +} +prototype_call = { + opcode ~ "(" ~ identifier ~ ")" ~ "," + ~ operand ~ "," ~ "(" ~ identifier ~ ")" ~ "," ~ identifier ~ ";" + | + opcode ~ identifier ~ "," + ~ "(" ~ identifier ~ ")" ~ "," ~ identifier ~ ";" +} +prototype_param = { ".param" ~ (".b32" | ".b64") ~ identifier } +opcode = { + // note: "addp" == "addc" > "add" + // note: "andn" > "and" + "abs" | "addp" | "addc" | "add" | "andn" | "and" | "atom" | "activemask" + // note: "bar.warp" > "bar" + // note: "bfind" > "bfi" + // note: "breakaddr" > "break" + | "bar.warp" | "bar" | "bfe" | "bfind" | "bfi" | "bra" | "brx" | "brev" | "brkpt" | "breakaddr" | "break" + // note: "callp" > "call" + // note: "cvta" > "cvt" + | "callp" | "call" | "clz" | "cnot" | "cos" | "cvta" | "cvt" + | "div" | "dp4a" + | "ex2" | "exit" + | "fma" + | "isspacep" + // note: "ld.volatile" > "ldu" > "ld" + | "ld.volatile" | "ldu" | "ld" | "lg2" + // note: "mad24" > "madc" == "madp" > "mad" + // note: "mul24" > "mul" + | "mad24" | "madc" | "madp" | "mad" | "max" | "membar" | "min" | "mov" | "mul24" | "mul" + | "neg" | "nandn" | "norn" | "not" | "nop" + // note: "orn" > "or" + | "orn" | "or" + // note: "prefetchu" > "prefetch" + | "pmevent" | "popc" | "prefetchu" | "prefetch" | "prmt" + // note: "retp" > "ret" + | "rcp" | "red" | "rem" | "retp" | "ret" | "rsqrt" + // note: "setp" > "set" + // note: "st.volatile" > "st" + // note: "subc" > "sub" + | "sad" | "selp" | "setp" | "set" | "shfl" | "shl" | "shr" | "sin" | "slct" | "sqrt" | "sst" | "ssy" | "st.volatile" | "st" | "subc" | "sub" | "suld" | "sured" | "surst" | "suq" + | "tex" | "txq" | "trap" + // note: "setp" > "set" + | "vabsdiff" | "vadd" | "vmad" | "vmax" | "vmin" | "vset" | "vshl" | "vshr" | "vsub" | "vote" + // note: "wmma.load" == "wmma.store" > "wmma" + | "wmma.load" | "wmma.store" | "wmma" + | "xor" +} + +space_spec = { ".reg" | ".sreg" | addressable_spec } +addressable_spec = { + ".const" | ".global" | ".local" + | ".local" | ".param" | ".shared" + | ".sstarr" | ".surf" | ".tex" +} +type_spec = { vector_spec ~ scalar_type | scalar_type } +vector_spec = { ".v2" | ".v3" | ".v4" } +scalar_type = { + ".s8" | ".s16" | ".s32" | ".s64" + | ".u8" | ".u16" | ".u32" | ".u64" + | ".f16" | ".f32" | ".f64" | ".ff64" + | ".b8" | ".b16" | ".b32" | ".b64" | ".bb64" | ".bb128" + | ".pred" | ".texref" | ".sampleref" | ".surfref" +} + +lohi_option = { ".lo" | ".hi" } +literal_operand = { + // float always precedes because it starts with 0[fF] + float + // double always precedes integers because it either + // - contains a decimal point (.) + // - starts with 0[dD] + | double + | integer +} + +quoted = _{ "\"" ~ string ~ "\"" } +string = ${ (!"\"" ~ ANY)* } + +// followsym: [a-zA-Z0-9_$] +// identifier: [a-zA-Z]{followsym}* | {[_$%]{followsym}+ +followsym = @{ "$" | "_" | ASCII_ALPHANUMERIC } +identifier = @{ ("_" | "$" | "%") ~ followsym+ | ASCII_ALPHA ~ followsym* } + +// 0[xX][0-9a-fA-F]+U? +// 0[0-7]+U? +// 0[bB][01]+U? +// [-]?[0-9]+U? +integer = @{ + hex + | octal + | binary + | ("-"? ~ ASCII_DIGIT+ ~ "U"?) +} +hex = @{ "0" ~ ("x" | "X" ) ~ ASCII_HEX_DIGIT+ ~ "U"? } +binary = @{ "0" ~ ("b" | "B") ~ ASCII_BIN_DIGIT+ ~ "U"? } +octal = @{ "0" ~ ASCII_OCT_DIGIT+ ~ "U"? } + +// [0-9]+\.[0-9]+ +// 0[dD][0-9a-fA-F]{16} +double = @{ + ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ + | "0" ~ ("d" | "D") ~ ASCII_HEX_DIGIT{16} +} + +// 0[fF][0-9a-fA-F]{8} +float = @{ + "0" ~ ("f" | "F") ~ ASCII_HEX_DIGIT{8} +} + +COMMENT = _{ + "/*" ~ (!"*/" ~ ANY)* ~ "*/" // multiline + | "//" ~ (!"\n" ~ ANY)* // single line +} +WHITESPACE = _{ " " | "\t" | NEWLINE } // skip whitespace diff --git a/ptx/src/ptx.rs b/ptx/src/ptx.rs new file mode 100644 index 00000000..5a6ab1be --- /dev/null +++ b/ptx/src/ptx.rs @@ -0,0 +1,3 @@ +#[derive(Parser)] +#[grammar = "./ptx.pest"] +pub struct Parser; diff --git a/sass/Cargo.toml b/sass/Cargo.toml new file mode 100644 index 00000000..5f9cf326 --- /dev/null +++ b/sass/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "sass" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/sass/README.md b/sass/README.md new file mode 100644 index 00000000..6f507e7e --- /dev/null +++ b/sass/README.md @@ -0,0 +1,3 @@ +## gpucachesim SASS + +In the future, we aim to support functional simulation for SASS. diff --git a/sass/src/lib.rs b/sass/src/lib.rs new file mode 100644 index 00000000..7d12d9af --- /dev/null +++ b/sass/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +}