// Charter Lexer - 词法分析器 // 将源代码转换为Token流 use logos::Logos; use serde::{Deserialize, Serialize}; #[derive(Logos, Debug, Clone, PartialEq, Serialize, Deserialize)] #[logos(skip r"[ \t\r\n]+")] #[logos(skip r"///[^\n]*")] #[logos(skip r"//[^\n]*")] #[logos(skip r"/\*([^*]|\*[^/])*\*/")] pub enum Token { // 关键字 #[token("asset")] #[token("资产")] Asset, #[token("contract")] #[token("合约")] Contract, #[token("fn")] #[token("函数")] Fn, #[token("let")] #[token("让")] Let, #[token("mut")] #[token("可变")] Mut, #[token("const")] #[token("常量")] Const, #[token("if")] #[token("如果")] If, #[token("else")] #[token("否则")] Else, #[token("for")] #[token("对于")] For, #[token("in")] #[token("在")] In, #[token("while")] #[token("循环")] While, #[token("return")] #[token("返回")] Return, #[token("emit")] #[token("触发")] Emit, #[token("module")] #[token("模块")] Module, #[token("import")] #[token("使用")] Import, #[token("as")] As, // NAC特有关键字 #[token("gnacs")] Gnacs, #[token("sovereignty")] Sovereignty, #[token("require")] #[token("要求")] Require, #[token("requires")] Requires, #[token("ensures")] Ensures, #[token("require_cr")] RequireCR, #[token("verify_cr")] VerifyCR, // 修饰符 #[token("pub")] Pub, #[token("public")] #[token("公开")] Public, #[token("private")] #[token("私有")] Private, #[token("internal")] Internal, #[token("payable")] Payable, #[token("view")] View, #[token("pure")] Pure, // 基础类型 #[token("uint8")] #[token("u8")] Uint8, #[token("uint16")] #[token("u16")] Uint16, #[token("uint32")] #[token("u32")] Uint32, #[token("uint64")] #[token("u64")] Uint64, #[token("uint128")] #[token("u128")] Uint128, #[token("uint256")] #[token("u256")] Uint256, #[token("int8")] #[token("i8")] Int8, #[token("int16")] #[token("i16")] Int16, #[token("int32")] #[token("i32")] Int32, #[token("int64")] #[token("i64")] Int64, #[token("int128")] #[token("i128")] Int128, #[token("int256")] #[token("i256")] Int256, #[token("bool")] Bool, #[token("string")] String, #[token("bytes")] Bytes, #[token("address")] Address, #[token("hash")] Hash, #[token("timestamp")] Timestamp, // NAC类型 #[token("DID")] DID, #[token("GNACSCode")] GNACSCode, #[token("ConstitutionalReceipt")] ConstitutionalReceipt, #[token("AssetInstance")] AssetInstance, #[token("ACC20")] ACC20, #[token("ACC721")] ACC721, #[token("ACC1155")] ACC1155, #[token("ACCRWA")] ACCRWA, // XTZH 汇率系统类型(Issue #61) #[token("XTZHRate")] XTZHRate, #[token("xtzh")] #[token("XTZH")] Xtzh, #[token("@builtin")] BuiltinAttr, #[token("@system")] SystemAttr, #[token("@view")] ViewAttr, #[token("@payable")] PayableAttr, #[token("sdr")] #[token("SDR")] Sdr, #[token("gold_coverage")] GoldCoverage, #[token("emergency_freeze")] EmergencyFreeze, // 主权类型 #[token("A0")] A0, #[token("C0")] C0, #[token("C1")] C1, #[token("C2")] C2, #[token("D0")] D0, #[token("D1")] D1, #[token("D2")] D2, // 布尔字面量 #[token("true")] #[token("真")] True, #[token("false")] #[token("假")] False, // 标识符(支持中文) #[regex(r"[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*", |lex| lex.slice().to_string())] Identifier(String), // 整数字面量 #[regex(r"[0-9]+", |lex| lex.slice().parse().ok())] Integer(u64), // 十六进制数 #[regex(r"0x[0-9a-fA-F]+", |lex| lex.slice().to_string())] HexNumber(String), // 字符串字面量 #[regex(r#""([^"\\]|\\.)*""#, |lex| { let s = lex.slice(); s[1..s.len()-1].to_string() })] StringLiteral(String), // DID字面量 #[regex(r"did:nac:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+", |lex| lex.slice().to_string())] DIDLiteral(String), // 运算符 #[token("+")] Plus, #[token("-")] Minus, #[token("*")] Star, #[token("/")] Slash, #[token("%")] Percent, #[token("=")] Assign, #[token("==")] Equal, #[token("!=")] NotEqual, #[token("<")] Less, #[token(">")] Greater, #[token("<=")] LessEqual, #[token(">=")] GreaterEqual, #[token("&&")] And, #[token("||")] Or, #[token("!")] Not, #[token("&")] Ampersand, // 分隔符 #[token("(")] LeftParen, #[token(")")] RightParen, #[token("{")] LeftBrace, #[token("}")] RightBrace, #[token("[")] LeftBracket, #[token("]")] RightBracket, #[token(",")] Comma, #[token(";")] Semicolon, #[token(":")] Colon, #[token(".")] Dot, #[token("->")] Arrow, } pub fn tokenize(source: &str) -> anyhow::Result> { let mut tokens = Vec::new(); let mut lex = Token::lexer(source); while let Some(token) = lex.next() { match token { Ok(t) => tokens.push(t), Err(_) => { return Err(anyhow::anyhow!( "词法错误: 无法识别的token '{}'", lex.slice() )); } } } Ok(tokens) } #[cfg(test)] mod tests { use super::*; #[test] fn test_tokenize_keywords() { let source = "asset contract fn let if else"; let tokens = tokenize(source).expect("FIX-006: unexpected None/Err"); assert_eq!(tokens.len(), 6); assert_eq!(tokens[0], Token::Asset); assert_eq!(tokens[1], Token::Contract); assert_eq!(tokens[2], Token::Fn); } #[test] fn test_tokenize_nac_types() { let source = "DID GNACSCode ConstitutionalReceipt"; let tokens = tokenize(source).expect("FIX-006: unexpected None/Err"); assert_eq!(tokens.len(), 3); assert_eq!(tokens[0], Token::DID); assert_eq!(tokens[1], Token::GNACSCode); assert_eq!(tokens[2], Token::ConstitutionalReceipt); } #[test] fn test_tokenize_literals() { let source = r#"123 0x1234 "hello" true false"#; let tokens = tokenize(source).expect("FIX-006: unexpected None/Err"); assert_eq!(tokens.len(), 5); assert!(matches!(tokens[0], Token::Integer(123))); assert!(matches!(tokens[1], Token::HexNumber(_))); assert!(matches!(tokens[2], Token::StringLiteral(_))); assert_eq!(tokens[3], Token::True); assert_eq!(tokens[4], Token::False); } #[test] fn test_tokenize_did() { let source = "did:nac:main:user:0x1234"; let tokens = tokenize(source).expect("FIX-006: unexpected None/Err"); assert_eq!(tokens.len(), 1); assert!(matches!(tokens[0], Token::DIDLiteral(_))); } }