// Charter Lexer — lexical analyzer.
// Converts source code into a stream of tokens.
use logos::Logos;
|
||
use serde::{Deserialize, Serialize};
|
||
|
||
#[derive(Logos, Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||
#[logos(skip r"[ \t\r\n]+")]
|
||
#[logos(skip r"///[^\n]*")]
|
||
#[logos(skip r"//[^\n]*")]
|
||
#[logos(skip r"/\*([^*]|\*[^/])*\*/")]
|
||
pub enum Token {
|
||
// 关键字
|
||
#[token("asset")]
|
||
#[token("资产")]
|
||
Asset,
|
||
|
||
#[token("contract")]
|
||
#[token("合约")]
|
||
Contract,
|
||
|
||
#[token("fn")]
|
||
#[token("函数")]
|
||
Fn,
|
||
|
||
#[token("let")]
|
||
#[token("让")]
|
||
Let,
|
||
|
||
#[token("mut")]
|
||
#[token("可变")]
|
||
Mut,
|
||
|
||
#[token("const")]
|
||
#[token("常量")]
|
||
Const,
|
||
|
||
#[token("if")]
|
||
#[token("如果")]
|
||
If,
|
||
|
||
#[token("else")]
|
||
#[token("否则")]
|
||
Else,
|
||
|
||
#[token("for")]
|
||
#[token("对于")]
|
||
For,
|
||
|
||
#[token("in")]
|
||
#[token("在")]
|
||
In,
|
||
|
||
#[token("while")]
|
||
#[token("循环")]
|
||
While,
|
||
|
||
#[token("return")]
|
||
#[token("返回")]
|
||
Return,
|
||
|
||
#[token("emit")]
|
||
#[token("触发")]
|
||
Emit,
|
||
|
||
#[token("module")]
|
||
#[token("模块")]
|
||
Module,
|
||
|
||
#[token("import")]
|
||
#[token("使用")]
|
||
Import,
|
||
|
||
#[token("as")]
|
||
As,
|
||
|
||
// NAC特有关键字
|
||
#[token("gnacs")]
|
||
Gnacs,
|
||
|
||
#[token("sovereignty")]
|
||
Sovereignty,
|
||
|
||
#[token("require")]
|
||
#[token("要求")]
|
||
Require,
|
||
|
||
#[token("requires")]
|
||
Requires,
|
||
|
||
#[token("ensures")]
|
||
Ensures,
|
||
|
||
#[token("require_cr")]
|
||
RequireCR,
|
||
|
||
#[token("verify_cr")]
|
||
VerifyCR,
|
||
|
||
// 修饰符
|
||
#[token("pub")]
|
||
Pub,
|
||
|
||
#[token("public")]
|
||
#[token("公开")]
|
||
Public,
|
||
|
||
#[token("private")]
|
||
#[token("私有")]
|
||
Private,
|
||
|
||
#[token("internal")]
|
||
Internal,
|
||
|
||
#[token("payable")]
|
||
Payable,
|
||
|
||
#[token("view")]
|
||
View,
|
||
|
||
#[token("pure")]
|
||
Pure,
|
||
|
||
// 基础类型
|
||
#[token("uint8")]
|
||
#[token("u8")]
|
||
Uint8,
|
||
|
||
#[token("uint16")]
|
||
#[token("u16")]
|
||
Uint16,
|
||
|
||
#[token("uint32")]
|
||
#[token("u32")]
|
||
Uint32,
|
||
|
||
#[token("uint64")]
|
||
#[token("u64")]
|
||
Uint64,
|
||
|
||
#[token("uint128")]
|
||
#[token("u128")]
|
||
Uint128,
|
||
|
||
#[token("uint256")]
|
||
#[token("u256")]
|
||
Uint256,
|
||
|
||
#[token("int8")]
|
||
#[token("i8")]
|
||
Int8,
|
||
|
||
#[token("int16")]
|
||
#[token("i16")]
|
||
Int16,
|
||
|
||
#[token("int32")]
|
||
#[token("i32")]
|
||
Int32,
|
||
|
||
#[token("int64")]
|
||
#[token("i64")]
|
||
Int64,
|
||
|
||
#[token("int128")]
|
||
#[token("i128")]
|
||
Int128,
|
||
|
||
#[token("int256")]
|
||
#[token("i256")]
|
||
Int256,
|
||
|
||
#[token("bool")]
|
||
Bool,
|
||
|
||
#[token("string")]
|
||
String,
|
||
|
||
#[token("bytes")]
|
||
Bytes,
|
||
|
||
#[token("address")]
|
||
Address,
|
||
|
||
#[token("hash")]
|
||
Hash,
|
||
|
||
#[token("timestamp")]
|
||
Timestamp,
|
||
|
||
// NAC类型
|
||
#[token("DID")]
|
||
DID,
|
||
|
||
#[token("GNACSCode")]
|
||
GNACSCode,
|
||
|
||
#[token("ConstitutionalReceipt")]
|
||
ConstitutionalReceipt,
|
||
|
||
#[token("AssetInstance")]
|
||
AssetInstance,
|
||
|
||
#[token("ACC20")]
|
||
ACC20,
|
||
|
||
#[token("ACC721")]
|
||
ACC721,
|
||
|
||
#[token("ACC1155")]
|
||
ACC1155,
|
||
|
||
#[token("ACCRWA")]
|
||
ACCRWA,
|
||
// XTZH 汇率系统类型(Issue #61)
|
||
#[token("XTZHRate")]
|
||
XTZHRate,
|
||
|
||
#[token("xtzh")]
|
||
#[token("XTZH")]
|
||
Xtzh,
|
||
|
||
#[token("@builtin")]
|
||
BuiltinAttr,
|
||
|
||
#[token("@system")]
|
||
SystemAttr,
|
||
|
||
#[token("@view")]
|
||
ViewAttr,
|
||
|
||
#[token("@payable")]
|
||
PayableAttr,
|
||
|
||
#[token("sdr")]
|
||
#[token("SDR")]
|
||
Sdr,
|
||
|
||
#[token("gold_coverage")]
|
||
GoldCoverage,
|
||
|
||
#[token("emergency_freeze")]
|
||
EmergencyFreeze,
|
||
|
||
// 主权类型
|
||
#[token("A0")]
|
||
A0,
|
||
|
||
#[token("C0")]
|
||
C0,
|
||
|
||
#[token("C1")]
|
||
C1,
|
||
|
||
#[token("C2")]
|
||
C2,
|
||
|
||
#[token("D0")]
|
||
D0,
|
||
|
||
#[token("D1")]
|
||
D1,
|
||
|
||
#[token("D2")]
|
||
D2,
|
||
|
||
// 布尔字面量
|
||
#[token("true")]
|
||
#[token("真")]
|
||
True,
|
||
|
||
#[token("false")]
|
||
#[token("假")]
|
||
False,
|
||
|
||
// 标识符(支持中文)
|
||
#[regex(r"[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*", |lex| lex.slice().to_string())]
|
||
Identifier(String),
|
||
|
||
// 整数字面量
|
||
#[regex(r"[0-9]+", |lex| lex.slice().parse().ok())]
|
||
Integer(u64),
|
||
|
||
// 十六进制数
|
||
#[regex(r"0x[0-9a-fA-F]+", |lex| lex.slice().to_string())]
|
||
HexNumber(String),
|
||
|
||
// 字符串字面量
|
||
#[regex(r#""([^"\\]|\\.)*""#, |lex| {
|
||
let s = lex.slice();
|
||
s[1..s.len()-1].to_string()
|
||
})]
|
||
StringLiteral(String),
|
||
|
||
// DID字面量
|
||
#[regex(r"did:nac:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+", |lex| lex.slice().to_string())]
|
||
DIDLiteral(String),
|
||
|
||
// 运算符
|
||
#[token("+")]
|
||
Plus,
|
||
|
||
#[token("-")]
|
||
Minus,
|
||
|
||
#[token("*")]
|
||
Star,
|
||
|
||
#[token("/")]
|
||
Slash,
|
||
|
||
#[token("%")]
|
||
Percent,
|
||
|
||
#[token("=")]
|
||
Assign,
|
||
|
||
#[token("==")]
|
||
Equal,
|
||
|
||
#[token("!=")]
|
||
NotEqual,
|
||
|
||
#[token("<")]
|
||
Less,
|
||
|
||
#[token(">")]
|
||
Greater,
|
||
|
||
#[token("<=")]
|
||
LessEqual,
|
||
|
||
#[token(">=")]
|
||
GreaterEqual,
|
||
|
||
#[token("&&")]
|
||
And,
|
||
|
||
#[token("||")]
|
||
Or,
|
||
|
||
#[token("!")]
|
||
Not,
|
||
|
||
#[token("&")]
|
||
Ampersand,
|
||
|
||
// 分隔符
|
||
#[token("(")]
|
||
LeftParen,
|
||
|
||
#[token(")")]
|
||
RightParen,
|
||
|
||
#[token("{")]
|
||
LeftBrace,
|
||
|
||
#[token("}")]
|
||
RightBrace,
|
||
|
||
#[token("[")]
|
||
LeftBracket,
|
||
|
||
#[token("]")]
|
||
RightBracket,
|
||
|
||
#[token(",")]
|
||
Comma,
|
||
|
||
#[token(";")]
|
||
Semicolon,
|
||
|
||
#[token(":")]
|
||
Colon,
|
||
|
||
#[token(".")]
|
||
Dot,
|
||
|
||
#[token("->")]
|
||
Arrow,
|
||
}
|
||
|
||
pub fn tokenize(source: &str) -> anyhow::Result<Vec<Token>> {
|
||
let mut tokens = Vec::new();
|
||
let mut lex = Token::lexer(source);
|
||
|
||
while let Some(token) = lex.next() {
|
||
match token {
|
||
Ok(t) => tokens.push(t),
|
||
Err(_) => {
|
||
return Err(anyhow::anyhow!(
|
||
"词法错误: 无法识别的token '{}'",
|
||
lex.slice()
|
||
));
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(tokens)
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // Keywords lex to their dedicated variants, in source order.
    #[test]
    fn test_tokenize_keywords() {
        let tokens = tokenize("asset contract fn let if else")
            .expect("keyword source should tokenize");

        assert_eq!(
            tokens,
            vec![
                Token::Asset,
                Token::Contract,
                Token::Fn,
                Token::Let,
                Token::If,
                Token::Else,
            ]
        );
    }

    // NAC type names are keywords, not identifiers.
    #[test]
    fn test_tokenize_nac_types() {
        let tokens = tokenize("DID GNACSCode ConstitutionalReceipt")
            .expect("NAC type source should tokenize");

        assert_eq!(
            tokens,
            vec![Token::DID, Token::GNACSCode, Token::ConstitutionalReceipt]
        );
    }

    // Integer, hex, string, and boolean literals each lex to their own variant.
    #[test]
    fn test_tokenize_literals() {
        let tokens = tokenize(r#"123 0x1234 "hello" true false"#)
            .expect("literal source should tokenize");

        assert_eq!(tokens.len(), 5);
        assert!(matches!(tokens[0], Token::Integer(123)));
        assert!(matches!(tokens[1], Token::HexNumber(_)));
        assert!(matches!(tokens[2], Token::StringLiteral(_)));
        assert_eq!(tokens[3], Token::True);
        assert_eq!(tokens[4], Token::False);
    }

    // A full DID lexes as a single DIDLiteral, not as identifiers and colons.
    #[test]
    fn test_tokenize_did() {
        let tokens = tokenize("did:nac:main:user:0x1234")
            .expect("DID literal should tokenize");

        assert_eq!(tokens.len(), 1);
        assert!(matches!(tokens[0], Token::DIDLiteral(_)));
    }
}
|