// NAC_Blockchain/charter-compiler/src/lexer/mod.rs

// Charter Lexer - lexical analyzer
// Converts Charter source code into a stream of Tokens
use logos::Logos;
use serde::{Deserialize, Serialize};
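/// The complete set of lexical tokens recognized by the Charter language.
/// Most keywords accept both an English and a Chinese spelling; whitespace
/// and comments never reach the parser because they are consumed by the
/// `skip` patterns below.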
#[derive(Logos, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[logos(skip r"[ \t\r\n]+")]
#[logos(skip r"///[^\n]*")]
#[logos(skip r"//[^\n]*")]
#[logos(skip r"/\*([^*]|\*[^/])*\*/")]
pub enum Token {
// Keywords
#[token("asset")]
#[token("资产")]
Asset,
#[token("contract")]
#[token("合约")]
Contract,
#[token("fn")]
#[token("函数")]
Fn,
#[token("let")]
#[token("")]
Let,
#[token("mut")]
#[token("可变")]
Mut,
#[token("const")]
#[token("常量")]
Const,
#[token("if")]
#[token("如果")]
If,
#[token("else")]
#[token("否则")]
Else,
#[token("for")]
#[token("对于")]
For,
#[token("in")]
#[token("")]
In,
#[token("while")]
#[token("循环")]
While,
#[token("return")]
#[token("返回")]
Return,
#[token("emit")]
#[token("触发")]
Emit,
#[token("module")]
#[token("模块")]
Module,
#[token("import")]
#[token("使用")]
Import,
#[token("as")]
As,
// NAC-specific keywords
#[token("gnacs")]
Gnacs,
#[token("sovereignty")]
Sovereignty,
#[token("require")]
#[token("要求")]
Require,
#[token("requires")]
Requires,
#[token("ensures")]
Ensures,
#[token("require_cr")]
RequireCR,
#[token("verify_cr")]
VerifyCR,
// Modifiers
#[token("pub")]
Pub,
#[token("public")]
#[token("公开")]
Public,
#[token("private")]
#[token("私有")]
Private,
#[token("internal")]
Internal,
#[token("payable")]
Payable,
#[token("view")]
View,
#[token("pure")]
Pure,
// Primitive types
#[token("uint8")]
#[token("u8")]
Uint8,
#[token("uint16")]
#[token("u16")]
Uint16,
#[token("uint32")]
#[token("u32")]
Uint32,
#[token("uint64")]
#[token("u64")]
Uint64,
#[token("uint128")]
#[token("u128")]
Uint128,
#[token("uint256")]
#[token("u256")]
Uint256,
#[token("int8")]
#[token("i8")]
Int8,
#[token("int16")]
#[token("i16")]
Int16,
#[token("int32")]
#[token("i32")]
Int32,
#[token("int64")]
#[token("i64")]
Int64,
#[token("int128")]
#[token("i128")]
Int128,
#[token("int256")]
#[token("i256")]
Int256,
#[token("bool")]
Bool,
#[token("string")]
String,
#[token("bytes")]
Bytes,
#[token("address")]
Address,
#[token("hash")]
Hash,
#[token("timestamp")]
Timestamp,
// NAC types
#[token("DID")]
DID,
#[token("GNACSCode")]
GNACSCode,
#[token("ConstitutionalReceipt")]
ConstitutionalReceipt,
#[token("AssetInstance")]
AssetInstance,
#[token("ACC20")]
ACC20,
#[token("ACC721")]
ACC721,
#[token("ACC1155")]
ACC1155,
#[token("ACCRWA")]
ACCRWA,
// XTZH exchange-rate system types (Issue #61)
#[token("XTZHRate")]
XTZHRate,
#[token("xtzh")]
#[token("XTZH")]
Xtzh,
#[token("@builtin")]
BuiltinAttr,
#[token("@system")]
SystemAttr,
#[token("@view")]
ViewAttr,
#[token("@payable")]
PayableAttr,
#[token("sdr")]
#[token("SDR")]
Sdr,
#[token("gold_coverage")]
GoldCoverage,
#[token("emergency_freeze")]
EmergencyFreeze,
// Sovereignty types
#[token("A0")]
A0,
#[token("C0")]
C0,
#[token("C1")]
C1,
#[token("C2")]
C2,
#[token("D0")]
D0,
#[token("D1")]
D1,
#[token("D2")]
D2,
// Boolean literals
#[token("true")]
True,
#[token("false")]
False,
// Identifiers (Chinese characters allowed)
#[regex(r"[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*", |lex| lex.slice().to_string())]
Identifier(String),
// Integer literals
#[regex(r"[0-9]+", |lex| lex.slice().parse().ok())]
Integer(u64),
// Hexadecimal number literals
#[regex(r"0x[0-9a-fA-F]+", |lex| lex.slice().to_string())]
HexNumber(String),
// String literals (surrounding quotes are stripped in the callback)
#[regex(r#""([^"\\]|\\.)*""#, |lex| {
let s = lex.slice();
s[1..s.len()-1].to_string()
})]
StringLiteral(String),
// DID literals
#[regex(r"did:nac:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+:[a-zA-Z0-9_]+", |lex| lex.slice().to_string())]
DIDLiteral(String),
// Operators
#[token("+")]
Plus,
#[token("-")]
Minus,
#[token("*")]
Star,
#[token("/")]
Slash,
#[token("%")]
Percent,
#[token("=")]
Assign,
#[token("==")]
Equal,
#[token("!=")]
NotEqual,
#[token("<")]
Less,
#[token(">")]
Greater,
#[token("<=")]
LessEqual,
#[token(">=")]
GreaterEqual,
#[token("&&")]
And,
#[token("||")]
Or,
#[token("!")]
Not,
#[token("&")]
Ampersand,
// Delimiters
#[token("(")]
LeftParen,
#[token(")")]
RightParen,
#[token("{")]
LeftBrace,
#[token("}")]
RightBrace,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token(",")]
Comma,
#[token(";")]
Semicolon,
#[token(":")]
Colon,
#[token(".")]
Dot,
#[token("->")]
Arrow,
}
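/// Tokenizes Charter source code into a flat `Vec<Token>`.
/// Stops at the first lexical error and reports the offending slice;
/// whitespace and comments never appear in the output.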
pub fn tokenize(source: &str) -> anyhow::Result<Vec<Token>> {
let mut tokens = Vec::new();
let mut lex = Token::lexer(source);
while let Some(token) = lex.next() {
match token {
Ok(t) => tokens.push(t),
Err(_) => {
return Err(anyhow::anyhow!(
"词法错误: 无法识别的token '{}'",
lex.slice()
));
}
}
}
Ok(tokens)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tokenize_keywords() {
let source = "asset contract fn let if else";
let tokens = tokenize(source).expect("tokenize should succeed");
assert_eq!(tokens.len(), 6);
assert_eq!(tokens[0], Token::Asset);
assert_eq!(tokens[1], Token::Contract);
assert_eq!(tokens[2], Token::Fn);
}
#[test]
fn test_tokenize_nac_types() {
let source = "DID GNACSCode ConstitutionalReceipt";
let tokens = tokenize(source).expect("tokenize should succeed");
assert_eq!(tokens.len(), 3);
assert_eq!(tokens[0], Token::DID);
assert_eq!(tokens[1], Token::GNACSCode);
assert_eq!(tokens[2], Token::ConstitutionalReceipt);
}
#[test]
fn test_tokenize_literals() {
let source = r#"123 0x1234 "hello" true false"#;
let tokens = tokenize(source).expect("tokenize should succeed");
assert_eq!(tokens.len(), 5);
assert!(matches!(tokens[0], Token::Integer(123)));
assert!(matches!(tokens[1], Token::HexNumber(_)));
assert!(matches!(tokens[2], Token::StringLiteral(_)));
assert_eq!(tokens[3], Token::True);
assert_eq!(tokens[4], Token::False);
}
#[test]
fn test_tokenize_did() {
let source = "did:nac:main:user:0x1234";
let tokens = tokenize(source).expect("tokenize should succeed");
assert_eq!(tokens.len(), 1);
assert!(matches!(tokens[0], Token::DIDLiteral(_)));
}
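// Sanity check for the bilingual keyword spellings: "资产"/"合约"/"函数"
// are declared above as aliases of Asset/Contract/Fn, so they should
// tokenize to the same variants as the English keywords.
#[test]
fn test_tokenize_chinese_keywords() {
let source = "资产 合约 函数";
let tokens = tokenize(source).expect("tokenize should succeed");
assert_eq!(tokens, vec![Token::Asset, Token::Contract, Token::Fn]);
}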
}