// Source path: NAC_Blockchain/charter-compiler/src/semantic/mod.rs

// Charter Semantic Analyzer
// Performs type checking and semantic validation based on the NAC UDM type system.
use crate::parser::ast::*;
use nac_udm::prelude::*;
use nac_udm::primitives::SovereigntyType;
use std::collections::HashMap;
use thiserror::Error;
/// Errors reported by the semantic analyzer.
///
/// The `#[error]` attribute strings are the messages shown to users; they are
/// runtime text and are intentionally left in their original language.
#[derive(Error, Debug)]
#[allow(dead_code)]
pub enum SemanticError {
/// A value's type does not match what its context requires. Both fields are
/// human-readable descriptions (this module fills `actual` with the `Debug`
/// rendering of the offending type).
#[error("类型不匹配: 期望 {expected}, 实际 {actual}")]
TypeMismatch { expected: String, actual: String },
/// An identifier was used that no enclosing scope defines; carries the name.
#[error("未定义的变量: {0}")]
UndefinedVariable(String),
/// A call names an unknown function; carries the name.
/// Not constructed by the code visible in this module.
#[error("未定义的函数: {0}")]
UndefinedFunction(String),
/// A GNACS code failed validation; carries a description of the problem.
#[error("无效的GNACS编码: {0}")]
InvalidGNACSCode(String),
/// A required GNACS declaration is absent.
/// Not constructed by the code visible in this module.
#[error("缺少GNACS声明")]
MissingGNACSDeclaration,
/// A sovereignty type is invalid.
/// Not constructed by the code visible in this module.
#[error("无效的主权类型")]
InvalidSovereigntyType,
/// The operand of a `RequireCR` / `VerifyCR` statement is not a constitutional receipt.
#[error("宪法收据验证失败")]
ConstitutionalReceiptValidationFailed,
/// Two top-level items of the same kind share a name; carries the name.
#[error("重复定义: {0}")]
DuplicateDefinition(String),
}
#[allow(dead_code)]
pub struct SemanticAnalyzer {
// 符号表
variables: HashMap<String, TypeAnnotation>,
functions: HashMap<String, FunctionSignature>,
assets: HashMap<String, AssetDefinition>,
contracts: HashMap<String, ContractDefinition>,
// 当前作用域
current_scope: Vec<HashMap<String, TypeAnnotation>>,
}
/// Signature of a top-level function as recorded during the collection pass.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct FunctionSignature {
// Function name, copied from the declaration.
name: String,
// Declared parameter types, in declaration order.
parameters: Vec<TypeAnnotation>,
// Declared return type; `None` when the declaration has no return type.
return_type: Option<TypeAnnotation>,
}
impl SemanticAnalyzer {
/// Creates an analyzer with empty symbol tables and one empty (global) scope.
pub fn new() -> Self {
    // The scope stack starts with a single frame so that bindings added before
    // any `push_scope` call have somewhere to live.
    let global_scope = HashMap::new();
    Self {
        current_scope: vec![global_scope],
        contracts: HashMap::new(),
        assets: HashMap::new(),
        functions: HashMap::new(),
        variables: HashMap::new(),
    }
}
/// Analyzes a whole program in two passes and stops at the first error.
///
/// Pass 1 registers every top-level asset, contract and function; pass 2
/// validates each of them. Other top-level item kinds are ignored by both passes.
pub fn analyze(&mut self, program: &Program) -> Result<(), SemanticError> {
    // Pass 1: collect all top-level definitions.
    program.items.iter().try_for_each(|item| match item {
        TopLevelItem::Asset(asset) => self.collect_asset(asset),
        TopLevelItem::Contract(contract) => self.collect_contract(contract),
        TopLevelItem::Function(function) => self.collect_function(function),
        _ => Ok(()),
    })?;

    // Pass 2: validate all definitions.
    program.items.iter().try_for_each(|item| match item {
        TopLevelItem::Asset(asset) => self.validate_asset(asset),
        TopLevelItem::Contract(contract) => self.validate_contract(contract),
        TopLevelItem::Function(function) => self.validate_function(function),
        _ => Ok(()),
    })
}
fn collect_asset(&mut self, asset: &AssetDefinition) -> Result<(), SemanticError> {
if self.assets.contains_key(&asset.name) {
return Err(SemanticError::DuplicateDefinition(asset.name.clone()));
}
self.assets.insert(asset.name.clone(), asset.clone());
Ok(())
}
fn collect_contract(&mut self, contract: &ContractDefinition) -> Result<(), SemanticError> {
if self.contracts.contains_key(&contract.name) {
return Err(SemanticError::DuplicateDefinition(contract.name.clone()));
}
self.contracts.insert(contract.name.clone(), contract.clone());
Ok(())
}
fn collect_function(&mut self, function: &FunctionDeclaration) -> Result<(), SemanticError> {
if self.functions.contains_key(&function.name) {
return Err(SemanticError::DuplicateDefinition(function.name.clone()));
}
// 构造函数签名
let signature = FunctionSignature {
name: function.name.clone(),
parameters: function.parameters.iter().map(|p| p.type_annotation.clone()).collect(),
return_type: function.return_type.clone(),
};
self.functions.insert(function.name.clone(), signature);
Ok(())
}
/// Validates one asset definition: GNACS code first, then the optional
/// sovereignty declaration, then every field, then every method.
fn validate_asset(&mut self, asset: &AssetDefinition) -> Result<(), SemanticError> {
    self.validate_gnacs_code(&asset.gnacs_code)?;

    // The sovereignty declaration is optional.
    if let Some(sovereignty) = &asset.sovereignty {
        self.validate_sovereignty(sovereignty)?;
    }

    asset
        .fields
        .iter()
        .try_for_each(|field| self.validate_field(field))?;

    asset
        .methods
        .iter()
        .try_for_each(|method| self.validate_method(method))
}
/// Validates one contract definition: every field, then every method.
fn validate_contract(&mut self, contract: &ContractDefinition) -> Result<(), SemanticError> {
    contract
        .fields
        .iter()
        .try_for_each(|field| self.validate_field(field))?;

    contract
        .methods
        .iter()
        .try_for_each(|method| self.validate_method(method))
}
/// Validates a top-level function body inside a fresh scope holding its parameters.
///
/// The scope is popped on both the success and the error path, so a failed
/// validation does not leave a stale scope on the analyzer's scope stack.
///
/// # Errors
/// Returns the first error produced while registering parameters or validating
/// the body.
fn validate_function(&mut self, function: &FunctionDeclaration) -> Result<(), SemanticError> {
    self.push_scope();

    // Collect the outcome instead of returning early with `?`, so the matching
    // `pop_scope` below always runs.
    let outcome = function
        .parameters
        .iter()
        .try_for_each(|param| self.add_variable(&param.name, param.type_annotation.clone()))
        .and_then(|()| self.validate_block(&function.body));

    self.pop_scope();
    outcome
}
/// Validates a field declaration; currently this only checks its type annotation.
fn validate_field(&self, field: &FieldDeclaration) -> Result<(), SemanticError> {
    self.validate_type(&field.type_annotation)
}
/// 验证类型注解是否有效
fn validate_type(&self, type_ann: &TypeAnnotation) -> Result<(), SemanticError> {
match type_ann {
// 基础类型都是有效的
TypeAnnotation::Uint8 | TypeAnnotation::Uint16 | TypeAnnotation::Uint32 |
TypeAnnotation::Uint64 | TypeAnnotation::Uint128 | TypeAnnotation::Uint256 |
TypeAnnotation::Int8 | TypeAnnotation::Int16 | TypeAnnotation::Int32 |
TypeAnnotation::Int64 | TypeAnnotation::Int128 | TypeAnnotation::Int256 |
TypeAnnotation::Bool | TypeAnnotation::Address | TypeAnnotation::String |
TypeAnnotation::Bytes | TypeAnnotation::Hash | TypeAnnotation::Timestamp |
TypeAnnotation::DID | TypeAnnotation::GNACSCode |
TypeAnnotation::ConstitutionalReceipt | TypeAnnotation::AssetInstance |
TypeAnnotation::ACC20 | TypeAnnotation::ACC721 | TypeAnnotation::ACC1155 |
TypeAnnotation::ACCRWA => Ok(()),
// 数组类型需要验证元素类型
TypeAnnotation::Array(element_type, _) => self.validate_type(element_type),
// Vec类型需要验证元素类型
TypeAnnotation::Vec(element_type) => self.validate_type(element_type),
// 引用类型需要验证内部类型
TypeAnnotation::Reference(inner_type) => self.validate_type(inner_type),
}
}
/// Validates a method inside a fresh scope holding its parameters.
///
/// Order of checks: parameters are registered, then every `requires` clause,
/// then every `ensures` clause, then the body. The scope is popped on both the
/// success and the error path, so a failed validation does not leave a stale
/// scope on the analyzer's scope stack.
///
/// NOTE(review): only the method's own parameters are placed in scope here; the
/// fields of the enclosing asset/contract are not. Confirm how method bodies are
/// expected to reference fields.
///
/// # Errors
/// Returns the first error produced by any of the steps above.
fn validate_method(&mut self, method: &MethodDeclaration) -> Result<(), SemanticError> {
    self.push_scope();

    // Collect the outcome instead of returning early with `?`, so the matching
    // `pop_scope` below always runs.
    let outcome = method
        .parameters
        .iter()
        .try_for_each(|param| self.add_variable(&param.name, param.type_annotation.clone()))
        .and_then(|()| {
            method
                .requires
                .iter()
                .try_for_each(|clause| self.validate_expression(clause))
        })
        .and_then(|()| {
            method
                .ensures
                .iter()
                .try_for_each(|clause| self.validate_expression(clause))
        })
        .and_then(|()| self.validate_block(&method.body));

    self.pop_scope();
    outcome
}
/// Validates the statements of a block in order, stopping at the first error.
///
/// The block does not get a scope of its own here; callers push one when needed.
fn validate_block(&mut self, block: &Block) -> Result<(), SemanticError> {
    block
        .statements
        .iter()
        .try_for_each(|stmt| self.validate_statement(stmt))
}
fn validate_statement(&mut self, statement: &Statement) -> Result<(), SemanticError> {
match statement {
Statement::Let(let_stmt) => {
let expr_type = self.infer_expression_type(&let_stmt.value)?;
if let Some(type_annotation) = &let_stmt.type_annotation {
if !self.types_compatible(type_annotation, &expr_type) {
return Err(SemanticError::TypeMismatch {
expected: format!("{:?}", type_annotation),
actual: format!("{:?}", expr_type),
});
}
}
self.add_variable(&let_stmt.name, expr_type)?;
Ok(())
}
Statement::Assign(assign_stmt) => {
let var_type = self.get_variable_type(&assign_stmt.target)?;
let expr_type = self.infer_expression_type(&assign_stmt.value)?;
if !self.types_compatible(&var_type, &expr_type) {
return Err(SemanticError::TypeMismatch {
expected: format!("{:?}", var_type),
actual: format!("{:?}", expr_type),
});
}
Ok(())
}
Statement::If(if_stmt) => {
let cond_type = self.infer_expression_type(&if_stmt.condition)?;
if !matches!(cond_type, TypeAnnotation::Bool) {
return Err(SemanticError::TypeMismatch {
expected: "bool".to_string(),
actual: format!("{:?}", cond_type),
});
}
self.validate_block(&if_stmt.then_block)?;
if let Some(else_block) = &if_stmt.else_block {
self.validate_block(else_block)?;
}
Ok(())
}
Statement::For(for_stmt) => {
self.push_scope();
// 推断迭代器的元素类型
let iterator_type = self.infer_expression_type(&for_stmt.iterable)?;
let element_type = match iterator_type {
TypeAnnotation::Array(element_type, _) => *element_type,
TypeAnnotation::String => TypeAnnotation::Uint8, // 字符串迭代返回字节
TypeAnnotation::Bytes => TypeAnnotation::Uint8, // 字节数组迭代返回字节
_ => {
// 如果不是可迭代类型默认为uint256用于range迭代
TypeAnnotation::Uint256
}
};
self.add_variable(&for_stmt.variable, element_type)?;
self.validate_block(&for_stmt.body)?;
self.pop_scope();
Ok(())
}
Statement::While(while_stmt) => {
let cond_type = self.infer_expression_type(&while_stmt.condition)?;
if !matches!(cond_type, TypeAnnotation::Bool) {
return Err(SemanticError::TypeMismatch {
expected: "bool".to_string(),
actual: format!("{:?}", cond_type),
});
}
self.validate_block(&while_stmt.body)?;
Ok(())
}
Statement::Return(return_stmt) => {
if let Some(value) = &return_stmt.value {
self.validate_expression(value)?;
}
Ok(())
}
Statement::Emit(emit_stmt) => {
// 验证事件名称是否存在
// 实现事件表查找
// 实际应该:
// 1. 从当前合约的事件表中查找事件名称
// 2. 验证参数数量和类型是否匹配
// 3. 如果不匹配,返回错误
// if !self.event_table.contains_key(&emit_stmt.event_name) {
// return Err(SemanticError::EventNotFound(emit_stmt.event_name.clone()));
// }
// 当前简化处理,只验证参数表达式
for arg in &emit_stmt.arguments {
self.validate_expression(arg)?;
}
Ok(())
}
Statement::RequireCR(expr) | Statement::VerifyCR(expr) => {
let expr_type = self.infer_expression_type(expr)?;
if !matches!(expr_type, TypeAnnotation::ConstitutionalReceipt) {
return Err(SemanticError::ConstitutionalReceiptValidationFailed);
}
Ok(())
}
Statement::Expression(expr) => {
self.validate_expression(expr)?;
Ok(())
}
}
}
/// Validates an expression by inferring its type and discarding the result.
fn validate_expression(&self, expr: &Expression) -> Result<(), SemanticError> {
    self.infer_expression_type(expr).map(|_inferred| ())
}
fn infer_expression_type(&self, expr: &Expression) -> Result<TypeAnnotation, SemanticError> {
match expr {
Expression::Integer(_) => Ok(TypeAnnotation::Uint256),
Expression::HexNumber(_) => Ok(TypeAnnotation::Uint256),
Expression::String(_) => Ok(TypeAnnotation::String),
Expression::Boolean(_) => Ok(TypeAnnotation::Bool),
Expression::GNACSCode(_) => Ok(TypeAnnotation::GNACSCode),
Expression::DID(_) => Ok(TypeAnnotation::DID),
Expression::Identifier(name) => self.get_variable_type(name),
Expression::Binary(op, left, right) => {
let left_type = self.infer_expression_type(left)?;
let right_type = self.infer_expression_type(right)?;
match op {
BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div | BinaryOp::Mod => {
if self.is_numeric_type(&left_type) && self.is_numeric_type(&right_type) {
Ok(left_type)
} else {
Err(SemanticError::TypeMismatch {
expected: "numeric type".to_string(),
actual: format!("{:?}", left_type),
})
}
}
BinaryOp::Equal | BinaryOp::NotEqual | BinaryOp::Less | BinaryOp::Greater |
BinaryOp::LessEqual | BinaryOp::GreaterEqual => {
Ok(TypeAnnotation::Bool)
}
BinaryOp::And | BinaryOp::Or => {
if matches!(left_type, TypeAnnotation::Bool) && matches!(right_type, TypeAnnotation::Bool) {
Ok(TypeAnnotation::Bool)
} else {
Err(SemanticError::TypeMismatch {
expected: "bool".to_string(),
actual: format!("{:?}", left_type),
})
}
}
}
}
Expression::Unary(op, expr) => {
let expr_type = self.infer_expression_type(expr)?;
match op {
UnaryOp::Not => {
if matches!(expr_type, TypeAnnotation::Bool) {
Ok(TypeAnnotation::Bool)
} else {
Err(SemanticError::TypeMismatch {
expected: "bool".to_string(),
actual: format!("{:?}", expr_type),
})
}
}
UnaryOp::Neg => {
if self.is_numeric_type(&expr_type) {
Ok(expr_type)
} else {
Err(SemanticError::TypeMismatch {
expected: "numeric type".to_string(),
actual: format!("{:?}", expr_type),
})
}
}
}
}
Expression::FunctionCall(name, args) => {
// 函数调用类型推断
// 验证参数类型
for arg in args {
self.validate_expression(arg)?;
}
// 实现函数表查找,获取准确的返回类型
// 实际应该:
// 1. 从函数表中查找函数名称
// 2. 验证参数数量和类型是否匹配
// 3. 返回函数的返回类型
// if let Some(func_sig) = self.function_table.get(name) {
// return Ok(func_sig.return_type.clone());
// }
// 当前简化处理:
// - 内置函数返回特定类型
// - 其他函数默认返回uint256
match name.as_str() {
"balance" | "allowance" => Ok(TypeAnnotation::Uint256),
"transfer" | "approve" => Ok(TypeAnnotation::Bool),
"gnacs_encode" => Ok(TypeAnnotation::Bytes),
"gnacs_decode" => Ok(TypeAnnotation::String),
"cr_create" | "cr_get" => Ok(TypeAnnotation::ConstitutionalReceipt),
"asset_dna" => Ok(TypeAnnotation::AssetInstance),
_ => Ok(TypeAnnotation::Uint256),
}
}
Expression::MemberAccess(object, member) => {
// 成员访问类型推断
let object_type = self.infer_expression_type(object)?;
// 实现完整的类型成员查找
// 实际应该:
// 1. 根据object_type查找类型定义
// 2. 从类型的成员表中查找成员名称
// 3. 返回成员的类型
// if let Some(type_def) = self.type_table.get(&object_type) {
// if let Some(member_type) = type_def.members.get(member) {
// return Ok(member_type.clone());
// }
// }
// 当前简化处理:根据常见成员名推断
match member.as_str() {
"length" => Ok(TypeAnnotation::Uint256),
"balance" => Ok(TypeAnnotation::Uint256),
"owner" | "sender" | "origin" => Ok(TypeAnnotation::Address),
"timestamp" => Ok(TypeAnnotation::Timestamp),
"value" => Ok(TypeAnnotation::Uint256),
_ => {
// 默认返回对象类型(用于自定义结构体)
Ok(object_type)
}
}
}
Expression::If(condition, then_expr, else_expr) => {
self.validate_expression(condition)?;
self.validate_expression(then_expr)?;
self.validate_expression(else_expr)?;
// if表达式的类型是then分支的类型
let then_type = self.infer_expression_type(then_expr)?;
Ok(then_type)
}
Expression::Cast(expr, target_type) => {
self.validate_expression(expr)?;
// 类型转换的结果类型就是目标类型
Ok(target_type.clone())
}
Expression::ArrayAccess(array, index) => {
// 数组访问类型推断
let array_type = self.infer_expression_type(array)?;
let index_type = self.infer_expression_type(index)?;
// 验证索引类型必须是整数
if !self.is_numeric_type(&index_type) {
return Err(SemanticError::TypeMismatch {
expected: "numeric type".to_string(),
actual: format!("{:?}", index_type),
});
}
// 返回数组元素类型
match array_type {
TypeAnnotation::Array(element_type, _) => Ok(*element_type),
TypeAnnotation::String | TypeAnnotation::Bytes => Ok(TypeAnnotation::Uint8),
_ => Err(SemanticError::TypeMismatch {
expected: "array".to_string(),
actual: format!("{:?}", array_type),
}),
}
}
}
}
/// Performs the additional business-level check on a GNACS code: its checksum.
///
/// Per the original author's note, the code itself has already been validated
/// during parsing.
fn validate_gnacs_code(&self, code: &GNACSCode) -> Result<(), SemanticError> {
    if code.verify_checksum() {
        Ok(())
    } else {
        let detail = format!("Invalid checksum: {}", code.to_hex());
        Err(SemanticError::InvalidGNACSCode(detail))
    }
}
/// Validates an asset's sovereignty declaration. Currently accepts every value.
fn validate_sovereignty(&self, _sovereignty: &SovereigntyType) -> Result<(), SemanticError> {
// Sovereignty-type validity is already guaranteed at the AST level.
Ok(())
}
/// Opens a new, empty innermost scope.
fn push_scope(&mut self) {
    let fresh_scope = HashMap::new();
    self.current_scope.push(fresh_scope);
}
/// Closes the innermost scope, discarding its bindings. Does nothing when the
/// scope stack is already empty.
fn pop_scope(&mut self) {
    let _closed_scope = self.current_scope.pop();
}
/// Binds `name` to `type_annotation` in the innermost scope, replacing any
/// binding of the same name already in that scope.
///
/// Always returns `Ok(())`; when the scope stack is empty the binding is dropped.
fn add_variable(&mut self, name: &str, type_annotation: TypeAnnotation) -> Result<(), SemanticError> {
    match self.current_scope.last_mut() {
        Some(innermost) => {
            innermost.insert(name.to_string(), type_annotation);
        }
        None => {}
    }
    Ok(())
}
/// Looks `name` up from the innermost scope outwards and returns a copy of its type.
///
/// # Errors
/// `UndefinedVariable` when no scope defines `name`.
fn get_variable_type(&self, name: &str) -> Result<TypeAnnotation, SemanticError> {
    self.current_scope
        .iter()
        .rev() // innermost scope first
        .find_map(|scope| scope.get(name))
        .cloned()
        .ok_or_else(|| SemanticError::UndefinedVariable(name.to_string()))
}
fn types_compatible(&self, expected: &TypeAnnotation, actual: &TypeAnnotation) -> bool {
// 简单的类型兼容性检查
std::mem::discriminant(expected) == std::mem::discriminant(actual)
}
/// Returns `true` for every signed or unsigned integer type.
fn is_numeric_type(&self, type_annotation: &TypeAnnotation) -> bool {
    let is_unsigned = matches!(
        type_annotation,
        TypeAnnotation::Uint8
            | TypeAnnotation::Uint16
            | TypeAnnotation::Uint32
            | TypeAnnotation::Uint64
            | TypeAnnotation::Uint128
            | TypeAnnotation::Uint256
    );
    let is_signed = matches!(
        type_annotation,
        TypeAnnotation::Int8
            | TypeAnnotation::Int16
            | TypeAnnotation::Int32
            | TypeAnnotation::Int64
            | TypeAnnotation::Int128
            | TypeAnnotation::Int256
    );
    is_unsigned || is_signed
}
}
/// 语义分析
pub fn analyze(program: &Program) -> anyhow::Result<()> {
let mut analyzer = SemanticAnalyzer::new();
analyzer.analyze(program)?;
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::tokenize;
    use crate::parser::parse;

    /// Asset declaration that is expected to tokenize, parse and analyze cleanly.
    const WELL_FORMED_ASSET: &str = r#"
asset TestAsset {
gnacs: 0x940101120187;
sovereignty: C2;
owner: DID;
}
"#;

    /// Asset declaration whose GNACS code is expected to be rejected by the parser.
    const BAD_GNACS_ASSET: &str = r#"
asset TestAsset {
gnacs: 0x123;
owner: DID;
}
"#;

    /// A program without any items passes analysis.
    #[test]
    fn test_analyze_empty() {
        let empty_program = Program { items: vec![] };
        assert!(analyze(&empty_program).is_ok());
    }

    /// A well-formed asset goes through the whole pipeline without error.
    #[test]
    fn test_analyze_asset() {
        let token_stream = tokenize(WELL_FORMED_ASSET).unwrap();
        let parsed = parse(&token_stream).unwrap();
        assert!(analyze(&parsed).is_ok());
    }

    /// GNACS codes are validated during parsing, so `parse` itself should fail
    /// before the analyzer is ever reached.
    #[test]
    fn test_invalid_gnacs() {
        let token_stream = tokenize(BAD_GNACS_ASSET).unwrap();
        assert!(parse(&token_stream).is_err());
    }
}