build_data.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. import sys
  2. import yaml
  3. import os
  4. import datetime
  5. from pathlib import Path
  # --- 1. Protobuf dependency import ---
  # NOTE: this relies on school_index_pb2.py, which is generated by protoc.
  try:
      # Generated Protobuf classes (protoc must have been run beforehand).
      from school_index_pb2 import SchoolIndex, School, Adapter, AdapterCategory
  except ImportError as exc:
      # Report on stderr, like the build-failure path of the main block, so the
      # diagnostic is not mixed into regular stdout output.
      print("错误:无法导入 Protobuf 模块。请确认您已运行 protoc 命令生成了 school_index_pb2.py。", file=sys.stderr)
      # Show the underlying error as well: the import can also fail for reasons
      # other than a missing generated file (e.g. a missing dependency of it).
      print(f"ImportError: {exc}", file=sys.stderr)
      sys.exit(1)
  # --- 2. Project constants and version definitions ---
  # Protocol version: bump manually whenever the message structure changes
  # in a major way.
  PROTOCOL_VERSION = 1

  # Locations of the YAML sources and the name of the generated binary file.
  RESOURCES_ROOT = Path("resources")
  ROOT_INDEX_PATH = Path("index") / "root_index.yaml"
  OUTPUT_PB_FILE = "school_index.pb"
  21. # --- 3. 辅助函数:获取版本ID (用于 version_id 字段) ---
  def get_version_id():
      """Return a timestamp-based data version ID.

      The ID is derived solely from the current time as reported by
      ``datetime.datetime.now()`` (no Git lookup is involved) and has the form
      ``TIME_<YYYYMMDDhhmmss>_<mmm>``, where ``mmm`` is the millisecond part
      zero-padded to three digits.
      """
      stamp = datetime.datetime.now()
      # Reduce the microsecond component to whole milliseconds.
      millis = stamp.microsecond // 1000
      return f"{stamp:TIME_%Y%m%d%H%M%S}_{millis:03d}"
  35. # --- 4. 辅助函数:YAML 解析逻辑 ---
  def get_adapter_category_enum(category_str):
      """Map a category name from YAML to its AdapterCategory enum number.

      Args:
          category_str: Name of an AdapterCategory value as written in YAML.

      Returns:
          The matching enum number, or ADAPTER_CATEGORY_UNKNOWN when the value
          is not a string or does not name a defined enum value.
      """
      unknown = AdapterCategory.ADAPTER_CATEGORY_UNKNOWN
      # YAML may yield None, numbers, lists, ... for this key; only a string
      # can name an enum value.
      if not isinstance(category_str, str):
          return unknown
      try:
          # Look the name up among the declared enum values only. A plain
          # getattr() would also match unrelated attributes of the enum wrapper
          # (for example its Name/Value/keys methods) and return a non-enum
          # object.
          return AdapterCategory.Value(category_str)
      except ValueError:
          return unknown
  def parse_all_yaml():
      """Parse the root index and every school's adapters.yaml.

      Reads ROOT_INDEX_PATH, then for each entry under its ``schools`` key
      loads ``RESOURCES_ROOT/<resource_folder>/adapters.yaml`` and stores that
      file's ``adapters`` list on the entry under the ``adapters`` key.
      Schools whose adapters.yaml does not exist are skipped with a warning.

      Returns:
          The list of school entries (dicts loaded from YAML), each extended
          with an ``adapters`` list.

      Raises:
          FileNotFoundError: If the root index file does not exist.
          KeyError: If a school entry has no ``resource_folder`` key.
      """
      if not ROOT_INDEX_PATH.exists():
          raise FileNotFoundError(f"根索引文件未找到: {ROOT_INDEX_PATH}")

      root_data = _load_yaml_document(ROOT_INDEX_PATH)
      # A bare `schools:` key parses as None; treat it like an empty list.
      schools_data = root_data.get('schools') or []

      parsed_schools = []
      for school_entry in schools_data:
          # 1. Build the path of this school's adapters.yaml.
          folder_name = school_entry['resource_folder']
          adapter_yaml_path = RESOURCES_ROOT / folder_name / "adapters.yaml"
          if not adapter_yaml_path.exists():
              # .get() keeps the warning itself from failing when `id` is absent.
              print(f"警告:未找到适配器配置 {adapter_yaml_path},跳过学校 {school_entry.get('id')}")
              continue

          # 2. Load the adapter details and attach them to the school entry.
          adapter_data = _load_yaml_document(adapter_yaml_path)
          school_entry['adapters'] = adapter_data.get('adapters') or []
          parsed_schools.append(school_entry)

      return parsed_schools


  def _load_yaml_document(path):
      """Load one YAML file, returning {} when the document is empty."""
      with open(path, 'r', encoding='utf-8') as f:
          data = yaml.safe_load(f)
      # yaml.safe_load returns None for an empty file; normalise it so that
      # callers can use .get() on the result.
      return {} if data is None else data
  64. # --- 5. 核心构建函数:数据映射与填充 ---
  def build_protobuf_index(parsed_schools_data):
      """Fill a SchoolIndex message from the parsed YAML data.

      Args:
          parsed_schools_data: Iterable of school dicts, each optionally
              carrying an ``adapters`` list of adapter dicts.

      Returns:
          A SchoolIndex message with protocol_version, version_id and one
          ``schools`` entry (including its adapters) per input school.
      """
      school_fields = ('id', 'name', 'initial', 'resource_folder')
      adapter_fields = (
          'adapter_id',
          'adapter_name',
          'asset_js_path',
          'description',
          'maintainer',
      )

      index = SchoolIndex()
      # Top-level version information.
      index.protocol_version = PROTOCOL_VERSION
      index.version_id = get_version_id()

      for school_data in parsed_schools_data:
          # 1. Fill the School message.
          school_msg = index.schools.add()
          _copy_text_fields(school_data, school_msg, school_fields)

          # 2. Fill the adapter list; a null `adapters` value counts as empty.
          for adapter_data in school_data.get('adapters') or []:
              adapter_msg = school_msg.adapters.add()
              _copy_text_fields(adapter_data, adapter_msg, adapter_fields)

              # import_url is only assigned when the key carries a value; an
              # empty string "" is still assigned explicitly so that the field
              # is marked as present (the original note describes it as an
              # optional field).
              import_url = adapter_data.get('import_url')
              if import_url is not None:
                  adapter_msg.import_url = import_url

              # A missing, null or empty category falls back to the UNKNOWN name.
              category_str = adapter_data.get('category') or 'ADAPTER_CATEGORY_UNKNOWN'
              adapter_msg.category = get_adapter_category_enum(category_str)

      return index


  def _copy_text_fields(source, message, field_names):
      """Copy same-named string fields from a dict onto a message ('' if unset)."""
      for field_name in field_names:
          value = source.get(field_name)
          # dict.get(key, '') would still return None for a key that is present
          # with a null YAML value, and assigning None to a string field raises
          # TypeError; substitute '' in that case.
          setattr(message, field_name, '' if value is None else value)
  96. # --- 6. 主执行流程 ---
  def main():
      """Run the build: parse YAML, build the message, write the binary file.

      Returns:
          0 on success, 1 when any stage raised an exception (the error is
          reported on stderr).
      """
      output_file = OUTPUT_PB_FILE
      print(f"目标输出文件: {output_file}")
      try:
          print("--- 阶段一:解析 YAML 源文件 ---")
          parsed_data = parse_all_yaml()

          print("\n--- 阶段二:构建 Protobuf 消息 ---")
          index_message = build_protobuf_index(parsed_data)
          print(f"Protobuf 协议版本: {index_message.protocol_version}")
          print(f"数据版本ID: {index_message.version_id}")

          print("\n--- 阶段三:序列化并写入磁盘 ---")
          # Serialize before opening the file: opening with "wb" truncates it,
          # so a serialization error must not be able to wipe an existing output.
          payload = index_message.SerializeToString()
          with open(output_file, "wb") as f:
              f.write(payload)

          print(f"\n构建成功!二进制文件已保存到: {output_file}")
          # Optional: report the file size to ease debugging.
          print(f"文件大小: {os.path.getsize(output_file) / 1024:.2f} KB")
      except Exception as e:
          # Top-level boundary: report any failure and signal it via exit code.
          # TODO: give a more specific hint for common failures such as a
          # missing file.
          print(f"构建失败!致命错误: {e}", file=sys.stderr)
          return 1
      return 0


  # --- Script entry point ---
  if __name__ == "__main__":
      sys.exit(main())