# Language: Python
# -*- coding: utf-8 -*-
import json
import re
import codecs # 添加 codecs 模块导入
def parse_line(line, indent_width=2):
    """Parse one outline line into a ``(level, key, name)`` tuple.

    A line is expected to look like ``<indent>[-]<digits><name>[:]``.

    Args:
        line: Raw text line; it may carry leading indentation and a
            trailing newline.
        indent_width: Number of leading whitespace characters that make up
            one nesting level. Defaults to 2.

    Returns:
        ``(level, key, name)`` where ``level`` is the nesting depth derived
        from the indentation, ``key`` is the run of digits as a string and
        ``name`` is the remaining text with surrounding whitespace and
        trailing colons removed. Returns ``None`` when the stripped line
        does not start with an optional ``-`` followed by digits.
    """
    # Depth is derived from the count of leading whitespace characters;
    # every character (including a tab) counts as one.
    indent = len(line) - len(line.lstrip())
    level = indent // indent_width

    # Optional "-" bullet, then the numeric key, then the free-form name.
    match = re.match(r'(?:-)?(\d+)(.*)', line.strip())
    if match is None:
        return None

    key = match.group(1)
    # Drop trailing colons (ASCII ":" and full-width U+FF1A), then strip
    # again so a name written as "foo :" does not keep a trailing space.
    name = match.group(2).strip().rstrip(u':\uff1a').rstrip()
    return level, key, name
def build_tree(lines):
    """Build a nested list of nodes from indented outline lines.

    Each line is parsed with ``parse_line``; lines it rejects (it returns
    ``None``) are skipped. Every accepted line becomes a dict with the keys
    ``"name"`` and ``"key"``. A ``"children"`` list is added to a node only
    once a more deeply indented line is attached to it.

    Args:
        lines: Iterable of raw text lines.

    Returns:
        The list of top-level nodes.
    """
    # Sentinel root at level -1: real levels are >= 0, so it is never popped.
    root = {"children": []}
    ancestors = [(-1, root)]  # (level, node) pairs, innermost last

    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        level, key, name = parsed
        node = {"name": name, "key": key}

        # Unwind to the nearest ancestor that is strictly shallower.
        while ancestors[-1][0] >= level:
            ancestors.pop()

        # Create "children" lazily instead of peeking at the next raw line,
        # so blank or unparseable lines between a parent and its first
        # child no longer detach the child from its parent.
        parent = ancestors[-1][1]
        parent.setdefault("children", []).append(node)
        ancestors.append((level, node))

    return root["children"]
def main(input_path='industry.txt', output_path='result.json'):
    """Convert an indented outline text file into a JSON tree file.

    Args:
        input_path: Path of the UTF-8 text file to read.
        output_path: Path of the JSON file to write.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM; with
    # plain 'utf-8' the BOM stays on the first line, which then fails to
    # parse and is silently skipped.
    with codecs.open(input_path, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    tree = build_tree(lines)

    # ensure_ascii=False keeps non-ASCII names readable in the output.
    with codecs.open(output_path, 'w', encoding='utf-8') as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()