用 Python 脚本将以缩进表示层级的树结构文本转换为 JSON

发布于:

# Python 代码

python
# -*- coding: utf-8 -*-
"""Convert an indentation-delimited tree outline (text) into nested JSON.

Each meaningful input line looks like ``<indent>[-]<digits><name>[:]``.
The amount of leading whitespace determines the depth of the node in the
tree; the digits become the node's ``key`` and the rest its ``name``.
"""
import codecs
import json
import re

# Optional leading dash, then the numeric key, then the rest of the line.
_NODE_PATTERN = re.compile(r'(?:-)?(\d+)(.*)')


def parse_line(line, indent_width=2):
    """Parse one outline line into ``(level, key, name)``.

    Args:
        line: A raw line of the outline, possibly with a trailing newline.
        indent_width: Number of leading whitespace characters that make up
            one nesting level (default 2).

    Returns:
        A tuple ``(level, key, name)`` where ``level`` is the zero-based
        depth, ``key`` is the digit string and ``name`` is the remaining
        text with surrounding whitespace and trailing ``:`` removed.
        ``None`` if the line does not start (after an optional ``-``) with
        digits, e.g. blank lines.

    Raises:
        ValueError: If ``indent_width`` is smaller than 1.
    """
    if indent_width < 1:
        raise ValueError("indent_width must be a positive integer")

    # Depth is derived from the count of leading whitespace characters.
    indent = len(line) - len(line.lstrip())
    level = indent // indent_width

    match = _NODE_PATTERN.match(line.strip())
    if not match:
        return None

    key = match.group(1)
    # Drop a trailing colon that may follow the name.
    name = match.group(2).strip().rstrip(':')
    return level, key, name


def build_tree(lines, indent_width=2):
    """Build a nested list of node dicts from outline lines.

    Every node is ``{"name": ..., "key": ...}``; a ``"children"`` list is
    added only to nodes that actually have deeper-indented nodes below
    them. Lines that ``parse_line`` rejects (blank lines, free text) are
    skipped and do not interrupt the parent/child relationship of the
    nodes around them.

    Args:
        lines: Iterable of raw outline lines.
        indent_width: Whitespace characters per nesting level (default 2).

    Returns:
        A list of top-level node dicts.
    """
    result = []
    # Stack of (node, level) for the current chain of ancestors. The
    # sentinel entry (None, -1) stands for the virtual root and is never
    # popped because real levels are always >= 0.
    stack = [(None, -1)]

    for line in lines:
        parsed = parse_line(line, indent_width)
        if parsed is None:
            continue

        level, key, name = parsed
        node = {"name": name, "key": key}

        # Unwind to the nearest ancestor that is strictly shallower.
        while stack[-1][1] >= level:
            stack.pop()

        parent = stack[-1][0]
        if parent is None:
            result.append(node)
        else:
            # Create the children list lazily, when a child really shows
            # up, instead of peeking at the physically next line (which
            # may be blank or unparseable).
            parent.setdefault("children", []).append(node)

        stack.append((node, level))

    return result


def main():
    """Read ``industry.txt``, build the tree and write ``result.json``."""
    # 'utf-8-sig' transparently skips a BOM if present; otherwise the BOM
    # would stick to the first line and make it unparseable.
    with codecs.open('industry.txt', 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    tree = build_tree(lines)

    with codecs.open('result.json', 'w', encoding='utf-8') as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    main()

# 示例:输入的 txt 文件与期望输出的 json 文件