Python 脚本：将用缩进表示层级的树结构文本转换为 JSON

发布于：2025年03月26日

#python 代码

python
# -*- coding: utf-8 -*-
import json
import re
import codecs  # 添加 codecs 模块导入
 
def parse_line(line, indent_width=2):
    """Parse one line of an indented outline into ``(level, key, name)``.

    A line is expected to look like ``<indent>[-]<digits><name>[:]``. The
    nesting level is the number of leading whitespace characters divided
    (integer division) by ``indent_width``.

    Args:
        line: One raw outline line; a trailing newline is allowed.
        indent_width: Number of leading whitespace characters that make up
            one nesting level. Defaults to 2.

    Returns:
        A ``(level, key, name)`` tuple, where ``key`` is the leading run of
        digits (as a string) and ``name`` is the rest of the line without
        surrounding whitespace or trailing colons. ``None`` when the line,
        after its indentation and an optional ``-``, does not start with a
        digit (this includes blank lines).

    Raises:
        ValueError: If ``indent_width`` is smaller than 1.
    """
    if indent_width < 1:
        raise ValueError("indent_width must be at least 1")

    # Every leading whitespace character counts as one column of indentation.
    body = line.lstrip()
    level = (len(line) - len(body)) // indent_width

    # Optional leading dash, then the numeric key, then the free-form name.
    match = re.match(r'(?:-)?(\d+)(.*)', body.rstrip())
    if match is None:
        return None

    key, rest = match.groups()
    # Drop trailing colons -- ASCII ':' as well as the full-width U+FF1A that
    # is common in Chinese text -- and any whitespace that preceded them.
    name = rest.strip().rstrip(u':\uff1a').rstrip()
    return level, key, name
 
def build_tree(lines):
    """Build a nested list of nodes from the lines of an indented outline.

    Every line accepted by ``parse_line`` becomes a dict with ``"name"`` and
    ``"key"`` entries. A node is attached to the nearest preceding node whose
    level is smaller than its own; a ``"children"`` list is added to a node
    only once it actually receives a child. Lines for which ``parse_line``
    returns ``None`` are skipped and do not influence the nesting, so a blank
    line between a parent and its first child no longer detaches the child.

    Args:
        lines: Iterable of raw outline lines.

    Returns:
        The list of top-level nodes.
    """
    roots = []
    # Chain of currently open ancestors as (level, node), outermost first.
    ancestors = []

    for line in lines:
        parsed = parse_line(line)
        if not parsed:
            continue

        level, key, name = parsed
        node = {
            "name": name,
            "key": key,
        }

        # Close every ancestor that is at the same depth or deeper: such a
        # node cannot be the parent of the current one.
        while ancestors and ancestors[-1][0] >= level:
            ancestors.pop()

        if ancestors:
            # Create the parent's "children" list lazily, on its first child.
            ancestors[-1][1].setdefault("children", []).append(node)
        else:
            roots.append(node)

        # Any node may turn out to be a parent of later lines.
        ancestors.append((level, node))

    return roots
 
def main(input_path='industry.txt', output_path='result.json'):
    """Convert an indented outline text file into a JSON tree file.

    Args:
        input_path: Path of the UTF-8 outline file to read. Defaults to
            ``'industry.txt'``.
        output_path: Path of the JSON file to write. Defaults to
            ``'result.json'``.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark.
    # With plain 'utf-8' the BOM stays attached to the first line, which then
    # fails to parse and is silently left out of the tree.
    with codecs.open(input_path, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    # Turn the flat list of lines into nested nodes.
    tree = build_tree(lines)

    # ensure_ascii=False keeps non-ASCII names readable in the output file.
    with codecs.open(output_path, 'w', encoding='utf-8') as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)
 
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

Python 脚本：将用缩进表示层级的树结构文本转换为 JSON

#python 代码

#示例 txt 文件与期望的 json 文件