loops - How to generate a directory tree iteratively, implemented in Python? - Stack Overflow

I have already implemented generating a directory tree using recursion:import osimport reimport hash

I have already implemented generating a directory tree using recursion:

import os
import re
import hashlib

PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│   "
SPACE_PREFIX = "    "

root_dir = ""
tree = []

def extract_number(file_name):
    match = re.search(r"\d+", file_name)
    return int(match.group()) if match else float("inf")

def get_md5(file_path):
    hasher = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            hasher.update(chunk)
    return hasher.hexdigest()

def tree_head():
    tree.append(f"{root_dir}{os.sep}")
    tree.append(PIPE)

def get_entries(directory):
    with os.scandir(directory) as entries:
        entries = list(entries)

        entries = [entry for entry in entries if entry.name != ".DS_Store"]

        entries = sorted(entries, key = lambda entry: (entry.is_file(), extract_number(entry.name)))

    return entries

def tree_body(directory, prefix = ""):
    entries = get_entries(directory)
    entries_count = len(entries)
    for index, entry in enumerate(entries):
        connector = ELBOW if index == entries_count - 1 else TEE
        if entry.is_dir():
            add_directory(entry, index, entries_count, prefix, connector)
        else:
            add_file(entry, prefix, connector)

def add_directory(directory, index, entries_count, prefix, connector):
    tree.append(f"{prefix}{connector} {directory.name}{os.sep}")

    if index != entries_count - 1:
        prefix += PIPE_PREFIX
    else:
        prefix += SPACE_PREFIX

    tree_body(directory, prefix)

def add_file(file, prefix, connector):
    md5_hash = get_md5(file)
    tree.append(f"{prefix}{connector} {file.name} -> {md5_hash}")

def build_tree():
    tree_head()
    tree_body(root_dir)
    return tree

def generate():
    tree = build_tree()

    for entry in tree:
        print(entry)

generate()

This code generates a directory tree with file and directory names, as well as their MD5 hashes. Here's an example of the output that this script might produce, assuming the specified directory structure:

root_dir/ 
│
├── folder1/
│   ├── file1.txt -> d41d8cd98f00b204e9800998ecf8427e
│   └── file2.txt -> 098f6bcd4621d373cade4e832627b4f6
│
└── folder2/
    ├── file3.txt -> 6dcd4ce23d88e2ee9568ba546c007c63
    └── file4.txt -> d41d8cd98f00b204e9800998ecf8427e

How can the tree_body() function be converted from a recursive implementation to an iterative one? I have tried many methods and found it difficult to print in the correct order. I don't know what the correct approach for designing the algorithm for this problem looks like.

I have already implemented generating a directory tree using recursion:

import os
import re
import hashlib

PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│   "
SPACE_PREFIX = "    "

root_dir = ""
tree = []

def extract_number(file_name):
    match = re.search(r"\d+", file_name)
    return int(match.group()) if match else float("inf")

def get_md5(file_path):
    hasher = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            hasher.update(chunk)
    return hasher.hexdigest()

def tree_head():
    tree.append(f"{root_dir}{os.sep}")
    tree.append(PIPE)

def get_entries(directory):
    with os.scandir(directory) as entries:
        entries = list(entries)

        entries = [entry for entry in entries if entry.name != ".DS_Store"]

        entries = sorted(entries, key = lambda entry: (entry.is_file(), extract_number(entry.name)))

    return entries

def tree_body(directory, prefix = ""):
    entries = get_entries(directory)
    entries_count = len(entries)
    for index, entry in enumerate(entries):
        connector = ELBOW if index == entries_count - 1 else TEE
        if entry.is_dir():
            add_directory(entry, index, entries_count, prefix, connector)
        else:
            add_file(entry, prefix, connector)

def add_directory(directory, index, entries_count, prefix, connector):
    tree.append(f"{prefix}{connector} {directory.name}{os.sep}")

    if index != entries_count - 1:
        prefix += PIPE_PREFIX
    else:
        prefix += SPACE_PREFIX

    tree_body(directory, prefix)

def add_file(file, prefix, connector):
    md5_hash = get_md5(file)
    tree.append(f"{prefix}{connector} {file.name} -> {md5_hash}")

def build_tree():
    tree_head()
    tree_body(root_dir)
    return tree

def generate():
    tree = build_tree()

    for entry in tree:
        print(entry)

generate()

This code generates a directory tree with file and directory names, as well as their MD5 hashes. Here's an example of the output that this script might produce, assuming the specified directory structure:

root_dir/ 
│
├── folder1/
│   ├── file1.txt -> d41d8cd98f00b204e9800998ecf8427e
│   └── file2.txt -> 098f6bcd4621d373cade4e832627b4f6
│
└── folder2/
    ├── file3.txt -> 6dcd4ce23d88e2ee9568ba546c007c63
    └── file4.txt -> d41d8cd98f00b204e9800998ecf8427e

How can the tree_body() function be converted from a recursive implementation to an iterative one? I have tried many methods and found it difficult to print in the correct order. I don't know what the correct approach for designing the algorithm for this problem looks like.

Share Improve this question edited Nov 30, 2024 at 13:43 Shao Kahn asked Nov 22, 2024 at 6:33 Shao KahnShao Kahn 773 silver badges12 bronze badges 6
  • why do you want it to iterative? – Albin Paul Commented Nov 22, 2024 at 7:27
  • You can push and pop from a stack to make the code iterative. I see no benefits tho. – Albin Paul Commented Nov 22, 2024 at 7:28
  • @AlbinPaul I have tried both the map and stack methods. Iteration is easy, but it's difficult to output at the correct levels. As for the benefits, I don't want recursion in my code. – Shao Kahn Commented Nov 22, 2024 at 7:40
  • 1 Honestly, this is a perfect fit for recursion. Reworking it to an iterative solution will just be uglier: use an explicit stack instead of the call stack provided by Python. If you’re working with trees, I’d suggest getting comfortable with recursion - it’s pretty core to many of the algorithms on tree structures. – nneonneo Commented Nov 22, 2024 at 8:01
  • 1 "I don't want recursion in my code": bad decision. Instead of using the stack that is perfectly fit for such mechanics you'll find yourself with your own implementation of a stack just to implement the same algorithm but avoid calling it "recursion". – trincot Commented Nov 22, 2024 at 8:05
 |  Show 1 more comment

3 Answers 3

Reset to default 0

Here is a stack-based approach that does not rely on recursion -

import os

def tree(init):
  stack = list((p, 0) for p in os.scandir(init))
  while stack:
    path, level = stack.pop()
    yield path, level
    if path.is_dir():
      stack.extend([(p, level + 1) for p in os.scandir(path)])

tree is a generator, with which we can easily implement md5_tree -

import hashlib

TEE = "├──"
PIPE_PREFIX = "│   "

def md5_tree(init):
  for path, level in tree(init):
    print(
      PIPE_PREFIX * level +
      TEE + 
      path.name +
      ('/' if path.is_dir() else " -> " + md5(path))
    )

def md5(path):
  with open(path, 'rb') as f:
    return hashlib.md5(f.read()).hexdigest()

Call md5_tree with an input path to see the result -

md5_tree(".")

Generators make it easy to intuitively filter elements, such as .DS_Store -

def md5_tree(init):
  for path, level in tree(init):
    if path.name == ".DS_Store": continue # filter
    print(...)

If you prefer not to use the generator, simply replace yield with your print statement -

def tree(init):
  stack = list((p, 0) for p in os.scandir(init))
  while stack:
    path, level = stack.pop()
    print(
      PIPE_PREFIX * level +
      TEE + 
      path.name +
      ('/' if path.is_dir() else " -> " + md5(path))
    )
    if path.is_dir():
      stack.extend([(p, level + 1) for p in os.scandir(path)])

os.scandir returns directory contents in arbitrary order. You can sort them using sorted. We are using a stack to order the output, so remember to sort using reverse=True -

def tree(init):
  stack = list((p, 0) for p in sorted(os.scandir(init), key=str, reverse=True))
  while stack:
    path, level = stack.pop()
    yield path, level
    if path.is_dir():
      stack.extend([(p, level + 1) for p in sorted(os.scandir(path), key=str, reverse=True)])

I finally implemented it, using a stack to simulate recursion. The principle is to perform a preorder traversal of an n-ary tree, which actually allows control over any previous stack position I want. This is also a great exercise in converting recursion to iteration. Additionally, using os.path and os.sep helps abstract the differences between operating systems.

import os
import re
import hashlib

PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│   "
SPACE_PREFIX = "    "

class DirectoryTree:
    def __init__(self, root_dir):
        self._generator = _TreeGenerator(root_dir)

    def generate(self):
        tree = self._generator.build_tree()
        for entry in tree:
            print(entry)

class _TreeGenerator:
    def __init__(self, root_dir):
        self._root_dir = root_dir
        self._tree = []

    def build_tree(self):
        self._tree_head()
        self._tree_body(self._root_dir)
        return self._tree

    def _extract_number(self, file_name):
        match = re.search(r"\d+", file_name)
        return int(match.group()) if match else float("inf")

    def _get_md5(self, file_path):
        hasher = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                hasher.update(chunk)
        return hasher.hexdigest()

    def _get_entries(self, folder):
        with os.scandir(folder) as entries:
            if entries is None:
                return None
            entries = list(entries)
            entries = [entry for entry in entries if entry.name != ".DS_Store"]
            entries = sorted(entries, key = lambda entry: self._extract_number(entry.name))
        return entries

    def _tree_head(self):
        self._tree.append(f"{os.path.basename(self._root_dir)}{os.sep}")
        self._tree.append(PIPE)

    def _tree_body(self, folder):
        folder, prefix, offset = folder, "", 0
        stack = [[folder, prefix, offset]]
        has_folder = False

        while stack:
            folder, prefix, offset = stack[-1]
            entries = self._get_entries(folder)
            if entries is None:
                stack = stack[:-1]
                break
            has_folder = False
            inner_folder = None

            for index, entry in enumerate(entries[offset:]):
                connector = ELBOW if i == len(entries[offset:]) - 1 else TEE
                if entry.is_dir():
                    self._tree.append(f"{prefix}{connector} {entry.name}{os.sep}")
                    if index != len(entries[offset:]) - 1:
                        prefix += PIPE_PREFIX
                    else:
                        prefix += SPACE_PREFIX
                    has_folder = True
                    inner_folder = entry
                    stack[-1][2] = offset + index + 1
                    break
                else:
                    md5_hash = self._get_md5(entry)
                    self._tree.append(f"{prefix}{connector} {entry.name} -> {md5_hash}")

            if has_folder:
                stack.append([inner_folder, prefix, 0])
            else:
                stack = stack[:-1]
                if self._tree[-1] != prefix.rstrip():
                    self._tree.append(prefix.rstrip())

tree = DirectoryTree("")
tree.generate()

print:

XX/
│
├── XXX/
│   ├── XXX 1 -> 887100ba4373eebfed52ee1f973161de
│   ├── XXX 2 -> 22fbcce00751c1c51fbd94e83e124fbb
│   ├── XXXX/
│   │   ├── XXXXX.pdf -> f45621b992b922009f4db4422abd6fe4
│   │   └── XXXXX.pdf -> d9cab932175d1e69709291000fa8ffa3
│   │
│   └── XXXX/
│
└── XXX.txt

If you want to add a -L feature, change the data structure to [[folder, prefix, offset, level]], then:

folder, prefix, offset, level = folder, "", 0, 1
stack = [[folder, prefix, offset, level]]
...
while stack:
    folder, prefix, offset, level = stack[-1]
    ...
    if entry.is_dir():
        self._tree.append(f"{prefix}{connector} {entry.name}{os.sep}")
        if level == x:
            continue
        else:
            if index != len(entries[offset:]) - 1:
                prefix += PIPE_PREFIX
...
if has_folder:
   stack.append([inner_folder, prefix, 0, level])
else:
...

Where x is the desired level.

Try a directory class as a dicretory, like (in pseudo code):

Class DirBody():
    def __init__(self, name, depth):
        self.name = name
        self.depth = depth
        self.childDir = []
        self.childFile = []
    def __str__(self) -> str:
        return self.printDirs() + "\n"+self.printFiles()
    def printDirs(self) -> str:
        pass
    def printFiles(self) -> str:
        pass

and you could walk every dir as a object and his sub_object.

发布者:admin,转转请注明出处:http://www.yc00.com/questions/1742294184a4416780.html

相关推荐

发表回复

评论列表(0条)

  • 暂无评论

联系我们

400-800-8888

在线咨询: QQ交谈

邮件:admin@example.com

工作时间:周一至周五,9:30-18:30,节假日休息

关注微信