I have already implemented generating a directory tree using recursion:
import os
import re
import hashlib
PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│ "
SPACE_PREFIX = " "
root_dir = ""
tree = []
def extract_number(file_name):
match = re.search(r"\d+", file_name)
return int(match.group()) if match else float("inf")
def get_md5(file_path):
hasher = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
hasher.update(chunk)
return hasher.hexdigest()
def tree_head():
tree.append(f"{root_dir}{os.sep}")
tree.append(PIPE)
def get_entries(directory):
with os.scandir(directory) as entries:
entries = list(entries)
entries = [entry for entry in entries if entry.name != ".DS_Store"]
entries = sorted(entries, key = lambda entry: (entry.is_file(), extract_number(entry.name)))
return entries
def tree_body(directory, prefix = ""):
entries = get_entries(directory)
entries_count = len(entries)
for index, entry in enumerate(entries):
connector = ELBOW if index == entries_count - 1 else TEE
if entry.is_dir():
add_directory(entry, index, entries_count, prefix, connector)
else:
add_file(entry, prefix, connector)
def add_directory(directory, index, entries_count, prefix, connector):
tree.append(f"{prefix}{connector} {directory.name}{os.sep}")
if index != entries_count - 1:
prefix += PIPE_PREFIX
else:
prefix += SPACE_PREFIX
tree_body(directory, prefix)
def add_file(file, prefix, connector):
md5_hash = get_md5(file)
tree.append(f"{prefix}{connector} {file.name} -> {md5_hash}")
def build_tree():
tree_head()
tree_body(root_dir)
return tree
def generate():
tree = build_tree()
for entry in tree:
print(entry)
generate()
This code generates a directory tree with file and directory names, as well as their MD5 hashes. Here's an example of the output that this script might produce, assuming the specified directory structure:
root_dir/
│
├── folder1/
│ ├── file1.txt -> d41d8cd98f00b204e9800998ecf8427e
│ └── file2.txt -> 098f6bcd4621d373cade4e832627b4f6
│
└── folder2/
├── file3.txt -> 6dcd4ce23d88e2ee9568ba546c007c63
└── file4.txt -> d41d8cd98f00b204e9800998ecf8427e
How can the tree_body()
function be converted from a recursive implementation to an iterative one?
I have tried many methods and found it difficult to print in the correct order.
I don't know what the correct approach for designing the algorithm for this problem looks like.
I have already implemented generating a directory tree using recursion:
import os
import re
import hashlib
PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│ "
SPACE_PREFIX = " "
root_dir = ""
tree = []
def extract_number(file_name):
match = re.search(r"\d+", file_name)
return int(match.group()) if match else float("inf")
def get_md5(file_path):
hasher = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
hasher.update(chunk)
return hasher.hexdigest()
def tree_head():
tree.append(f"{root_dir}{os.sep}")
tree.append(PIPE)
def get_entries(directory):
with os.scandir(directory) as entries:
entries = list(entries)
entries = [entry for entry in entries if entry.name != ".DS_Store"]
entries = sorted(entries, key = lambda entry: (entry.is_file(), extract_number(entry.name)))
return entries
def tree_body(directory, prefix = ""):
entries = get_entries(directory)
entries_count = len(entries)
for index, entry in enumerate(entries):
connector = ELBOW if index == entries_count - 1 else TEE
if entry.is_dir():
add_directory(entry, index, entries_count, prefix, connector)
else:
add_file(entry, prefix, connector)
def add_directory(directory, index, entries_count, prefix, connector):
tree.append(f"{prefix}{connector} {directory.name}{os.sep}")
if index != entries_count - 1:
prefix += PIPE_PREFIX
else:
prefix += SPACE_PREFIX
tree_body(directory, prefix)
def add_file(file, prefix, connector):
md5_hash = get_md5(file)
tree.append(f"{prefix}{connector} {file.name} -> {md5_hash}")
def build_tree():
tree_head()
tree_body(root_dir)
return tree
def generate():
tree = build_tree()
for entry in tree:
print(entry)
generate()
This code generates a directory tree with file and directory names, as well as their MD5 hashes. Here's an example of the output that this script might produce, assuming the specified directory structure:
root_dir/
│
├── folder1/
│ ├── file1.txt -> d41d8cd98f00b204e9800998ecf8427e
│ └── file2.txt -> 098f6bcd4621d373cade4e832627b4f6
│
└── folder2/
├── file3.txt -> 6dcd4ce23d88e2ee9568ba546c007c63
└── file4.txt -> d41d8cd98f00b204e9800998ecf8427e
How can the tree_body()
function be converted from a recursive implementation to an iterative one?
I have tried many methods and found it difficult to print in the correct order.
I don't know what the correct approach for designing the algorithm for this problem looks like.
- why do you want it to iterative? – Albin Paul Commented Nov 22, 2024 at 7:27
- You can push and pop from a stack to make the code iterative. I see no benefits tho. – Albin Paul Commented Nov 22, 2024 at 7:28
- @AlbinPaul I have tried both the map and stack methods. Iteration is easy, but it's difficult to output at the correct levels. As for the benefits, I don't want recursion in my code. – Shao Kahn Commented Nov 22, 2024 at 7:40
- 1 Honestly, this is a perfect fit for recursion. Reworking it to an iterative solution will just be uglier: use an explicit stack instead of the call stack provided by Python. If you’re working with trees, I’d suggest getting comfortable with recursion - it’s pretty core to many of the algorithms on tree structures. – nneonneo Commented Nov 22, 2024 at 8:01
- 1 "I don't want recursion in my code": bad decision. Instead of using the stack that is perfectly fit for such mechanics you'll find yourself with your own implementation of a stack just to implement the same algorithm but avoid calling it "recursion". – trincot Commented Nov 22, 2024 at 8:05
3 Answers
Reset to default 0Here is a stack-based approach that does not rely on recursion -
import os
def tree(init):
stack = list((p, 0) for p in os.scandir(init))
while stack:
path, level = stack.pop()
yield path, level
if path.is_dir():
stack.extend([(p, level + 1) for p in os.scandir(path)])
tree
is a generator, with which we can easily implement md5_tree
-
import hashlib
TEE = "├──"
PIPE_PREFIX = "│ "
def md5_tree(init):
for path, level in tree(init):
print(
PIPE_PREFIX * level +
TEE +
path.name +
('/' if path.is_dir() else " -> " + md5(path))
)
def md5(path):
with open(path, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
Call md5_tree
with an input path to see the result -
md5_tree(".")
Generators make it easy to intuitively filter elements, such as .DS_Store
-
def md5_tree(init):
for path, level in tree(init):
if path.name == ".DS_Store": continue # filter
print(...)
If you prefer not to use the generator, simply replace yield
with your print
statement -
def tree(init):
stack = list((p, 0) for p in os.scandir(init))
while stack:
path, level = stack.pop()
print(
PIPE_PREFIX * level +
TEE +
path.name +
('/' if path.is_dir() else " -> " + md5(path))
)
if path.is_dir():
stack.extend([(p, level + 1) for p in os.scandir(path)])
os.scandir
returns directory contents in arbitrary order. You can sort them using sorted
. We are using a stack
to order the output, so remember to sort using reverse=True
-
def tree(init):
stack = list((p, 0) for p in sorted(os.scandir(init), key=str, reverse=True))
while stack:
path, level = stack.pop()
yield path, level
if path.is_dir():
stack.extend([(p, level + 1) for p in sorted(os.scandir(path), key=str, reverse=True)])
I finally implemented it, using a stack to simulate recursion. The principle is to perform a preorder traversal of an n-ary tree, which actually allows control over any previous stack position I want.
This is also a great exercise in converting recursion to iteration.
Additionally, using os.path
and os.sep
helps abstract the differences between operating systems.
import os
import re
import hashlib
PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│ "
SPACE_PREFIX = " "
class DirectoryTree:
def __init__(self, root_dir):
self._generator = _TreeGenerator(root_dir)
def generate(self):
tree = self._generator.build_tree()
for entry in tree:
print(entry)
class _TreeGenerator:
def __init__(self, root_dir):
self._root_dir = root_dir
self._tree = []
def build_tree(self):
self._tree_head()
self._tree_body(self._root_dir)
return self._tree
def _extract_number(self, file_name):
match = re.search(r"\d+", file_name)
return int(match.group()) if match else float("inf")
def _get_md5(self, file_path):
hasher = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
hasher.update(chunk)
return hasher.hexdigest()
def _get_entries(self, folder):
with os.scandir(folder) as entries:
if entries is None:
return None
entries = list(entries)
entries = [entry for entry in entries if entry.name != ".DS_Store"]
entries = sorted(entries, key = lambda entry: self._extract_number(entry.name))
return entries
def _tree_head(self):
self._tree.append(f"{os.path.basename(self._root_dir)}{os.sep}")
self._tree.append(PIPE)
def _tree_body(self, folder):
folder, prefix, offset = folder, "", 0
stack = [[folder, prefix, offset]]
has_folder = False
while stack:
folder, prefix, offset = stack[-1]
entries = self._get_entries(folder)
if entries is None:
stack = stack[:-1]
break
has_folder = False
inner_folder = None
for index, entry in enumerate(entries[offset:]):
connector = ELBOW if i == len(entries[offset:]) - 1 else TEE
if entry.is_dir():
self._tree.append(f"{prefix}{connector} {entry.name}{os.sep}")
if index != len(entries[offset:]) - 1:
prefix += PIPE_PREFIX
else:
prefix += SPACE_PREFIX
has_folder = True
inner_folder = entry
stack[-1][2] = offset + index + 1
break
else:
md5_hash = self._get_md5(entry)
self._tree.append(f"{prefix}{connector} {entry.name} -> {md5_hash}")
if has_folder:
stack.append([inner_folder, prefix, 0])
else:
stack = stack[:-1]
if self._tree[-1] != prefix.rstrip():
self._tree.append(prefix.rstrip())
tree = DirectoryTree("")
tree.generate()
print:
XX/
│
├── XXX/
│ ├── XXX 1 -> 887100ba4373eebfed52ee1f973161de
│ ├── XXX 2 -> 22fbcce00751c1c51fbd94e83e124fbb
│ ├── XXXX/
│ │ ├── XXXXX.pdf -> f45621b992b922009f4db4422abd6fe4
│ │ └── XXXXX.pdf -> d9cab932175d1e69709291000fa8ffa3
│ │
│ └── XXXX/
│
└── XXX.txt
If you want to add a -L
feature, change the data structure to [[folder, prefix, offset, level]]
, then:
folder, prefix, offset, level = folder, "", 0, 1
stack = [[folder, prefix, offset, level]]
...
while stack:
folder, prefix, offset, level = stack[-1]
...
if entry.is_dir():
self._tree.append(f"{prefix}{connector} {entry.name}{os.sep}")
if level == x:
continue
else:
if index != len(entries[offset:]) - 1:
prefix += PIPE_PREFIX
...
if has_folder:
stack.append([inner_folder, prefix, 0, level])
else:
...
Where x is the desired level.
Try a directory class as a dicretory, like (in pseudo code):
Class DirBody():
def __init__(self, name, depth):
self.name = name
self.depth = depth
self.childDir = []
self.childFile = []
def __str__(self) -> str:
return self.printDirs() + "\n"+self.printFiles()
def printDirs(self) -> str:
pass
def printFiles(self) -> str:
pass
and you could walk every dir as a object and his sub_object.
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1742294184a4416780.html
评论列表(0条)