From 343960abdbd8ce4ef9fea6437bcfd76aef7d74cd Mon Sep 17 00:00:00 2001
From: fima <fima@dtu.dk>
Date: Mon, 25 Sep 2023 11:21:48 +0200
Subject: [PATCH] Directories check

---
 qim3d/io/__init__.py |   1 +
 qim3d/io/sync.py     | 207 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+)
 create mode 100644 qim3d/io/sync.py

diff --git a/qim3d/io/__init__.py b/qim3d/io/__init__.py
index 00ba2eb5..bab11b43 100644
--- a/qim3d/io/__init__.py
+++ b/qim3d/io/__init__.py
@@ -1,3 +1,4 @@
 from .load import DataLoader, load, ImgExamples
 from .save import save
+from .sync import Sync
 from . import logger
\ No newline at end of file
diff --git a/qim3d/io/sync.py b/qim3d/io/sync.py
new file mode 100644
index 00000000..c7358cf8
--- /dev/null
+++ b/qim3d/io/sync.py
@@ -0,0 +1,207 @@
+""" Dataset synchronization tasks """
+import os
+import subprocess
+import outputformat as ouf
+from qim3d.io.logger import log
+from pathlib import Path
+
+
+class Sync:
+    """Class for dataset synchronization tasks
+
+    Directory comparisons are delegated to rsync dry runs, so rsync must be
+    installed; no data is copied or modified.
+
+    """
+
+    def __init__(self):
+        """Verify that rsync can be executed.
+
+        Raises:
+            RuntimeError: If rsync cannot be run on this system.
+
+        """
+        if not self._check_rsync():
+            raise RuntimeError(
+                "Could not find rsync, please check if it is installed in your system."
+            )
+
+    def _check_rsync(self):
+        """Check if rsync is available
+
+        Returns:
+            bool: True if 'rsync --version' ran successfully, False otherwise.
+
+        """
+        try:
+            subprocess.run(["rsync", "--version"], capture_output=True, check=True)
+        except (OSError, subprocess.SubprocessError) as error:
+            # OSError: rsync is missing or cannot be launched
+            # SubprocessError: rsync started but exited with an error
+            log.error("rsync is not available")
+            log.error(error)
+            return False
+
+        return True
+
+    def check_destination(self, source, destination, checksum=False, verbose=True):
+        """Check if all files from 'source' are in 'destination'
+
+        This function compares the files in the 'source' directory to those in
+        the 'destination' directory and reports any differences or missing files.
+        The comparison is an rsync dry run ('-n'), so no data is transferred.
+
+        Args:
+            source (str or Path): The source directory path.
+            destination (str or Path): The destination directory path.
+            checksum (bool, optional): If True, use checksums to compare files (slower but more accurate).
+                Default is False.
+            verbose (bool, optional): If True, display a list of differing or missing files in the log.
+                Default is True.
+
+        Returns:
+            list: Files from 'source' that differ from or are missing in
+                'destination', as listed by rsync.
+
+        Raises:
+            subprocess.CalledProcessError: If rsync exits with a non-zero status.
+
+        """
+        # '-c' makes rsync compare by checksum
+        rsync_args = "-avrc" if checksum else "-avr"
+
+        # A trailing separator makes rsync compare the directories' contents
+        command = [
+            "rsync",
+            "-n",
+            rsync_args,
+            str(Path(source)) + os.path.sep,
+            str(Path(destination)) + os.path.sep,
+        ]
+        out = subprocess.run(command, capture_output=True, check=True)
+
+        # Skip the first line and the last three lines of the verbose output,
+        # which are not file entries; names ending in '/' are directories
+        listing = out.stdout.decode().splitlines()[1:-3]
+        diff_files = [item for item in listing if not item.endswith("/")]
+
+        if diff_files and verbose:
+            title = "Source files differing or missing in destination"
+            log.info(
+                ouf.showlist(diff_files, style="line", return_str=True, title=title)
+            )
+
+        return diff_files
+
+    def compare_dirs(self, source, destination, checksum=False, verbose=True):
+        """Checks whether 'source' and 'destination' directories are synchronized.
+
+        This function compares the contents of two directories
+        ('source' and 'destination') and reports any differences.
+        It checks for files that exist in one directory but not the other and
+        files that are present in both but not equal.
+
+        If no differences are found between the directories,
+        it logs a message indicating that they are synchronized.
+        If differences are found, it logs detailed information about the differing
+        files, listed in sorted order. Differences are logged even when 'verbose'
+        is False.
+
+        Args:
+            source (str or Path): The source directory path.
+            destination (str or Path): The destination directory path.
+            checksum (bool, optional): If True, use checksums to compare files (slower but more accurate).
+                Default is False.
+            verbose (bool, optional): If True, also log the file and directory counts
+                and the message for synchronized directories. Default is True.
+
+        Returns:
+            None: This function does not return a value.
+
+        Raises:
+            subprocess.CalledProcessError: If rsync exits with a non-zero status.
+
+        """
+        if verbose:
+            # Called only for the totals they log
+            self.count_files_and_dirs(source)
+            self.count_files_and_dirs(destination)
+            log.info("\n")
+
+        # Both directions are checked quietly; the combined report follows
+        options = {"checksum": checksum, "verbose": False}
+        s_d = set(self.check_destination(source, destination, **options))
+        d_s = set(self.check_destination(destination, source, **options))
+
+        if not s_d and not d_s:
+            if verbose:
+                log.info(
+                    "Source and destination are synchronized, no differences found."
+                )
+            return
+
+        # A file flagged in both directions exists on both sides but differs;
+        # a file flagged in one direction only is missing on the other side
+        reports = [
+            (s_d | d_s, "files are not in sync"),
+            (s_d & d_s, "files present on both, but not equal"),
+            (s_d - d_s, f"files present only on {source}"),
+            (d_s - s_d, f"files present only on {destination}"),
+        ]
+        for group, label in reports:
+            if not group:
+                continue
+            # Sets have no stable order, so sort to keep the log reproducible
+            log.info(
+                ouf.showlist(
+                    sorted(group),
+                    style="line",
+                    return_str=True,
+                    title=f"{len(group)} {label}",
+                )
+            )
+
+    def count_files_and_dirs(self, path, verbose=True):
+        """Count the number of files and directories in the given path.
+
+        This function recursively counts the number of files and
+        directories in the specified directory 'path'.
+        Symbolic links to directories are counted but not entered.
+
+        If 'verbose' is True, the function logs the total count
+        of files and directories in the specified path.
+
+        Args:
+            path (str or Path): The directory path to count files and directories in.
+            verbose (bool, optional): If True, display the total count in the log.
+                Default is True.
+
+        Returns:
+            tuple: A tuple containing two values:
+                - The count of files in the directory and its subdirectories.
+                - The count of directories in the directory and its subdirectories.
+
+        """
+        path = Path(path)
+        files = 0
+        dirs = 0
+        # The 'with' block closes the directory handle even if an error occurs
+        with os.scandir(path) as entries:
+            for entry in entries:
+                if entry.is_file():
+                    files += 1
+                elif entry.is_dir():
+                    dirs += 1
+                    # Do not follow directory links: a link to a parent
+                    # directory would otherwise recurse without end
+                    if not entry.is_symlink():
+                        sub_files, sub_dirs = self.count_files_and_dirs(
+                            entry.path, verbose=False
+                        )
+                        files += sub_files
+                        dirs += sub_dirs
+
+        if verbose:
+            log.info(f"Total of {files} files and {dirs} directories on {path}")
+
+        return files, dirs
-- 
GitLab