#!/bin/python3

from os import listdir
from os.path import isfile, join, islink, exists, getsize
import sys
import argparse
from fsToolsLib import findAllFilesRecursively, FileInfo, getFilesWithSizeMatches, computeHashesInParallel, findIdenticalFiles, sortByFileSizeAndGetBiggest

# Command-line interface: one required search path plus an optional switch
# that selects the output mode used by the rest of the script.
parser = argparse.ArgumentParser(
    description="Report duplicate files found under a path.",
)
parser.add_argument(
    'pathToSearch',
    help="path that is searched recursively for duplicate files",
)
parser.add_argument(
    '-f', '--format',
    action='store_true',
    help="print only the path of each duplicate file, one per line, "
         "and suppress progress messages",
)
args = parser.parse_args()


# Module-level settings read by the code further down in this script.
pathToSearch = args.pathToSearch
formatForConsole = args.format

def logInfo(msg: str) -> None:
    """Print msg as a progress message.

    Nothing is printed when the module-level flag formatForConsole is
    truthy, so that mode's output contains no progress messages.
    """
    if formatForConsole:
        return
    print(msg)

# Duplicate-detection pipeline. Each stage reports its progress via logInfo.
logInfo(f"using {pathToSearch} as the path to search")

# Stage 1: gather the FileInfo entries for the search path.
fileInfos: list[FileInfo] = findAllFilesRecursively(pathToSearch)
logInfo("mapped out directory tree")

# Stage 2: keep only what getFilesWithSizeMatches returns
# (presumably files whose size equals that of another file -- TODO confirm).
fileInfos = getFilesWithSizeMatches(fileInfos)
logInfo("found files with size matches")

# Stage 3: the return value is not used here
# (presumably the hashes are stored on the FileInfo objects -- TODO confirm).
computeHashesInParallel(fileInfos)
logInfo("Initialized hash values")

# Stage 4: each entry is later read as a pair through indexes [0] and [1].
# list takes a single type argument, so the pair is annotated as
# list[FileInfo]; assumes the pairs are lists rather than tuples -- TODO
# confirm against fsToolsLib.
indenticalFiles: list[list[FileInfo]] = findIdenticalFiles(fileInfos)
logInfo(f"found duplicates ({len(indenticalFiles)})")


if formatForConsole:
    # Console format: print the path of every file that appears in a
    # duplicate pair, one per line, each file only once.
    # dict.fromkeys de-duplicates like a set (the keys must be hashable either
    # way) but keeps first-seen order, so the output follows the order of
    # indenticalFiles instead of arbitrary set iteration order.
    uniqueDuplicates: dict[FileInfo, None] = dict.fromkeys(
        fileInfo
        for dupPair in indenticalFiles
        for fileInfo in (dupPair[0], dupPair[1])
    )
    for fileInfo in uniqueDuplicates:
        print(fileInfo.getPath())
else:
    #TODO: group duplicates together in output
    # Human-readable format: request at most 100 pairs from
    # sortByFileSizeAndGetBiggest.
    numFilesToList = min(100, len(indenticalFiles))
    sortedList = sortByFileSizeAndGetBiggest(indenticalFiles, numFilesToList)
    print("got biggest duplicates")
    # Walk the result from its last requested index down to index 0.
    for idx in reversed(range(numFilesToList)):
        firstFile, secondFile = sortedList[idx][0], sortedList[idx][1]
        print(
            f"{firstFile.getPath()} is the same as: {secondFile.getPath()}"
            f" with size of {firstFile.getSize()}"
        )