diff --git a/README.md b/README.md index 5411f76..02fb504 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,24 @@ You have file objects in separate forms of storage (internal SSD, SFTP, S3, etc), but want to have a centralized directory to access all those files. You want to be able to shift these files across these storage solutions automatically. ### Hello file-o-bot -This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage such that it is slowly "downgraded" in storage priority as you. +This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage such that it is slowly "downgraded" in storage priority as it becomes less and less accessed. This allows you to move those files transparently from faster storage to cheaper archive storage. -file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely the redneck version of it. +file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely a redneck version of it. -### +### FAQ + +#### What happens if files/folder structures overlap across my storage mediums? + +Movements still apply to the files. + +Storage paths are sorted via the `sort` utility and the preceding paths have higher priority over subsequent ones. All storage paths are "merged". + +For overlapping files, the file in the highest priority storage is chosen to be soft-linked. + +#### What kind of file protections do I have when executing movements? + +None. The files are transferred using rsync but that's about it. + +#### This is madness. + +Yes. 
diff --git a/default-config.ini b/default-config.ini deleted file mode 100644 index 2b86893..0000000 --- a/default-config.ini +++ /dev/null @@ -1,3 +0,0 @@ -MAP_ROOT_DIRECTORY=./root-directory -SOURCE_FILE_PATH=./source-path -SOFT_LINK_PATH=./soft-links diff --git a/duckdb-install.sql b/duckdb-install.sql deleted file mode 100644 index d50579f..0000000 --- a/duckdb-install.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TABLE files ( - source_file_name TEXT NOT NULL, - source_file_path TEXT NOT NULL, - source_path TEXT NOT NULL -); -COPY - (SELECT * FROM files) - TO 'file-o-bot.db' - (FORMAT 'parquet', CODEC 'zstd'); diff --git a/file-o-bot.sh b/file-o-bot similarity index 79% rename from file-o-bot.sh rename to file-o-bot index a890b05..2f822b0 100644 --- a/file-o-bot.sh +++ b/file-o-bot @@ -10,7 +10,6 @@ DEFAULT_CONFIG_PATH=. # Read custom configs from custom config file . "${DEFAULT_CONFIG_PATH}"/config.d/*.ini -# For each lifecycle rule, execute a movement using rsync start_movement () { SOURCE_PATH=$1 DESTINATION_PATH=$2 @@ -24,15 +23,20 @@ start_movement () { --inplace \ --archive \ --mkpath \ + --remove-source-files \ --files-from=- \ "$SOURCE_PATH" \ "$DESTINATION_PATH" } + +# Check for movements file MOVEMENT_JSON_PATH=$( readlink -f "${DEFAULT_CONFIG_PATH}/movement.json" ) if ! [[ -f ${MOVEMENT_JSON_PATH} ]]; then echo "Missing movement file. Exiting." exit 1 fi + +# For each lifecycle rule, execute a movement using rsync RULE_LENGTH=$( jq '.movements | length' "${MOVEMENT_JSON_PATH}" ) for (( i=0; i<$RULE_LENGTH ; i++ )) do @@ -44,13 +48,12 @@ done # Run checks and build soft links if ! [[ -d ${MAP_ROOT_DIRECTORY} ]]; then - echo "Missing root directory" + echo "Missing map root directory" exit 1 fi GREEN_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/green ) RED_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/red ) BLUE_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/blue ) -export GREEN_PATH if ! 
[[ -d ${GREEN_PATH} ]]; then mkdir "${GREEN_PATH}" fi @@ -58,30 +61,38 @@ fi # Compile storage paths from all the movements readarray -t STORAGE_PATHS \ < <( jq -r '.movements[] | .sourcePath, .destinationPath' "${MOVEMENT_JSON_PATH}" | xargs -n 1 -I {} readlink -e "{}" | sort --unique ) +STORAGE_PATHS_LEN=${#STORAGE_PATHS[@]} + +# Verify that all storage paths work +for ((i=0 ; i<${STORAGE_PATHS_LEN}; i++)) +do + STORAGE_PATH=${STORAGE_PATHS[$i]} + if ! [[ -d $STORAGE_PATH ]]; then + printf "Storage path ${STORAGE_PATH} missing. Exiting.\n" + exit 1 + fi +done # Build folder structure printf '%s\0' "${STORAGE_PATHS[@]}" | \ xargs -0 -n 1 bash -c 'find "$0" -type d -printf "%P\0"' | \ sort --unique --zero-terminated | \ - parallel -0 mkdir -p "${GREEN_PATH}"/{} + parallel -0 mkdir -p "${GREEN_PATH}/{}" # Build file structure using soft links link_file () { - echo "\$1 $1" - echo "\$2 $2" - echo "\$3 $3" - echo "Soft linked to ${1/$2/$3}" - /usr/bin/ln -s "$1" "${1/$2/$3}" + /usr/bin/ln -s "$1" "${1/$2/$3}" || echo failed } export -f link_file -for STORAGE_PATH in "${STORAGE_PATHS[@]}" +for ((i=0 ; i<${STORAGE_PATHS_LEN}; i++)) do + STORAGE_PATH=${STORAGE_PATHS[$i]} find "${STORAGE_PATH}" -type f -print0 | \ sort --unique --zero-terminated | \ parallel -0 link_file "{}" "${STORAGE_PATH}" "${GREEN_PATH}" done -# Swap blue/green +# Move green to blue, and blue to red rm -rf "${RED_PATH}" && \ mv "${BLUE_PATH}" "${RED_PATH}" && \ mv "${GREEN_PATH}" "${BLUE_PATH}" diff --git a/install-files/default-config.ini b/install-files/default-config.ini new file mode 100644 index 0000000..d478cad --- /dev/null +++ b/install-files/default-config.ini @@ -0,0 +1 @@ +MAP_ROOT_DIRECTORY=./mapped-storage diff --git a/install-files/movement.json b/install-files/movement.json new file mode 100644 index 0000000..cbb52c8 --- /dev/null +++ b/install-files/movement.json @@ -0,0 +1,19 @@ +{ + "movements": [ + { + "sourcePath": "./hdd", + "destinationPath": "./s3-bucket", + "amins": 1 + }, + { 
+ "sourcePath": "./ssd", + "destinationPath": "./s3-bucket", + "amins": 1 + }, + { + "sourcePath": "./s3-bucket", + "destinationPath": "./backblaze-b2", + "amins": 1 + } + ] +} diff --git a/movement.json b/movement.json deleted file mode 100644 index a4f1ddb..0000000 --- a/movement.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "movements": [ - { - "sourcePath": "./source_path1", - "destinationPath": "./dest_path1", - "amins": 1 - }, - { - "sourcePath": "./source_path2", - "destinationPath": "./dest_path2", - "amins": 1 - } - ] -} diff --git a/screenshots/after-file-o-bot.jpg b/screenshots/after-file-o-bot.jpg new file mode 100644 index 0000000..4cbcf95 Binary files /dev/null and b/screenshots/after-file-o-bot.jpg differ diff --git a/screenshots/after-movement-2.jpg b/screenshots/after-movement-2.jpg new file mode 100644 index 0000000..5c3d932 Binary files /dev/null and b/screenshots/after-movement-2.jpg differ diff --git a/screenshots/after-movement-3.jpg b/screenshots/after-movement-3.jpg new file mode 100644 index 0000000..83d5e71 Binary files /dev/null and b/screenshots/after-movement-3.jpg differ diff --git a/screenshots/before-file-o-bot.jpg b/screenshots/before-file-o-bot.jpg new file mode 100644 index 0000000..3a8c624 Binary files /dev/null and b/screenshots/before-file-o-bot.jpg differ