From 1ff500cdfba9ffaa20e626e2e0d2664e83095109 Mon Sep 17 00:00:00 2001
From: Clement
Date: Mon, 27 May 2024 21:59:41 +0800
Subject: [PATCH] Initial commit

---
 Dockerfile         |  7 ++++
 README.md          |  9 +++++
 default-config.ini |  3 ++
 duckdb-install.sql |  9 +++++
 file-o-bot.sh      | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 install.sh         |  7 ++++
 movement.json      | 14 ++++++++
 7 files changed, 136 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 default-config.ini
 create mode 100644 duckdb-install.sql
 create mode 100644 file-o-bot.sh
 create mode 100644 install.sh
 create mode 100644 movement.json

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..da0172c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,7 @@
+FROM debian:bookworm-slim
+# -y is required: a build has no terminal to answer apt's confirmation prompt
+RUN apt-get update && apt-get upgrade -y && apt-get install -y rsync parallel jq
+COPY . /app
+
+WORKDIR /app
+CMD bash /app/file-o-bot.sh
diff --git a/README.md b/README.md
index 8639bd9..5411f76 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,11 @@
 # file-o-bot
 
+### Problem
+You have file objects in separate forms of storage (internal SSD, SFTP, S3, etc), but want to have a centralized directory to access all those files. You want to be able to shift these files across these storage solutions automatically.
+
+### Hello file-o-bot
+This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage such that it is slowly "downgraded" in storage priority as it goes longer without being accessed.
+
+file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely the redneck version of it.
+
+###
diff --git a/default-config.ini b/default-config.ini
new file mode 100644
index 0000000..2b86893
--- /dev/null
+++ b/default-config.ini
@@ -0,0 +1,3 @@
+MAP_ROOT_DIRECTORY=./root-directory
+SOURCE_FILE_PATH=./source-path
+SOFT_LINK_PATH=./soft-links
diff --git a/duckdb-install.sql b/duckdb-install.sql
new file mode 100644
index 0000000..d50579f
--- /dev/null
+++ b/duckdb-install.sql
@@ -0,0 +1,9 @@
+CREATE TABLE files (
+    source_file_name TEXT NOT NULL,
+    source_file_path TEXT NOT NULL,
+    source_path TEXT NOT NULL
+);
+COPY
+    (SELECT * FROM files)
+    TO 'file-o-bot.db'
+    (FORMAT 'parquet', CODEC 'zstd');
diff --git a/file-o-bot.sh b/file-o-bot.sh
new file mode 100644
index 0000000..a890b05
--- /dev/null
+++ b/file-o-bot.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# file-o-bot: run the lifecycle rules in movement.json, then rebuild the soft
+# link tree under MAP_ROOT_DIRECTORY and rotate it in (green -> blue -> red).
+set -euo pipefail  # a bare "-o" only lists the options; pipefail must be named
+
+DEFAULT_CONFIG_PATH=.
+
+# Read default configs from default config file
+. "${DEFAULT_CONFIG_PATH}"/default-config.ini
+
+# Read custom configs. "." sources a single file, so loop over the matches;
+# the -f test skips the literal pattern left behind when nothing matches.
+for CONFIG_FILE in "${DEFAULT_CONFIG_PATH}"/config.d/*.ini; do
+    if [[ -f ${CONFIG_FILE} ]]; then . "${CONFIG_FILE}"; fi
+done
+
+# Copy every regular file under $1 last accessed more than $3 minutes ago
+# into $2, keeping relative paths. The source copy is left in place.
+# NOTE(review): confirm whether moved files should be removed from the source.
+start_movement () {
+    local source_path=$1 destination_path=$2 amins=$3
+    find "${source_path}" ! -type l -type f -amin +"${amins}" -printf "%P\0" | \
+        rsync -0 --inplace --archive --mkpath --files-from=- \
+            "${source_path}" "${destination_path}"
+}
+MOVEMENT_JSON_PATH=$( readlink -f "${DEFAULT_CONFIG_PATH}/movement.json" )
+if ! [[ -f ${MOVEMENT_JSON_PATH} ]]; then
+    echo "Missing movement file. Exiting." >&2
+    exit 1
+fi
+RULE_LENGTH=$( jq '.movements | length' "${MOVEMENT_JSON_PATH}" )
+for (( i = 0; i < RULE_LENGTH; i++ )); do
+    SOURCE_PATH=$( jq -r ".movements[$i].sourcePath" "${MOVEMENT_JSON_PATH}" )
+    DESTINATION_PATH=$( jq -r ".movements[$i].destinationPath" "${MOVEMENT_JSON_PATH}" )
+    AMINS=$( jq -r ".movements[$i].amins" "${MOVEMENT_JSON_PATH}" )
+    start_movement "$SOURCE_PATH" "$DESTINATION_PATH" "$AMINS"
+done
+
+# Run checks and build soft links
+if ! [[ -d ${MAP_ROOT_DIRECTORY} ]]; then
+    echo "Missing root directory" >&2
+    exit 1
+fi
+GREEN_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/green )
+RED_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/red )
+BLUE_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/blue )
+export GREEN_PATH
+# Rebuild green from scratch so links left by an aborted run cannot survive
+rm -rf -- "${GREEN_PATH:?}"
+mkdir -- "${GREEN_PATH}"
+
+# Compile storage paths from all the movements. -n and -I are mutually
+# exclusive in xargs; -d keeps each line as one argument, quotes included.
+readarray -t STORAGE_PATHS \
+    < <( jq -r '.movements[] | .sourcePath, .destinationPath' "${MOVEMENT_JSON_PATH}" | xargs -d '\n' -I {} readlink -e "{}" | sort --unique )
+if (( ${#STORAGE_PATHS[@]} == 0 )); then
+    echo "No storage path from the movement file exists" >&2
+    exit 1
+fi
+
+# Build folder structure
+printf '%s\0' "${STORAGE_PATHS[@]}" | \
+    xargs -0 -n 1 bash -c 'find "$0" -type d -printf "%P\0"' | \
+    sort --unique --zero-terminated | \
+    parallel -0 mkdir -p "${GREEN_PATH}"/{}
+
+# Soft-link file $1 into tree $3 at its path relative to storage root $2.
+# The root is removed as a literal prefix (not a pattern matched anywhere);
+# -f replaces a link already made for the same path in another storage root.
+link_file () {
+    local link_path="${3}/${1#"${2%/}"/}"
+    printf '$1 %s\n$2 %s\n$3 %s\n' "$1" "$2" "$3"
+    echo "Soft linked to ${link_path}"
+    /usr/bin/ln -sfn -- "$1" "${link_path}"
+}
+export -f link_file
+for STORAGE_PATH in "${STORAGE_PATHS[@]}"; do
+    find "${STORAGE_PATH}" -type f -print0 | \
+        sort --unique --zero-terminated | \
+        parallel -0 link_file "{}" "${STORAGE_PATH}" "${GREEN_PATH}"
+done
+
+# Swap blue/green. No blue tree exists before the first successful run, so
+# only rotate it to red when present; green is promoted either way.
+rm -rf -- "${RED_PATH:?}"
+if [[ -e ${BLUE_PATH} ]]; then mv -- "${BLUE_PATH}" "${RED_PATH}"; fi
+mv -- "${GREEN_PATH}" "${BLUE_PATH}"
diff --git a/install.sh b/install.sh
new file mode 100644
index 0000000..cded61f
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Copy the contents of ./install-files into /etc/file-o-bot.
+set -euo pipefail
+
+mkdir -p /etc/file-o-bot/config.d
+cd ./install-files/
+cp -r -- ./* /etc/file-o-bot/
diff --git a/movement.json b/movement.json
new file mode 100644
index 0000000..a4f1ddb
--- /dev/null
+++ b/movement.json
@@ -0,0 +1,14 @@
+{
+    "movements": [
+        {
+            "sourcePath": "./source_path1",
+            "destinationPath": "./dest_path1",
+            "amins": 1
+        },
+        {
+            "sourcePath": "./source_path2",
+            "destinationPath": "./dest_path2",
+            "amins": 1
+        }
+    ]
+}