Initial commit

This commit is contained in:
2024-05-27 21:59:41 +08:00
parent 54a4e887c3
commit 1ff500cdfb
7 changed files with 136 additions and 0 deletions

7
Dockerfile Normal file
View File

@ -0,0 +1,7 @@
# Runtime image for file-o-bot: Debian slim plus the tools the script shells
# out to (jq, GNU parallel, rsync).
FROM debian:bookworm-slim

# -y is required: image builds are non-interactive, so an unanswered
# confirmation prompt aborts the install. apt-get is used instead of apt
# because apt's command-line interface is not meant for scripts. The package
# lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        jq \
        parallel \
        rsync \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . /app

# Exec form: bash is started directly instead of through `/bin/sh -c`, so it
# receives the stop signal sent by `docker stop`.
CMD ["bash", "/app/file-o-bot.sh"]

View File

@ -1,2 +1,11 @@
# file-o-bot
### Problem
You have file objects in separate forms of storage (internal SSD, SFTP, S3, etc), but want to have a centralized directory to access all those files. You want to be able to shift these files across these storage solutions automatically.
### Hello file-o-bot
This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage such that it is slowly "downgraded" in storage priority as it goes longer without being accessed.
file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely the redneck version of it.
###

3
default-config.ini Normal file
View File

@ -0,0 +1,3 @@
# Default settings for file-o-bot. file-o-bot.sh sources this file as shell
# code, so every line must be a valid shell assignment.

# Directory under which file-o-bot.sh keeps its green/red/blue link trees.
# The script exits with "Missing root directory" if it does not exist.
MAP_ROOT_DIRECTORY=./root-directory
# NOTE(review): not referenced by file-o-bot.sh in this commit — confirm intended use.
SOURCE_FILE_PATH=./source-path
# NOTE(review): not referenced by file-o-bot.sh in this commit — confirm intended use.
SOFT_LINK_PATH=./soft-links

9
duckdb-install.sql Normal file
View File

@ -0,0 +1,9 @@
-- Table of tracked files: three mandatory text columns.
CREATE TABLE files (
    source_file_name TEXT NOT NULL,
    source_file_path TEXT NOT NULL,
    source_path      TEXT NOT NULL
);

-- Write the whole table out as zstd-compressed Parquet.
-- NOTE(review): the output is Parquet despite the '.db' file name — confirm
-- the name is intentional.
COPY files
    TO 'file-o-bot.db'
    (FORMAT 'parquet', CODEC 'zstd');

87
file-o-bot.sh Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env bash
# Abort on a failing command (-e), on use of an unset variable (-u) and on a
# failure anywhere inside a pipeline (-o pipefail). A bare `-o` with no option
# name only prints the current option table, so `pipefail` must be spelled out.
set -euo pipefail

# Directory that holds default-config.ini, config.d/ and movement.json.
DEFAULT_CONFIG_PATH=.

# Read default configs from default config file
. "${DEFAULT_CONFIG_PATH}"/default-config.ini

# Read custom configs from custom config files. `.` accepts a single file (any
# further words become positional parameters), so each match is sourced in
# turn, in glob (sorted) order.
for CUSTOM_CONFIG in "${DEFAULT_CONFIG_PATH}"/config.d/*.ini
do
    # An unmatched glob is left as the literal pattern; skip it instead of
    # failing when config.d is empty or absent.
    [[ -f ${CUSTOM_CONFIG} ]] || continue
    . "${CUSTOM_CONFIG}"
done
# For each lifecycle rule, execute a movement using rsync
#
# Usage: start_movement SOURCE_PATH DESTINATION_PATH AMINS
#   SOURCE_PATH       directory scanned for candidate files
#   DESTINATION_PATH  directory that receives the files (rsync --mkpath
#                     creates missing path components)
#   AMINS             passed to `find -amin +N`: only files last accessed more
#                     than N minutes ago are selected
#
# Regular files under SOURCE_PATH that match are handed to rsync as a
# NUL-separated list of paths relative to SOURCE_PATH, so the directory layout
# is reproduced under DESTINATION_PATH.
#
# NOTE(review): rsync is invoked without --remove-source-files, so the files
# are copied and the originals stay in place — confirm that is intended.
start_movement () {
    # Locals keep the function from overwriting the caller's variables of the
    # same purpose.
    local source_path=$1
    local destination_path=$2
    local amins=$3

    find "$source_path" \
        ! -type l \
        -type f \
        -amin +"$amins" \
        -printf "%P\0" | \
    rsync -0 \
        --inplace \
        --archive \
        --mkpath \
        --files-from=- \
        "$source_path" \
        "$destination_path"
}
# Locate the movement rules; without them there is nothing to do.
MOVEMENT_JSON_PATH=$( readlink -f "${DEFAULT_CONFIG_PATH}/movement.json" )
if ! [[ -f ${MOVEMENT_JSON_PATH} ]]; then
    echo "Missing movement file. Exiting."
    exit 1
fi

# Run every rule in the order it appears in the "movements" array.
RULE_LENGTH=$( jq '.movements | length' "${MOVEMENT_JSON_PATH}" )
for (( i=0; i<RULE_LENGTH; i++ ))
do
    # `// empty` makes a missing or null field come back as an empty string
    # rather than the literal text "null", which would otherwise be passed to
    # find/rsync as if it were a real path.
    SOURCE_PATH=$( jq -r ".movements[$i].sourcePath // empty" "${MOVEMENT_JSON_PATH}" )
    DESTINATION_PATH=$( jq -r ".movements[$i].destinationPath // empty" "${MOVEMENT_JSON_PATH}" )
    AMINS=$( jq -r ".movements[$i].amins // empty" "${MOVEMENT_JSON_PATH}" )

    if [[ -z ${SOURCE_PATH} || -z ${DESTINATION_PATH} ]]; then
        echo "Movement rule ${i} is missing sourcePath or destinationPath. Exiting." >&2
        exit 1
    fi
    # AMINS is used as the numeric argument of `find -amin`.
    if ! [[ ${AMINS} =~ ^[0-9]+$ ]]; then
        echo "Movement rule ${i} needs a non-negative integer amins. Exiting." >&2
        exit 1
    fi

    start_movement "$SOURCE_PATH" "$DESTINATION_PATH" "$AMINS"
done
# Run checks and build soft links
if ! [[ -d ${MAP_ROOT_DIRECTORY} ]]; then
    echo "Missing root directory"
    exit 1
fi

# The three link trees live side by side under the root directory:
# green is the tree being built, blue and red are rotated at the end of the run.
GREEN_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/green )
RED_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/red )
BLUE_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/blue )
# Exported so that child shells can read it.
export GREEN_PATH

# Always start from an empty green tree. A tree left behind by an aborted run
# would make every later `ln -s` fail with "File exists". rm does not follow
# the symlinks inside the tree, so linked files are untouched; `:?` guards
# against an empty path.
rm -rf -- "${GREEN_PATH:?}"
mkdir -- "${GREEN_PATH}"
# Compile storage paths from all the movements: every sourcePath and
# destinationPath, canonicalised and de-duplicated. Paths that do not exist are
# dropped (readlink -e prints nothing for them). A plain read loop is used
# instead of xargs so that quotes and backslashes in a path are taken literally.
readarray -t STORAGE_PATHS < <(
    jq -r '.movements[] | .sourcePath, .destinationPath' "${MOVEMENT_JSON_PATH}" | \
    while IFS= read -r MOVEMENT_PATH
    do
        readlink -e -- "${MOVEMENT_PATH}" || true
    done | \
    sort --unique
)

# With no usable storage path there is nothing to map; stop before an empty
# tree is built (find with no starting point would also scan the current
# directory).
if (( ${#STORAGE_PATHS[@]} == 0 )); then
    echo "No existing storage paths found in movement file. Exiting." >&2
    exit 1
fi

# Build folder structure: recreate every directory found under any storage
# path, relative to that path (%P), beneath the green tree. The parallel
# command is single-quoted so the exported GREEN_PATH is expanded, still
# quoted, by the shell that parallel starts; this keeps a root directory
# containing spaces intact.
find "${STORAGE_PATHS[@]}" -type d -printf '%P\0' | \
sort --unique --zero-terminated | \
parallel -0 'mkdir -p "${GREEN_PATH}"/{}'
# Build file structure using soft links
link_file () {
echo "\$1 $1"
echo "\$2 $2"
echo "\$3 $3"
echo "Soft linked to ${1/$2/$3}"
/usr/bin/ln -s "$1" "${1/$2/$3}"
}
export -f link_file
# Walk the storage paths by index; for each one, hand every regular file
# beneath it (NUL-separated, de-duplicated) to link_file through GNU parallel,
# together with the storage path and the green tree.
for (( idx = 0; idx < ${#STORAGE_PATHS[@]}; idx++ )); do
    STORAGE_PATH=${STORAGE_PATHS[idx]}
    find "${STORAGE_PATH}" -type f -print0 \
        | sort --unique --zero-terminated \
        | parallel -0 link_file "{}" "${STORAGE_PATH}" "${GREEN_PATH}"
done
# Swap blue/green: drop the oldest tree (red), demote the current tree (blue)
# to red, then promote the freshly built tree (green) to blue.
rm -rf -- "${RED_PATH}"
# On the very first run there is no blue tree yet; moving it unconditionally
# would fail and green would never be promoted.
if [[ -e ${BLUE_PATH} ]]; then
    mv -- "${BLUE_PATH}" "${RED_PATH}"
fi
mv -- "${GREEN_PATH}" "${BLUE_PATH}"

7
install.sh Normal file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Installs the contents of ./install-files/ into /etc/file-o-bot/.
# Must be run from the directory that contains install-files/.

# Abort on a failing command (-e), on use of an unset variable (-u) and on a
# failure anywhere inside a pipeline (-o pipefail). A bare `-o` with no option
# name only prints the current option table.
set -euo pipefail

mkdir -p /etc/file-o-bot/config.d
cd ./install-files/
# The ./ prefix keeps a file name that starts with '-' from being parsed as a
# cp option.
cp -r ./* /etc/file-o-bot/

14
movement.json Normal file
View File

@ -0,0 +1,14 @@
{
"movements": [
{
"sourcePath": "./source_path1",
"destinationPath": "./dest_path1",
"amins": 1
},
{
"sourcePath": "./source_path2",
"destinationPath": "./dest_path2",
"amins": 1
}
]
}