More files for initial work

This commit is contained in:
2024-05-28 23:51:40 +08:00
parent 1ff500cdfb
commit aa8529e5dc
11 changed files with 61 additions and 40 deletions

View File

@ -4,8 +4,24 @@
You have file objects in separate forms of storage (internal SSD, SFTP, S3, etc), but want to have a centralized directory to access all those files. You want to be able to shift these files across these storage solutions automatically.
### Hello file-o-bot
This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage such that it is slowly "downgraded" in storage priority as you.
This script creates a folder with a huge list of soft links to every object across your storage mediums. You can also define "lifetimes" for each object in each form of storage, so that it is gradually "downgraded" in storage priority as it is accessed less and less often. This lets you move those files transparently from faster storage to cheaper archive storage.
file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely the redneck version of it.
file-o-bot is inspired by the Autoclass feature of Google Cloud Storage buckets. file-o-bot is merely a redneck version of it.
###
### FAQ
#### What happens if files/folder structures overlap across my storage mediums?
Movements still apply to the files.
Storage paths are sorted via the `sort` utility, and earlier paths have higher priority than later ones. All storage paths are "merged".
For overlapping files, the file in the highest-priority storage is the one that gets soft-linked.
#### What kind of file protections do I have when executing movements?
None. The files are transferred using rsync but that's about it.
#### This is madness.
Yes.

View File

@ -1,3 +0,0 @@
MAP_ROOT_DIRECTORY=./root-directory
SOURCE_FILE_PATH=./source-path
SOFT_LINK_PATH=./soft-links

View File

@ -1,9 +0,0 @@
-- Table `files`: three mandatory (NOT NULL) text columns.
CREATE TABLE files (
source_file_name TEXT NOT NULL,
source_file_path TEXT NOT NULL,
source_path TEXT NOT NULL
);
-- Export every row of `files` to 'file-o-bot.db'. Despite the .db extension,
-- the requested output format is Parquet with the zstd codec.
-- NOTE(review): COPY (...) TO ... (FORMAT ..., CODEC ...) looks like DuckDB
-- syntax - confirm the target engine.
COPY
(SELECT * FROM files)
TO 'file-o-bot.db'
(FORMAT 'parquet', CODEC 'zstd');

View File

@ -10,7 +10,6 @@ DEFAULT_CONFIG_PATH=.
# Read custom configs from the custom config files.
# `.` sources only its first argument (any further words become positional
# parameters of the sourced file), so every file matched by the glob is
# sourced individually instead of passing the whole glob to a single `.`.
for CONFIG_FILE in "${DEFAULT_CONFIG_PATH}"/config.d/*.ini; do
  # An unmatched glob stays literal; skip anything that is not a readable file.
  [[ -f ${CONFIG_FILE} && -r ${CONFIG_FILE} ]] || continue
  # shellcheck source=/dev/null
  . "${CONFIG_FILE}"
done
unset CONFIG_FILE
# For each lifecycle rule, execute a movement using rsync
start_movement () {
SOURCE_PATH=$1
DESTINATION_PATH=$2
@ -24,15 +23,20 @@ start_movement () {
--inplace \
--archive \
--mkpath \
--remove-source-files \
--files-from=- \
"$SOURCE_PATH" \
"$DESTINATION_PATH"
}
# Resolve the movements file path and stop when it is not a regular file
MOVEMENT_JSON_PATH=$( readlink -f "${DEFAULT_CONFIG_PATH}/movement.json" )
[[ -f ${MOVEMENT_JSON_PATH} ]] || {
  echo "Missing movement file. Exiting."
  exit 1
}
# For each lifecycle rule, execute a movement using rsync
RULE_LENGTH=$( jq '.movements | length' "${MOVEMENT_JSON_PATH}" )
for (( i=0; i<$RULE_LENGTH ; i++ ))
do
@ -44,13 +48,12 @@ done
# Run checks and build soft links
# Abort with status 1 unless MAP_ROOT_DIRECTORY names an existing directory.
if ! [[ -d ${MAP_ROOT_DIRECTORY} ]]; then
echo "Missing root directory"
echo "Missing map root directory"
exit 1
fi
# Canonicalised paths of the green, red and blue subdirectories of the map root.
GREEN_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/green )
RED_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/red )
BLUE_PATH=$( readlink -f "${MAP_ROOT_DIRECTORY}"/blue )
# Make GREEN_PATH visible to child processes.
export GREEN_PATH
# Create the green directory when it does not exist yet.
if ! [[ -d ${GREEN_PATH} ]]; then
mkdir "${GREEN_PATH}"
fi
@ -58,30 +61,38 @@ fi
# Compile storage paths from all the movements.
# readlink -m canonicalises a path even when it does not exist, so a missing
# storage path stays in the list and is reported by the check below instead of
# being silently dropped. xargs -d '\n' takes every line verbatim (no quote or
# backslash processing) and -r skips readlink entirely on empty input.
readarray -t STORAGE_PATHS \
  < <( jq -r '.movements[] | .sourcePath, .destinationPath' "${MOVEMENT_JSON_PATH}" \
    | xargs -r -d '\n' readlink -m -- \
    | sort --unique )
STORAGE_PATHS_LEN=${#STORAGE_PATHS[@]}
# Verify that all storage paths work
for ((i=0 ; i<STORAGE_PATHS_LEN; i++))
do
  STORAGE_PATH=${STORAGE_PATHS[$i]}
  if ! [[ -d $STORAGE_PATH ]]; then
    # The path is passed as an argument so that a '%' or backslash in it is
    # never interpreted as part of the printf format string.
    printf 'Storage path %s missing. Exiting.\n' "${STORAGE_PATH}"
    exit 1
  fi
done
# Build folder structure
# For every storage path, list its directories relative to that path
# (find's %P), NUL-separated; de-duplicate the combined list and create each
# directory under GREEN_PATH with mkdir -p.
# NOTE(review): the two `parallel` lines differ only in quoting; only the first
# one is attached to the pipeline, the second stands alone - confirm which one
# is meant to remain.
printf '%s\0' "${STORAGE_PATHS[@]}" | \
xargs -0 -n 1 bash -c 'find "$0" -type d -printf "%P\0"' | \
sort --unique --zero-terminated | \
parallel -0 mkdir -p "${GREEN_PATH}"/{}
parallel -0 mkdir -p "${GREEN_PATH}/{}"
# Build file structure using soft links
# link_file FILE PATTERN REPLACEMENT
#   $1 - path the soft link points to
#   $2 - pattern replaced in $1 (first match only; it is unquoted inside the
#        expansion, so bash treats it as a glob pattern)
#   $3 - text substituted for the match
# The link itself is created at ${1/$2/$3}.
link_file () {
# Debug output of the three arguments
echo "\$1 $1"
echo "\$2 $2"
echo "\$3 $3"
# Printed before ln runs, so this message appears even when linking fails
echo "Soft linked to ${1/$2/$3}"
# NOTE(review): two ln calls target the same link name; ln -s without -f fails
# when the name already exists - confirm that only one of them should remain.
/usr/bin/ln -s "$1" "${1/$2/$3}"
/usr/bin/ln -s "$1" "${1/$2/$3}" || echo failed
}
# Export the function so that child bash processes can call it
export -f link_file
# For each storage path: list its regular files (NUL-separated), sort and
# de-duplicate them, and call link_file for every file with the storage path
# and GREEN_PATH as second and third argument.
# NOTE(review): two loop headers are present (a for-in form and an index form);
# confirm which one is meant to remain.
for STORAGE_PATH in "${STORAGE_PATHS[@]}"
for ((i=0 ; i<${STORAGE_PATHS_LEN}; i++))
do
STORAGE_PATH=${STORAGE_PATHS[$i]}
find "${STORAGE_PATH}" -type f -print0 | \
sort --unique --zero-terminated | \
parallel -0 link_file "{}" "${STORAGE_PATH}" "${GREEN_PATH}"
done
# Rotate the trees: drop red, move blue to red, then move green to blue.
# The blue -> red step is skipped when no blue tree exists yet (e.g. on the
# first run); otherwise the failing mv would break the chain and the freshly
# built green tree would never be promoted to blue.
# ${VAR:?} aborts the script instead of running rm/mv on an empty path, and
# `--` keeps a path starting with '-' from being parsed as an option.
rm -rf -- "${RED_PATH:?}" && \
{ [[ ! -e ${BLUE_PATH:?} ]] || mv -- "${BLUE_PATH}" "${RED_PATH}"; } && \
mv -- "${GREEN_PATH:?}" "${BLUE_PATH}"

View File

@ -0,0 +1 @@
MAP_ROOT_DIRECTORY=./mapped-storage

View File

@ -0,0 +1,19 @@
{
"movements": [
{
"sourcePath": "./hdd",
"destinationPath": "./s3-bucket",
"amins": 1
},
{
"sourcePath": "./ssd",
"destinationPath": "./s3-bucket",
"amins": 1
},
{
"sourcePath": "./s3-bucket",
"destinationPath": "./backblaze-b2",
"amins": 1
}
]
}

View File

@ -1,14 +0,0 @@
{
"movements": [
{
"sourcePath": "./source_path1",
"destinationPath": "./dest_path1",
"amins": 1
},
{
"sourcePath": "./source_path2",
"destinationPath": "./dest_path2",
"amins": 1
}
]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB