blob: 3712828b76dd366c27b58b908b322f6eb2dd8d8e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#!/usr/bin/env bash
# Exit as soon as a command fails; commands tested by `if`/`while`/`&&`/`||` are exempt.
set -e
#
# You can use this script to mirror all GitHub repositories of an org/user for safekeeping.
# It's a good idea to combine it with a backup tool that supports versioning, such as duplicity/duply,
# so that you can go back in time if something you want from the repositories gets purged.
#
# Configuration:
#
# The root path is the base directory for all repositories and metadata:
# export ROOT_PATH="/opt/github-mirror"
#
# The namespace is a sub-directory, so you can keep the repositories of multiple orgs/users apart:
# export NAMESPACE="vyos"
#
# GITHUB_SUBJECT sets the GitHub org/user you want to mirror and GITHUB_KIND sets the type of that subject:
# For the org:
# export GITHUB_KIND="org"
# export GITHUB_SUBJECT="vyos"
# For the user:
# export GITHUB_KIND="user"
# export GITHUB_SUBJECT="dd010101"
#
# This script will produce the $ROOT_PATH/data and $ROOT_PATH/repos directories.
# The data directory contains working data used by this script.
# The repos directory contains git mirrors divided by namespaces.
#
# Settings: every value has a built-in default that the matching environment
# variable (NAMESPACE, ROOT_PATH, GITHUB_KIND, GITHUB_SUBJECT) overrides when
# it is set to a non-empty string.
namespace=${NAMESPACE:-vyos}
rootPath=${ROOT_PATH:-/opt/github-mirror}
githubKind=${GITHUB_KIND:-org}
githubSubject=${GITHUB_SUBJECT:-vyos}

# Per-namespace locations derived from the root path.
dataDir="${rootPath}/data/${namespace}"
reposDir="${rootPath}/repos/${namespace}"
changeTimestampPath="${dataDir}/change-timestamp"

# Make sure both working directories exist before anything is written to them.
mkdir -p "${dataDir}"
mkdir -p "${reposDir}"
# Print a Unix epoch timestamp as "YYYY-MM-DD HH:MM:SS".
# Arguments: $1 - seconds since the epoch
# Outputs:   the formatted date on stdout (produced by `date`)
formatDate() {
  local epochSeconds=$1
  date -d "@${epochSeconds}" '+%Y-%m-%d %H:%M:%S'
}
# Walk the paginated repository listing of the configured org/user and keep a
# `git clone --mirror` copy of every listed repository under $reposDir.
#
# Reads:  dataDir, reposDir, changeTimestampPath, githubKind, githubSubject
# Writes: $dataDir/repos-<page>.json   API response of each listing page
#         $reposDir/<name>.git         the mirror, plus its `description` and
#                                      `info/web/last-modified` files
#         $changeTimestampPath         newest branch commit time seen in any mirror
# Exit:   non-zero after the whole listing was processed if any repository
#         could not be cloned or updated; a failing listing request still
#         aborts immediately through `set -e`.
page=1
changeTimestamp=0
failedRepos=0
# Stop after 1000 pages at the latest, even if no empty page was seen.
while [ "$page" -le 1000 ]; do
  echo "Processing page $page"
  path="$dataDir/repos-$page.json"
  params="?per_page=50&page=$page"
  curl -sS --fail-with-body "https://api.github.com/${githubKind}s/$githubSubject/repos$params" -o "$path"

  emptyPage=true
  while IFS= read -r item; do
    # Any entry at all means the listing has not run out yet, even if the
    # entry itself is skipped below.
    emptyPage=false

    gitUrl=$(jq -r '.clone_url // ""' <<< "$item")
    # "<name>.git" - the last path component of the clone URL - names the mirror
    # directory. Plain parameter expansion always yields exactly one value.
    directory=${gitUrl##*/}
    if [ -z "$directory" ]; then
      >&2 echo "ERROR: repository entry without usable clone_url on page $page"
      failedRepos=$((failedRepos + 1))
      continue
    fi
    # A null description becomes an empty string.
    description=$(jq -r '.description // ""' <<< "$item")
    fullPath="$reposDir/$directory"

    if [ -d "$fullPath" ]; then
      echo "Updating $gitUrl in $fullPath"
      # Test the command directly: with `set -e` active, a separate `$?` check
      # on the next line would never be reached after a failure.
      if ! git -C "$fullPath" remote update 2>&1; then
        >&2 echo "ERROR: failed to 'git remote update' for $fullPath"
        failedRepos=$((failedRepos + 1))
      fi
    else
      echo "Cloning $gitUrl as $fullPath"
      # The target directory is not created up front, so a failed clone cannot
      # leave behind a pre-made empty directory that the next run would treat
      # as an existing mirror.
      if ! git clone --mirror "$gitUrl" "$fullPath"; then
        >&2 echo "ERROR: failed to 'git clone --mirror' $gitUrl into $fullPath"
        failedRepos=$((failedRepos + 1))
        continue
      fi
    fi

    printf '%s\n' "$description (mirror of $gitUrl)" > "$fullPath/description"
    webInfoPath="$fullPath/info/web"
    mkdir -p "$webInfoPath"

    # Newest branch tip only; the raw date reads "<epoch> <utc offset>", so the
    # part before the first space is the epoch timestamp.
    latestRaw=$(git -C "$fullPath" for-each-ref --count=1 --sort=-committerdate \
      --format='%(committerdate:raw)' refs/heads/) || latestRaw=""
    latestTimestamp=${latestRaw%% *}
    # A repository without branches has no timestamp: leave its last-modified
    # file and the overall maximum untouched instead of comparing an empty value.
    if [[ "$latestTimestamp" =~ ^[0-9]+$ ]]; then
      echo "$latestTimestamp" > "$webInfoPath/last-modified"
      if [ "$changeTimestamp" -lt "$latestTimestamp" ]; then
        changeTimestamp="$latestTimestamp"
      fi
    fi
  done < <(jq -c '.[]' "$path")

  if [ "$emptyPage" = true ]; then
    echo "All done"
    break
  fi
  page=$((page + 1))
done

echo "$changeTimestamp" > "$changeTimestampPath"
echo "Latest change: $(formatDate "$changeTimestamp")"

# Report failures through the exit status once everything else was processed.
if [ "$failedRepos" -gt 0 ]; then
  >&2 echo "ERROR: $failedRepos repositories failed to sync"
  exit 1
fi
|