diff options
author | Case Duckworth | 2020-05-28 08:37:40 -0500 |
---|---|---|
committer | Case Duckworth | 2020-05-28 08:37:40 -0500 |
commit | 2e6b42e5c1e00d946691a0b40f64be1091338519 (patch) | |
tree | f837d1a91228815d31f98e10c3f0c4fdd4a9ee5c | |
parent | Change typesetting (diff) | |
download | bollux-2e6b42e5c1e00d946691a0b40f64be1091338519.tar.gz bollux-2e6b42e5c1e00d946691a0b40f64be1091338519.zip |
Start testing transform_uri
-rw-r--r-- | transform_uri.sh | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/transform_uri.sh b/transform_uri.sh new file mode 100644 index 0000000..e9c9fc9 --- /dev/null +++ b/transform_uri.sh | |||
@@ -0,0 +1,157 @@ | |||
1 | #!/usr/bin/env bash | ||
2 | # transform-url | ||
3 | # cf. https://tools.ietf.org/html/rfc3986#section-5 and | ||
4 | # cf. https://tools.ietf.org/html/rfc3986#section-5.1 | ||
5 | # cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex | ||
6 | |||
7 | # TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4 | ||
8 | |||
# transform_resource BASE REFERENCE => prints the resolved target URI
#
# Resolves REFERENCE ($2) against the base URI BASE ($1) following the
# pseudocode of RFC 3986 section 5.2.2, then recomposes the result as
# described in section 5.3 and prints it followed by a newline.
#
# Both arguments are split with parse_url, which only sets the components
# that are non-empty.  "Defined" in the RFC pseudocode is therefore tested
# here as "set in the array" (the '+x' expansion), and a missing path is
# treated the same as an empty one.
transform_resource() { # 5.2.2
    local -A R B T # reference, base url, target
    local r=""

    # NOTE(review): eval of generated text -- this is only safe as long as
    # parse_url shell-quotes every value it prints.
    eval "$(parse_url R "$2")" # XXX CHANGE
    eval "$(parse_url B "$1")"

    if [[ "${R[scheme]+x}" ]]; then
        # The reference is absolute: only dot segments are normalised.
        T[scheme]="${R[scheme]}"
        T[authority]="${R[authority]}"
        T[path]="$(remove_dot_segments "${R[path]}")"
        T[query]="${R[query]}"
    else
        T[scheme]="${B[scheme]}"
        if [[ "${R[authority]+x}" ]]; then
            # Network-path reference ("//host/..."): keep the base scheme only.
            T[authority]="${R[authority]}"
            T[path]="$(remove_dot_segments "${R[path]}")"
            T[query]="${R[query]}"
        else
            T[authority]="${B[authority]}"
            if [[ -z "${R[path]}" ]]; then
                # Empty path: reuse the base path unchanged, and inherit the
                # base query unless the reference brings its own.
                T[path]="${B[path]}"
                if [[ "${R[query]+x}" ]]; then
                    T[query]="${R[query]}"
                else
                    T[query]="${B[query]}"
                fi
            else
                if [[ "${R[path]}" == /* ]]; then
                    T[path]="$(remove_dot_segments "${R[path]}")"
                else
                    # '?' tells merge that the base has no authority.
                    T[path]="$(merge "${B[authority]-?}" \
                        "${B[path]}" "${R[path]}")"
                    T[path]="$(remove_dot_segments "${T[path]}")"
                fi
                T[query]="${R[query]}"
            fi
        fi
    fi
    T[fragment]="${R[fragment]}"

    # 5.3 -- recomposition.  Every field of T is assigned above, so a
    # component is emitted exactly when it is non-empty.
    if [[ -n "${T[scheme]}" ]]; then
        r="$r${T[scheme]}:"
    fi
    if [[ -n "${T[authority]}" ]]; then
        r="$r//${T[authority]}"
    fi
    r="$r${T[path]}"
    if [[ -n "${T[query]}" ]]; then
        r="$r?${T[query]}"
    fi
    if [[ -n "${T[fragment]}" ]]; then
        r="$r#${T[fragment]}"
    fi
    printf '%s\n' "$r"
}
61 | |||
# merge AUTHORITY BASE_PATH REF_PATH => prints the merged path (RFC 3986 5.2.3)
#
#   AUTHORITY  authority of the base URI; "?" (or an empty string) means the
#              base URI has no authority (see the caller)
#   BASE_PATH  path of the base URI
#   REF_PATH   path of the relative reference
#
# The result is printed verbatim: it is URI text, not shell input, so it
# must not be shell-quoted.
merge() { # 5.2.3
    #>If the base URI has a defined authority component and an empty
    #>path, then return a string consisting of "/" concatenated with the
    #>reference's path; otherwise,
    #>return a string consisting of the reference's path component
    #>appended to all but the last segment of the base URI's path (i.e.,
    #>excluding any characters after the right-most "/" in the base URI
    #>path, or excluding the entire base URI path if it does not contain
    #>any "/" characters).
    local B_authority="$1" # if ? is here, it means undefined (see caller)
    local B_path="$2"
    local R_path="$3"

    # Not part of 5.2.3: with an empty reference path there is nothing to
    # merge, so the base path is handed back unchanged.
    if [[ -z "$R_path" ]]; then
        printf '%s\n' "$B_path"
        return
    fi

    if [[ "${B_authority:-?}" != "?" && -z "$B_path" ]]; then
        printf '/%s\n' "$R_path"
        return
    fi

    # Keep everything up to and including the right-most "/" of the base
    # path; a base path without any "/" contributes nothing.
    if [[ "$B_path" == */* ]]; then
        B_path="${B_path%/*}/"
    else
        B_path=""
    fi
    printf '%s%s\n' "$B_path" "$R_path"
}
91 | |||
92 | # I can probably just use normalize_path already in bollux here | ||
# remove_dot_segments PATH => prints PATH with "." and ".." segments resolved
#
# Implements the input-buffer/output-buffer loop of RFC 3986 section 5.2.4.
# As a deliberate extra that is NOT part of the RFC, runs of "//" in the
# result are collapsed to a single "/" before printing.
remove_dot_segments() { # 5.2.4
    local input="$1"
    local output=""
    local segment

    while [[ -n "$input" ]]; do
        case "$input" in
        ../*)
            # 2A: a leading "../" is dropped
            input="${input#../}"
            ;;
        ./*)
            # 2A: a leading "./" is dropped
            input="${input#./}"
            ;;
        /./* | /.)
            # 2B: "/./" (or a final "/.") becomes "/"
            input="${input#/.}"
            input="${input:-/}"
            ;;
        /../* | /..)
            # 2C: "/../" (or a final "/..") becomes "/" and the last
            # output segment is removed along with its preceding "/"
            input="${input#/..}"
            input="${input:-/}"
            if [[ "$output" == */* ]]; then
                output="${output%/*}"
            else
                output=""
            fi
            ;;
        . | ..)
            # 2D: nothing but a dot segment is left
            input=""
            ;;
        *)
            # 2E: move the first path segment in the input buffer to the
            # end of the output buffer, including the initial "/"
            # character (if any) and any subsequent characters up to, but
            # not including, the next "/" character or the end of the
            # input buffer.
            if [[ "$input" == /* ]]; then
                segment="${input#/}"
                segment="/${segment%%/*}"
            else
                segment="${input%%/*}"
            fi
            output="$output$segment"
            input="${input#"$segment"}"
            ;;
        esac
    done

    printf '%s\n' "$output" |
        sed 's,//,/,g' # XXX is this okay....?
}
124 | |||
125 | # *FINDING* URLS ... IN PURE BASH !!! | ||
# parse_url NAME STRING => prints shell assignments filling the array NAME
#
# Usage:  declare -A NAME; eval "$(parse_url NAME STRING)"
#
# STRING is split with the regular expression from RFC 3986 appendix B.
# One line of the form NAME[component]=value is printed for each of scheme,
# authority, path, query and fragment that is non-empty; empty components
# are not printed, so they stay unset in NAME.
#
# Values are quoted with printf %q so the output can be eval'd even when
# STRING contains spaces, quotes, '$', backticks or other shell syntax.
#
# Returns 0 after a successful split, or the failing status of the regex
# match otherwise.
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
    local name="$1"
    local string="$2"
    local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
    local c
    [[ $string =~ $re ]] || return $?

    local scheme="${BASH_REMATCH[2]}"
    local authority="${BASH_REMATCH[4]}"
    local path="${BASH_REMATCH[5]}"
    local query="${BASH_REMATCH[7]}"
    local fragment="${BASH_REMATCH[9]}"

    for c in scheme authority path query fragment; do
        # ${!c} reads the local variable whose name is stored in c
        if [[ -n "${!c}" ]]; then
            printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
        fi
    done
    return 0
}
144 | |||
145 | # ease-of-life functions | ||
# isdefined NAME => succeeds when the variable called NAME is set at all,
# even if its value is the empty string.
isdefined() {
    # ${!1+x} expands to "x" only when the variable named by $1 is set
    test -n "${!1+x}"
}
# isempty NAME => succeeds only when the variable called NAME is set AND
# holds the empty string; an unset variable does not count as empty.
isempty() {
    # ${!1-x} expands to "x" when the variable named by $1 is unset, so
    # the result is empty only for a set-but-empty variable
    test -z "${!1-x}"
}
152 | |||
# Script entry point: turn on command tracing, then resolve the URIs given
# on the command line (base first, reference second).
set -o xtrace
transform_resource "$@"
155 | |||
156 | # NEXT .... | ||
157 | # NORMALIZATION !!! | ||