18
18
import time
19
19
from collections import OrderedDict
20
20
from multiprocessing import Process
21
+ # pylint: disable=E0401
22
+ from packaging import version as packaging_version # type: ignore
21
23
22
24
RLEC_CONTAINER_NAME = "redis-enterprise-node"
23
25
OPERATOR_LABEL = "app=redis-enterprise"
24
26
MODE_RESTRICTED = "restricted"
25
27
MODE_ALL = "all"
28
+ FIRST_VERSION_SUPPORTING_RESTRICTED = "6.2.18-3"
26
29
27
30
RS_LOG_FOLDER_PATH = "/var/opt/redislabs/log"
28
31
logger = logging .getLogger (__name__ )
29
- VERSION_LOG_COLLECTOR = "6.2.18-3 "
32
+ VERSION_LOG_COLLECTOR = "6.2.18-3a "
30
33
31
34
TIME_FORMAT = time .strftime ("%Y%m%d-%H%M%S" )
32
35
39
42
DEFAULT_K8S_CLI = "kubectl"
40
43
OC_K8S_CLI = "oc"
41
44
42
- API_RESOURCES = [
45
+ RESTRICTED_MODE_API_RESOURCES = [
43
46
"RedisEnterpriseCluster" ,
44
47
"RedisEnterpriseDatabase" ,
45
48
"RedisEnterpriseRemoteCluster" ,
54
57
"RoleBinding" ,
55
58
"PersistentVolume" ,
56
59
"PersistentVolumeClaim" ,
57
- "Node" ,
58
60
"PodDisruptionBudget" ,
59
- "ResourceQuota" ,
60
61
"Endpoints" ,
61
62
"Pod" ,
62
- "NetworkPolicy" ,
63
63
"CustomResourceDefinition" ,
64
- "CertificateSigningRequest" ,
65
64
"ValidatingWebhookConfiguration" ,
66
65
"NamespacedValidatingType" ,
67
66
"NamespacedValidatingRule" ,
67
+ "PodSecurityPolicy"
68
+ ]
69
+
70
+ ALL_ONLY_API_RESOURCES = [
71
+ "Node" ,
72
+ "ResourceQuota" ,
73
+ "NetworkPolicy" ,
74
+ "CertificateSigningRequest" ,
68
75
"ClusterRole" ,
69
76
"ClusterRoleBinding" ,
70
77
"ClusterServiceVersion" ,
71
78
"Subscription" ,
72
79
"InstallPlan" ,
73
80
"CatalogSource" ,
74
- "PodSecurityPolicy" ,
75
81
"ReplicaSet" ,
76
- "StorageClass" ,
82
+ "StorageClass"
77
83
]
78
84
79
85
@@ -135,7 +141,9 @@ def _get_namespace_from_config():
135
141
return existing_namespaces
136
142
137
143
138
- def collect_from_ns (namespace , output_dir , logs_from_all_pods = False , k8s_cli_input = "" , mode = MODE_RESTRICTED ):
144
+ # pylint: disable=R0913
145
+ def collect_from_ns (namespace , output_dir , api_resources , logs_from_all_pods = False , k8s_cli_input = "" ,
146
+ mode = MODE_RESTRICTED ):
139
147
"Collect the context of a specific namespace. Typically runs in parallel processes."
140
148
k8s_cli = detect_k8s_cli (k8s_cli_input )
141
149
logger .info ("Started collecting from namespace '%s'" , namespace )
@@ -151,8 +159,8 @@ def collect_from_ns(namespace, output_dir, logs_from_all_pods=False, k8s_cli_inp
151
159
collect_pod_rs_logs (namespace , ns_output_dir , k8s_cli , mode )
152
160
collect_resources_list (namespace , ns_output_dir , k8s_cli , mode )
153
161
collect_events (namespace , ns_output_dir , k8s_cli , mode )
154
- collect_api_resources (namespace , ns_output_dir , k8s_cli , selector )
155
- collect_api_resources_description (namespace , ns_output_dir , k8s_cli , selector )
162
+ collect_api_resources (namespace , ns_output_dir , k8s_cli , api_resources , selector )
163
+ collect_api_resources_description (namespace , ns_output_dir , k8s_cli , api_resources , selector )
156
164
collect_pods_logs (namespace , ns_output_dir , k8s_cli , logs_from_all_pods )
157
165
158
166
@@ -188,11 +196,10 @@ def detect_k8s_cli(k8s_cli_input=""):
188
196
return DEFAULT_K8S_CLI
189
197
190
198
191
- def compare_versions (k8s_cli , namespaces ):
199
+ def get_operator_version (k8s_cli , namespaces ):
192
200
"""
193
201
Compare operator version with the current log_collector version.
194
202
"""
195
- operator_version = ""
196
203
for namespace in namespaces :
197
204
cmd = "{} get deployment redis-enterprise-operator -o jsonpath=" \
198
205
"\" {{.spec.template.spec.containers[0].image}}\" -n {}" .format (k8s_cli , namespace )
@@ -203,51 +210,87 @@ def compare_versions(k8s_cli, namespaces):
203
210
if len (operator_version ) == 2 :
204
211
logger .info ("running with operator version: %s and log collector version: %s" ,
205
212
operator_version [1 ], VERSION_LOG_COLLECTOR )
206
- return operator_version [1 ] == VERSION_LOG_COLLECTOR
207
- logger .info ("could not find operator version - running log collector " )
208
- return True
213
+ return operator_version [1 ]
214
+ logger .info ("could not find operator version" )
215
+ return ""
209
216
210
217
211
- def run (namespace_input , output_dir , logs_from_all_pods = False , k8s_cli_input = "" , # pylint: disable=R0913
212
- mode = MODE_RESTRICTED ,
213
- skip_version_check = False ):
218
def validate_mode(mode, operator_version):
    """
    Reject the restricted mode for operators that are too old to support it.

    For old versions there is no way to use restricted because resources are
    missing labels.

    :param mode: the collection mode requested by the user.
    :param operator_version: version string of the running operator; an empty
        string means the version could not be found.
    :raises ValueError: if restricted mode is requested for an operator older
        than FIRST_VERSION_SUPPORTING_RESTRICTED.
    """
    if not operator_version:
        # Unknown version: there is nothing to compare against, and an empty
        # string is not a parseable version. Accept the requested mode, the
        # same way determine_default_mode treats an unknown version.
        return

    if mode != MODE_RESTRICTED:
        return

    current = packaging_version.parse(operator_version)
    first_supported = packaging_version.parse(FIRST_VERSION_SUPPORTING_RESTRICTED)
    if current < first_supported:
        raise ValueError("{} is not supported for this version, please use {}".format(MODE_RESTRICTED, MODE_ALL))
225
+
226
+
227
def determine_default_mode(operator_version):
    """
    Pick the collection mode to use when the user did not request one.

    An empty operator version (operator not found) and any version at or above
    FIRST_VERSION_SUPPORTING_RESTRICTED select the restricted mode; older
    versions fall back to collecting everything.

    :param operator_version: version string of the running operator, or "".
    :return: MODE_RESTRICTED or MODE_ALL.
    """
    if operator_version == "":
        return MODE_RESTRICTED

    current = packaging_version.parse(operator_version)
    first_supported = packaging_version.parse(FIRST_VERSION_SUPPORTING_RESTRICTED)
    if current >= first_supported:
        return MODE_RESTRICTED

    return MODE_ALL
237
+
238
+
239
def run(results):
    """
    Collect logs

    :param results: parsed command line arguments. The attributes read here are
        k8s_cli, namespace, logs_from_all_pods, timeout, mode and output_dir.
    :raises ValueError: propagated from validate_mode when the requested mode
        is not supported by the detected operator version.
    """
    # Set the timeout first so it is already in effect for every helper called below.
    # pylint: disable=global-statement, invalid-name
    global TIMEOUT
    # pylint: disable=locally-disabled, invalid-name
    TIMEOUT = results.timeout

    logger.info("Started Redis Enterprise k8s log collector")

    start_time = time.time()
    k8s_cli = detect_k8s_cli(results.k8s_cli)
    namespace_input = results.namespace

    namespaces = _get_namespaces_to_run_on(namespace_input, k8s_cli)
    logs_from_all_pods = results.logs_from_all_pods

    # An explicitly requested mode is validated against the operator version;
    # otherwise the default mode is derived from that version.
    mode = results.mode
    operator_version = get_operator_version(k8s_cli, namespaces)
    if mode:
        validate_mode(mode, operator_version)
    else:
        mode = determine_default_mode(operator_version)

    # The "all" mode collects the restricted resources plus the all-only ones.
    api_resources = RESTRICTED_MODE_API_RESOURCES
    if mode == MODE_ALL:
        api_resources = api_resources + ALL_ONLY_API_RESOURCES

    output_file_name = "redis_enterprise_k8s_debug_info_{}".format(TIME_FORMAT)
    output_dir = results.output_dir
    if not output_dir:
        # if not specified, use cwd
        output_dir = os.getcwd()
    output_dir = os.path.join(output_dir, output_file_name)
    make_dir(output_dir)
    collect_cluster_info(output_dir, k8s_cli)

    # One worker process per namespace; wait for all of them before reporting.
    processes = []
    for namespace in namespaces:
        proc = Process(target=collect_from_ns,
                       args=[namespace, output_dir, api_resources, logs_from_all_pods, k8s_cli, mode])
        proc.start()
        processes.append(proc)

    for proc in processes:
        proc.join()

    create_collection_report(output_dir, output_file_name, k8s_cli, namespaces, start_time, mode)

    archive_files(output_dir, output_file_name)
    logger.info("Finished Redis Enterprise log collector")
    # %.3f keeps the fractional minutes; %d dropped them and reported 0 for short runs.
    logger.info("--- Run time: %.3f minutes ---", (time.time() - start_time) / 60)
248
291
249
292
250
- def create_collection_report (output_dir , output_file_name , k8s_cli , namespaces , start_time ):
293
+ def create_collection_report (output_dir , output_file_name , k8s_cli , namespaces , start_time , mode ):
251
294
"""
252
295
create a file with some data about the collection
253
296
"""
@@ -257,7 +300,9 @@ def create_collection_report(output_dir, output_file_name, k8s_cli, namespaces,
257
300
"output_file_name" : output_file_name ,
258
301
"k8s_cli" : k8s_cli ,
259
302
"namespaces" : namespaces ,
260
- "start_time" : start_time
303
+ "start_time" : start_time ,
304
+ "mode" : mode ,
305
+ "log_collector_version" : VERSION_LOG_COLLECTOR
261
306
}, output_fh )
262
307
263
308
@@ -372,8 +417,8 @@ def create_debug_info_package_on_pod(namespace, pod_name, attempt, k8s_cli):
372
417
return None
373
418
374
419
375
- def download_debug_info_package_from_pod ( # pylint: disable=R0913
376
- namespace , output_dir , pod_name , attempt , k8s_cli , debug_file_path , debug_file_name
420
+ def download_debug_info_package_from_pod ( # pylint: disable=R0913
421
+ namespace , output_dir , pod_name , attempt , k8s_cli , debug_file_path , debug_file_name
377
422
):
378
423
"""
379
424
This function attempt to download debug info package from a given pod.
@@ -397,7 +442,7 @@ def download_debug_info_package_from_pod( # pylint: disable=R0913
397
442
398
443
399
444
def create_and_download_debug_info_package_from_pod (
400
- namespace , pod_name , output_dir , k8s_cli
445
+ namespace , pod_name , output_dir , k8s_cli
401
446
):
402
447
"""
403
448
This function attempts to create a debug info package on a pod and if debug
@@ -419,7 +464,7 @@ def create_and_download_debug_info_package_from_pod(
419
464
(debug_info_path , debug_info_file_name ) = debug_info_path_and_name
420
465
for attempt in range (DEBUG_INFO_PACKAGE_RETRIES ):
421
466
if download_debug_info_package_from_pod (
422
- namespace , output_dir , pod_name , attempt + 1 , k8s_cli , debug_info_path , debug_info_file_name
467
+ namespace , output_dir , pod_name , attempt + 1 , k8s_cli , debug_info_path , debug_info_file_name
423
468
):
424
469
logger .info (
425
470
"Namespace '%s': Collected Redis Enterprise cluster debug package from pod: %s" ,
@@ -496,8 +541,7 @@ def collect_events(namespace, output_dir, k8s_cli, mode=MODE_RESTRICTED):
496
541
kubectl get event -o yaml
497
542
"""
498
543
if mode != MODE_ALL :
499
- logger .warning ("Cannot collect events when labels is specified - "
500
- "skipping events collection" )
544
+ logger .warning ('Cannot collect events in "restricted" mode - skipping events collection' )
501
545
return
502
546
# events need -n parameter in kubectl
503
547
if not namespace :
@@ -514,14 +558,14 @@ def collect_events(namespace, output_dir, k8s_cli, mode=MODE_RESTRICTED):
514
558
file_handle .write (output )
515
559
516
560
517
- def collect_api_resources (namespace , output_dir , k8s_cli , selector = "" ):
561
+ def collect_api_resources (namespace , output_dir , k8s_cli , api_resources , selector = "" ):
518
562
"""
519
563
Creates file for each of the API resources
520
564
with the output of kubectl get <resource> -o yaml
521
565
"""
522
566
logger .info ("Namespace '%s': Collecting API resources" , namespace )
523
567
resources_out = OrderedDict ()
524
- for resource in API_RESOURCES :
568
+ for resource in api_resources :
525
569
output = run_get_resource_yaml (namespace , resource , k8s_cli , selector )
526
570
if output :
527
571
resources_out [resource ] = output
@@ -535,14 +579,14 @@ def collect_api_resources(namespace, output_dir, k8s_cli, selector=""):
535
579
file_handle .write (out )
536
580
537
581
538
- def collect_api_resources_description (namespace , output_dir , k8s_cli , selector = "" ):
582
+ def collect_api_resources_description (namespace , output_dir , k8s_cli , api_resources , selector = "" ):
539
583
"""
540
584
Creates file for each of the API resources
541
585
with the output of kubectl describe <resource>
542
586
"""
543
587
logger .info ("Namespace '%s': Collecting API resources description" , namespace )
544
588
resources_out = OrderedDict ()
545
- for resource in API_RESOURCES :
589
+ for resource in api_resources :
546
590
output = describe_resource (namespace , resource , k8s_cli , selector )
547
591
if output :
548
592
resources_out [resource ] = output
@@ -936,6 +980,16 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
936
980
return run_shell_command_with_retries (cmd , KUBCTL_DESCRIBE_RETRIES , error_template )
937
981
938
982
983
def check_not_negative(value):
    """
    Convert a numeric option to an int and make sure it is not negative.

    :param value: the raw option value.
    :return: the value converted with int().
    :raises argparse.ArgumentTypeError: if the converted value is below zero.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("%s can't be less than 0" % value)
991
+
992
+
939
993
if __name__ == "__main__" :
940
994
logger .setLevel (logging .INFO )
941
995
logging .basicConfig (format = '%(asctime)s - %(levelname)s - %(message)s' )
@@ -951,7 +1005,7 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
951
1005
parser .add_argument ('-a' , '--logs_from_all_pods' , action = "store_true" ,
952
1006
help = "collect logs from all pods, not only the operator and pods run by the operator" )
953
1007
parser .add_argument ('-t' , '--timeout' , action = "store" ,
954
- type = int , default = TIMEOUT ,
1008
+ type = check_not_negative , default = TIMEOUT ,
955
1009
help = "time to wait for external commands to "
956
1010
"finish execution "
957
1011
"(default: 180s, specify 0 to not timeout) "
@@ -960,27 +1014,12 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
960
1014
help = "Which K8s cli client to use (kubectl/oc/auto-detect). "
961
1015
"Defaults to auto-detect (chooses between \" kubectl\" and \" oc\" ). "
962
1016
"Full paths can also be used." )
963
- parser .add_argument ('-m' , '--mode' , action = "store" , type = str , default = MODE_RESTRICTED ,
964
- help = "in which mode to run the log collector. The options are:"
965
- "1. all - collect all resources"
966
- "2. restricted (default) - collect only resources that are related to the operaotr,"
967
- " and has the label \" app=redis-enterprise\" . " )
968
- parser .add_argument ('--skip-version-check' , action = "store_true" ,
969
- help = "skip the version check" )
970
-
971
- # pylint: disable=locally-disabled, invalid-name
972
- results = parser .parse_args ()
973
-
974
- # pylint: disable=locally-disabled, invalid-name
975
- TIMEOUT = results .timeout
976
- if TIMEOUT < 0 :
977
- logger .error ("timeout can't be less than 0" )
978
- sys .exit (1 )
979
-
980
- logger .info ("Started Redis Enterprise k8s log collector" )
981
- if results .skip_version_check :
982
- logger .info ("skipping version check" )
983
- else :
984
- logger .info ("Checking version compatible" )
985
- run (results .namespace , results .output_dir , results .logs_from_all_pods , results .k8s_cli , results .mode ,
986
- results .skip_version_check )
1017
+ parser .add_argument ('-m' , '--mode' , action = "store" , type = str ,
1018
+ choices = [MODE_RESTRICTED , MODE_ALL ],
1019
+ help = "Which mode to run the log collector. The options are:"
1020
+ "1. restricted (default for clusters of version 6.2.18 and newer) - "
1021
+ "collect only resources that are related to the operator,"
1022
+ " and has the label \" app=redis-enterprise\" . "
1023
+ "2. all - collect all resources" )
1024
+
1025
+ run (parser .parse_args ())
0 commit comments