Skip to content

Commit c52aa0e

Browse files
authored
Merge pull request #252 from RedisLabs/yuvallevy2-fixlogc
Fix log collector mode logic
2 parents 9567271 + 11ce179 commit c52aa0e

File tree

1 file changed

+104
-65
lines changed

1 file changed

+104
-65
lines changed

log_collector/log_collector.py

100644 → 100755
Lines changed: 104 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,18 @@
1818
import time
1919
from collections import OrderedDict
2020
from multiprocessing import Process
21+
# pylint: disable=E0401
22+
from packaging import version as packaging_version # type: ignore
2123

2224
RLEC_CONTAINER_NAME = "redis-enterprise-node"
2325
OPERATOR_LABEL = "app=redis-enterprise"
2426
MODE_RESTRICTED = "restricted"
2527
MODE_ALL = "all"
28+
FIRST_VERSION_SUPPORTING_RESTRICTED = "6.2.18-3"
2629

2730
RS_LOG_FOLDER_PATH = "/var/opt/redislabs/log"
2831
logger = logging.getLogger(__name__)
29-
VERSION_LOG_COLLECTOR = "6.2.18-3"
32+
VERSION_LOG_COLLECTOR = "6.2.18-3a"
3033

3134
TIME_FORMAT = time.strftime("%Y%m%d-%H%M%S")
3235

@@ -39,7 +42,7 @@
3942
DEFAULT_K8S_CLI = "kubectl"
4043
OC_K8S_CLI = "oc"
4144

42-
API_RESOURCES = [
45+
RESTRICTED_MODE_API_RESOURCES = [
4346
"RedisEnterpriseCluster",
4447
"RedisEnterpriseDatabase",
4548
"RedisEnterpriseRemoteCluster",
@@ -54,26 +57,29 @@
5457
"RoleBinding",
5558
"PersistentVolume",
5659
"PersistentVolumeClaim",
57-
"Node",
5860
"PodDisruptionBudget",
59-
"ResourceQuota",
6061
"Endpoints",
6162
"Pod",
62-
"NetworkPolicy",
6363
"CustomResourceDefinition",
64-
"CertificateSigningRequest",
6564
"ValidatingWebhookConfiguration",
6665
"NamespacedValidatingType",
6766
"NamespacedValidatingRule",
67+
"PodSecurityPolicy"
68+
]
69+
70+
ALL_ONLY_API_RESOURCES = [
71+
"Node",
72+
"ResourceQuota",
73+
"NetworkPolicy",
74+
"CertificateSigningRequest",
6875
"ClusterRole",
6976
"ClusterRoleBinding",
7077
"ClusterServiceVersion",
7178
"Subscription",
7279
"InstallPlan",
7380
"CatalogSource",
74-
"PodSecurityPolicy",
7581
"ReplicaSet",
76-
"StorageClass",
82+
"StorageClass"
7783
]
7884

7985

@@ -135,7 +141,9 @@ def _get_namespace_from_config():
135141
return existing_namespaces
136142

137143

138-
def collect_from_ns(namespace, output_dir, logs_from_all_pods=False, k8s_cli_input="", mode=MODE_RESTRICTED):
144+
# pylint: disable=R0913
145+
def collect_from_ns(namespace, output_dir, api_resources, logs_from_all_pods=False, k8s_cli_input="",
146+
mode=MODE_RESTRICTED):
139147
"Collect the context of a specific namespace. Typically runs in parallel processes."
140148
k8s_cli = detect_k8s_cli(k8s_cli_input)
141149
logger.info("Started collecting from namespace '%s'", namespace)
@@ -151,8 +159,8 @@ def collect_from_ns(namespace, output_dir, logs_from_all_pods=False, k8s_cli_inp
151159
collect_pod_rs_logs(namespace, ns_output_dir, k8s_cli, mode)
152160
collect_resources_list(namespace, ns_output_dir, k8s_cli, mode)
153161
collect_events(namespace, ns_output_dir, k8s_cli, mode)
154-
collect_api_resources(namespace, ns_output_dir, k8s_cli, selector)
155-
collect_api_resources_description(namespace, ns_output_dir, k8s_cli, selector)
162+
collect_api_resources(namespace, ns_output_dir, k8s_cli, api_resources, selector)
163+
collect_api_resources_description(namespace, ns_output_dir, k8s_cli, api_resources, selector)
156164
collect_pods_logs(namespace, ns_output_dir, k8s_cli, logs_from_all_pods)
157165

158166

@@ -188,11 +196,10 @@ def detect_k8s_cli(k8s_cli_input=""):
188196
return DEFAULT_K8S_CLI
189197

190198

191-
def compare_versions(k8s_cli, namespaces):
199+
def get_operator_version(k8s_cli, namespaces):
192200
"""
193201
Compare operator version with the current log_collector version.
194202
"""
195-
operator_version = ""
196203
for namespace in namespaces:
197204
cmd = "{} get deployment redis-enterprise-operator -o jsonpath=" \
198205
"\"{{.spec.template.spec.containers[0].image}}\" -n {}".format(k8s_cli, namespace)
@@ -203,51 +210,87 @@ def compare_versions(k8s_cli, namespaces):
203210
if len(operator_version) == 2:
204211
logger.info("running with operator version: %s and log collector version: %s",
205212
operator_version[1], VERSION_LOG_COLLECTOR)
206-
return operator_version[1] == VERSION_LOG_COLLECTOR
207-
logger.info("could not find operator version - running log collector")
208-
return True
213+
return operator_version[1]
214+
logger.info("could not find operator version")
215+
return ""
209216

210217

211-
def run(namespace_input, output_dir, logs_from_all_pods=False, k8s_cli_input="", # pylint: disable=R0913
212-
mode=MODE_RESTRICTED,
213-
skip_version_check=False):
218+
def validate_mode(mode, operator_version):
    """
    Reject an explicitly requested mode that the running operator cannot support.

    For old operator versions there is no way to use restricted mode because
    the resources are missing the labels that restricted collection selects on.

    :param mode: the mode requested by the user (MODE_RESTRICTED or MODE_ALL)
    :param operator_version: operator version string, or "" when no operator version was found
    :raises ValueError: if restricted mode is requested for an operator older than
        FIRST_VERSION_SUPPORTING_RESTRICTED
    """
    if mode != MODE_RESTRICTED:
        return

    # An empty version means no operator version could be found, so there is nothing
    # to compare against (and an empty string is not a usable version for the parser).
    # Accept the requested mode, the same way determine_default_mode treats this case.
    if not operator_version:
        return

    first_supported = packaging_version.parse(FIRST_VERSION_SUPPORTING_RESTRICTED)
    if packaging_version.parse(operator_version) < first_supported:
        raise ValueError("{} is not supported for this version, please use {}".format(MODE_RESTRICTED, MODE_ALL))
225+
226+
227+
def determine_default_mode(operator_version):
    """
    Pick the mode to run in when the user did not request one explicitly.

    Returns MODE_ALL when the operator is older than FIRST_VERSION_SUPPORTING_RESTRICTED.
    Returns MODE_RESTRICTED otherwise, including when no operator version is known
    (empty string).
    """
    if operator_version != "":
        # Parse the detected version first, then the threshold, and compare them.
        detected = packaging_version.parse(operator_version)
        threshold = packaging_version.parse(FIRST_VERSION_SUPPORTING_RESTRICTED)
        if detected < threshold:
            return MODE_ALL

    return MODE_RESTRICTED
237+
238+
239+
def run(results):
    """
    Collect logs

    :param results: parsed command line arguments; the attributes read here are
        k8s_cli, namespace, logs_from_all_pods, timeout, mode and output_dir
    :raises ValueError: propagated from validate_mode when the requested mode is
        not supported by the detected operator version
    """
    logger.info("Started Redis Enterprise k8s log collector")

    start_time = time.time()
    k8s_cli = detect_k8s_cli(results.k8s_cli)
    namespace_input = results.namespace

    namespaces = _get_namespaces_to_run_on(namespace_input, k8s_cli)
    logs_from_all_pods = results.logs_from_all_pods

    # The timeout is kept in a module-level variable
    # NOTE(review): presumably read by the helpers that run external commands - confirm.
    # pylint: disable=global-statement, invalid-name
    global TIMEOUT
    # pylint: disable=locally-disabled, invalid-name
    TIMEOUT = results.timeout

    # An explicitly requested mode is validated against the operator version;
    # otherwise the mode is derived from the operator version.
    mode = results.mode
    operator_version = get_operator_version(k8s_cli, namespaces)
    if mode:
        validate_mode(mode, operator_version)
    else:
        mode = determine_default_mode(operator_version)

    # "all" mode collects the restricted resources plus the all-only ones.
    # The concatenation builds a new list, the module-level constants are not modified.
    api_resources = RESTRICTED_MODE_API_RESOURCES
    if mode == MODE_ALL:
        api_resources = api_resources + ALL_ONLY_API_RESOURCES

    output_file_name = "redis_enterprise_k8s_debug_info_{}".format(TIME_FORMAT)
    output_dir = results.output_dir
    if not output_dir:
        # if not specified, use cwd
        output_dir = os.getcwd()
    output_dir = os.path.join(output_dir, output_file_name)
    make_dir(output_dir)
    collect_cluster_info(output_dir, k8s_cli)

    # One process per namespace; all of them are started before any is joined.
    processes = []
    for namespace in namespaces:
        proc = Process(target=collect_from_ns,
                       args=[namespace, output_dir, api_resources, logs_from_all_pods, k8s_cli, mode])
        proc.start()
        processes.append(proc)

    for proc in processes:
        proc.join()

    create_collection_report(output_dir, output_file_name, k8s_cli, namespaces, start_time, mode)

    archive_files(output_dir, output_file_name)
    logger.info("Finished Redis Enterprise log collector")
    # %.3f keeps the fractional minutes; %d would truncate them to a whole number.
    logger.info("--- Run time: %.3f minutes ---", (time.time() - start_time) / 60)
248291

249292

250-
def create_collection_report(output_dir, output_file_name, k8s_cli, namespaces, start_time):
293+
def create_collection_report(output_dir, output_file_name, k8s_cli, namespaces, start_time, mode):
251294
"""
252295
create a file with some data about the collection
253296
"""
@@ -257,7 +300,9 @@ def create_collection_report(output_dir, output_file_name, k8s_cli, namespaces,
257300
"output_file_name": output_file_name,
258301
"k8s_cli": k8s_cli,
259302
"namespaces": namespaces,
260-
"start_time": start_time
303+
"start_time": start_time,
304+
"mode": mode,
305+
"log_collector_version": VERSION_LOG_COLLECTOR
261306
}, output_fh)
262307

263308

@@ -372,8 +417,8 @@ def create_debug_info_package_on_pod(namespace, pod_name, attempt, k8s_cli):
372417
return None
373418

374419

375-
def download_debug_info_package_from_pod( # pylint: disable=R0913
376-
namespace, output_dir, pod_name, attempt, k8s_cli, debug_file_path, debug_file_name
420+
def download_debug_info_package_from_pod( # pylint: disable=R0913
421+
namespace, output_dir, pod_name, attempt, k8s_cli, debug_file_path, debug_file_name
377422
):
378423
"""
379424
This function attempt to download debug info package from a given pod.
@@ -397,7 +442,7 @@ def download_debug_info_package_from_pod( # pylint: disable=R0913
397442

398443

399444
def create_and_download_debug_info_package_from_pod(
400-
namespace, pod_name, output_dir, k8s_cli
445+
namespace, pod_name, output_dir, k8s_cli
401446
):
402447
"""
403448
This function attempts to create a debug info package on a pod and if debug
@@ -419,7 +464,7 @@ def create_and_download_debug_info_package_from_pod(
419464
(debug_info_path, debug_info_file_name) = debug_info_path_and_name
420465
for attempt in range(DEBUG_INFO_PACKAGE_RETRIES):
421466
if download_debug_info_package_from_pod(
422-
namespace, output_dir, pod_name, attempt + 1, k8s_cli, debug_info_path, debug_info_file_name
467+
namespace, output_dir, pod_name, attempt + 1, k8s_cli, debug_info_path, debug_info_file_name
423468
):
424469
logger.info(
425470
"Namespace '%s': Collected Redis Enterprise cluster debug package from pod: %s",
@@ -496,8 +541,7 @@ def collect_events(namespace, output_dir, k8s_cli, mode=MODE_RESTRICTED):
496541
kubectl get event -o yaml
497542
"""
498543
if mode != MODE_ALL:
499-
logger.warning("Cannot collect events when labels is specified - "
500-
"skipping events collection")
544+
logger.warning('Cannot collect events in "restricted" mode - skipping events collection')
501545
return
502546
# events need -n parameter in kubectl
503547
if not namespace:
@@ -514,14 +558,14 @@ def collect_events(namespace, output_dir, k8s_cli, mode=MODE_RESTRICTED):
514558
file_handle.write(output)
515559

516560

517-
def collect_api_resources(namespace, output_dir, k8s_cli, selector=""):
561+
def collect_api_resources(namespace, output_dir, k8s_cli, api_resources, selector=""):
518562
"""
519563
Creates file for each of the API resources
520564
with the output of kubectl get <resource> -o yaml
521565
"""
522566
logger.info("Namespace '%s': Collecting API resources", namespace)
523567
resources_out = OrderedDict()
524-
for resource in API_RESOURCES:
568+
for resource in api_resources:
525569
output = run_get_resource_yaml(namespace, resource, k8s_cli, selector)
526570
if output:
527571
resources_out[resource] = output
@@ -535,14 +579,14 @@ def collect_api_resources(namespace, output_dir, k8s_cli, selector=""):
535579
file_handle.write(out)
536580

537581

538-
def collect_api_resources_description(namespace, output_dir, k8s_cli, selector=""):
582+
def collect_api_resources_description(namespace, output_dir, k8s_cli, api_resources, selector=""):
539583
"""
540584
Creates file for each of the API resources
541585
with the output of kubectl describe <resource>
542586
"""
543587
logger.info("Namespace '%s': Collecting API resources description", namespace)
544588
resources_out = OrderedDict()
545-
for resource in API_RESOURCES:
589+
for resource in api_resources:
546590
output = describe_resource(namespace, resource, k8s_cli, selector)
547591
if output:
548592
resources_out[resource] = output
@@ -936,6 +980,16 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
936980
return run_shell_command_with_retries(cmd, KUBCTL_DESCRIBE_RETRIES, error_template)
937981

938982

983+
def check_not_negative(value):
    """
    Validate a numeric option is not negative

    Intended to be used as an argparse ``type`` callback.

    :param value: the raw option value
    :return: the value converted to int
    :raises argparse.ArgumentTypeError: if the value is not an integer or is less than 0
    """
    try:
        ivalue = int(value)
    except (TypeError, ValueError) as err:
        # Report a readable message instead of letting argparse fall back to its
        # generic "invalid <function name> value" text.
        raise argparse.ArgumentTypeError("{} is not a valid integer".format(value)) from err
    if ivalue < 0:
        raise argparse.ArgumentTypeError("{} can't be less than 0".format(value))
    return ivalue
991+
992+
939993
if __name__ == "__main__":
940994
logger.setLevel(logging.INFO)
941995
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
@@ -951,7 +1005,7 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
9511005
parser.add_argument('-a', '--logs_from_all_pods', action="store_true",
9521006
help="collect logs from all pods, not only the operator and pods run by the operator")
9531007
parser.add_argument('-t', '--timeout', action="store",
954-
type=int, default=TIMEOUT,
1008+
type=check_not_negative, default=TIMEOUT,
9551009
help="time to wait for external commands to "
9561010
"finish execution "
9571011
"(default: 180s, specify 0 to not timeout) "
@@ -960,27 +1014,12 @@ def describe_resource(namespace, resource_type, k8s_cli, selector=""):
9601014
help="Which K8s cli client to use (kubectl/oc/auto-detect). "
9611015
"Defaults to auto-detect (chooses between \"kubectl\" and \"oc\"). "
9621016
"Full paths can also be used.")
963-
parser.add_argument('-m', '--mode', action="store", type=str, default=MODE_RESTRICTED,
964-
help="in which mode to run the log collector. The options are:"
965-
"1. all - collect all resources"
966-
"2. restricted (default) - collect only resources that are related to the operaotr,"
967-
" and has the label \"app=redis-enterprise\". ")
968-
parser.add_argument('--skip-version-check', action="store_true",
969-
help="skip the version check")
970-
971-
# pylint: disable=locally-disabled, invalid-name
972-
results = parser.parse_args()
973-
974-
# pylint: disable=locally-disabled, invalid-name
975-
TIMEOUT = results.timeout
976-
if TIMEOUT < 0:
977-
logger.error("timeout can't be less than 0")
978-
sys.exit(1)
979-
980-
logger.info("Started Redis Enterprise k8s log collector")
981-
if results.skip_version_check:
982-
logger.info("skipping version check")
983-
else:
984-
logger.info("Checking version compatible")
985-
run(results.namespace, results.output_dir, results.logs_from_all_pods, results.k8s_cli, results.mode,
986-
results.skip_version_check)
1017+
parser.add_argument('-m', '--mode', action="store", type=str,
1018+
choices=[MODE_RESTRICTED, MODE_ALL],
1019+
help="Which mode to run the log collector. The options are:"
1020+
"1. restricted (default for clusters of version 6.2.18 and newer) - "
1021+
"collect only resources that are related to the operator,"
1022+
" and has the label \"app=redis-enterprise\". "
1023+
"2. all - collect all resources")
1024+
1025+
run(parser.parse_args())

0 commit comments

Comments
 (0)