docker/models/algorithm.py (112 lines of code) (raw):

import hashlib
import json
from datetime import datetime
from functools import cached_property

from mongoengine import (
    StringField, ListField, IntField, EnumField, EmbeddedDocument,
    BooleanField, EmbeddedDocumentField, DateTimeField,
)

from commons.enum import ListEnum
from models.base_model import BaseModel, CloudEnum


class QuotingEnum(ListEnum):
    """Quoting modes; names and values match the ``csv.QUOTE_*`` constants."""
    QUOTE_MINIMAL = 0
    QUOTE_ALL = 1
    QUOTE_NONNUMERIC = 2
    QUOTE_NONE = 3


class ShapeSorting(ListEnum):
    """Allowed values for ``RecommendationSettings.shape_sorting``."""
    SORT_BY_PRICE = 'PRICE'
    SORT_BY_PERFORMANCE = 'PERFORMANCE'


class ShapeCompatibilityRule(ListEnum):
    """Allowed values for ``RecommendationSettings.shape_compatibility_rule``."""
    RULE_NONE = 'NONE'
    RULE_ONLY_SAME = 'SAME'
    RULE_ONLY_COMPATIBLE = 'COMPATIBLE'


class AnalysisPriceEnum(ListEnum):
    """Allowed values for ``RecommendationSettings.analysis_price``."""
    DEFAULT = 'DEFAULT'
    CUSTOMER_MIN = 'CUSTOMER_MIN'
    CUSTOMER_MAX = 'CUSTOMER_MAX'
    CUSTOMER_AVG = 'CUSTOMER_AVG'


class InterpMethodEnum(ListEnum):
    """Allowed values for ``ClusteringSettings.knee_interp_method``."""
    INTERP1D = 'interp1d'
    # NOTE(review): the member name is misspelled ("POLYMONIAL"); it is kept
    # as-is because renaming it would break every caller that references it.
    POLYMONIAL = 'polynomial'


class KMeansInitEnum(ListEnum):
    """Allowed values for ``ClusteringSettings.wcss_kmeans_init``."""
    KMEANS = 'k-means++'
    RANDOM = 'random'


class MetricFormatSettings(EmbeddedDocument):
    """Optional formatting parameters describing the metric files.

    The field names mirror the formatting parameters of the standard ``csv``
    module dialects. Every field is nullable; ``None`` means "not configured".
    """
    delimiter = StringField(null=True, max_length=3)
    skipinitialspace = BooleanField(null=True)
    lineterminator = StringField(null=True, max_length=5)
    quotechar = StringField(null=True, max_length=3)
    quoting = EnumField(QuotingEnum, null=True)
    escapechar = StringField(null=True, max_length=3)
    doublequote = BooleanField(null=True)


class ClusteringSettings(EmbeddedDocument):
    """Tunable parameters of the clustering step.

    NOTE(review): the ``wcss_kmeans_*`` and ``knee_*`` names look like they
    feed a k-means / knee-detection routine defined elsewhere - confirm
    against the consumer before relying on that.
    """
    max_clusters = IntField(min_value=1, max_value=10, default=5)
    wcss_kmeans_init = EnumField(KMeansInitEnum,
                                 default=KMeansInitEnum.KMEANS)
    wcss_kmeans_max_iter = IntField(min_value=1, default=300)
    wcss_kmeans_n_init = IntField(min_value=1, default=10)
    knee_interp_method = EnumField(InterpMethodEnum,
                                   default=InterpMethodEnum.POLYMONIAL)
    knee_polynomial_degree = IntField(default=5)


class RecommendationSettings(EmbeddedDocument):
    """Tunable parameters of the recommendation step."""
    record_step_minutes = IntField(min_value=1, max_value=60, default=5)
    # A callable default gives every document its own list instead of sharing
    # one module-level list object between all of them.
    thresholds = ListField(field=IntField(null=True), max_length=3,
                           default=lambda: [10, 30, 70])
    min_allowed_days = IntField(default=1, min_value=1)
    max_days = IntField(default=90, min_value=7)
    min_allowed_days_schedule = IntField(default=14, min_value=7)
    ignore_savings = BooleanField(default=False)
    max_recommended_shapes = IntField(min_value=1, max_value=10, default=5)
    shape_compatibility_rule = EnumField(
        ShapeCompatibilityRule, default=ShapeCompatibilityRule.RULE_NONE)
    shape_sorting = EnumField(ShapeSorting,
                              default=ShapeSorting.SORT_BY_PERFORMANCE)
    use_past_recommendations = BooleanField(default=True)
    use_instance_tags = BooleanField(default=True)
    analysis_price = EnumField(AnalysisPriceEnum,
                               default=AnalysisPriceEnum.DEFAULT)
    ignore_actions = ListField(StringField(null=True))
    discard_initial_zeros = BooleanField(default=True)
    target_timezone_name = StringField(default="Europe/London")
    forbid_change_series = BooleanField(default=False)
    forbid_change_family = BooleanField(default=False)


class Algorithm(BaseModel):
    """Algorithm definition: input format plus clustering/recommendation
    settings, with an MD5 checksum over its serialized content."""

    # Attribute names listed for the base model's DTO conversion.
    # NOTE(review): presumably these are omitted from the DTO - the logic
    # lives in BaseModel, confirm there.
    dto_skip_attrs = ['_id', 'md5', 'format_version']

    name = StringField(unique=True)
    licensed = BooleanField(default=False)
    customer = StringField(null=True)
    cloud = EnumField(CloudEnum)

    metric_format = EmbeddedDocumentField(MetricFormatSettings, null=True)

    required_data_attributes = ListField(StringField(null=True))
    metric_attributes = ListField(StringField(null=True))
    timestamp_attribute = StringField(null=True)

    # The embedded-document classes are passed as callables so that each
    # Algorithm gets a fresh settings object. Passing a pre-built instance
    # would make all documents relying on the default share (and mutate)
    # the very same object.
    clustering_settings = EmbeddedDocumentField(
        ClusteringSettings, default=ClusteringSettings)
    recommendation_settings = EmbeddedDocumentField(
        RecommendationSettings, default=RecommendationSettings)

    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; left unchanged because switching to an aware value
    # would alter what is stored and hashed.
    last_modified = DateTimeField(null=False, default=datetime.utcnow)
    md5 = StringField(null=True)
    format_version = StringField(null=True)

    def get_dto(self):
        """Return the base-model DTO with ``last_modified`` as an ISO string.

        :return: dict produced by ``BaseModel.get_dto``; when its
            ``last_modified`` entry is a ``datetime`` it is replaced by
            ``datetime.isoformat()`` output.
        """
        algorithm_dto = super().get_dto()
        last_modified = algorithm_dto.get('last_modified')
        if isinstance(last_modified, datetime):
            algorithm_dto['last_modified'] = last_modified.isoformat()
        return algorithm_dto

    @cached_property
    def read_configuration(self):
        """Return the configured ``metric_format`` parameters as a dict.

        Only parameters that were actually set are returned: ``None`` and
        empty-string values are skipped. Explicit falsy settings such as
        ``doublequote=False`` or ``quoting=0`` are kept, since they are
        meaningful choices rather than "unset".

        The result is computed once per instance (``cached_property``), so
        later changes to ``metric_format`` are not reflected.

        :return: dict of parameter name to stored value; empty when no
            ``metric_format`` is defined.
        """
        if not self.metric_format:
            return {}
        return {
            key: value
            for key, value in self.metric_format.to_mongo().items()
            if value is not None and value != ''
        }

    def checksum_matches(self):
        """Tell whether the stored ``md5`` equals the freshly computed one.

        :return: ``True`` when ``get_checksum()`` equals ``self.md5``.
        """
        return self.get_checksum() == self.md5

    def get_checksum(self):
        """Compute the MD5 hex digest of the document content.

        The digest is taken over ``get_json()`` output serialized with sorted
        keys, after removing the ``md5`` and ``_id`` entries. When
        ``last_modified`` is set it is replaced by its ISO representation
        truncated to whole seconds, so sub-second differences do not affect
        the checksum.

        :return: hexadecimal MD5 digest string.
        """
        algorithm_data = self.get_json()
        # The checksum must not depend on itself nor on the database id.
        for excluded_key in ('md5', '_id'):
            algorithm_data.pop(excluded_key, None)
        if self.last_modified:
            algorithm_data['last_modified'] = self.last_modified.isoformat(
                timespec='seconds')
        data_str = json.dumps(algorithm_data, sort_keys=True).encode('utf-8')
        # MD5 is used here as a content fingerprint, not for security.
        return hashlib.md5(data_str).hexdigest()