redash/utils/__init__.py (158 lines of code) (raw):

import codecs import io import csv import datetime import decimal import hashlib import os import random import re import uuid import binascii import pystache import pytz import simplejson import sqlparse from flask import current_app from funcy import select_values from redash import settings from sqlalchemy.orm.query import Query from .human_time import parse_human_time COMMENTS_REGEX = re.compile("/\*.*?\*/") WRITER_ENCODING = os.environ.get("REDASH_CSV_WRITER_ENCODING", "utf-8") WRITER_ERRORS = os.environ.get("REDASH_CSV_WRITER_ERRORS", "strict") def utcnow(): """Return datetime.now value with timezone specified. Without the timezone data, when the timestamp stored to the database it gets the current timezone of the server, which leads to errors in calculations. """ return datetime.datetime.now(pytz.utc) def dt_from_timestamp(timestamp, tz_aware=True): timestamp = datetime.datetime.utcfromtimestamp(float(timestamp)) if tz_aware: timestamp = timestamp.replace(tzinfo=pytz.utc) return timestamp def slugify(s): return re.sub("[^a-z0-9_\-]+", "-", s.lower()) def gen_query_hash(sql): """Return hash of the given query after stripping all comments, line breaks and multiple spaces, and lower casing all text. TODO: possible issue - the following queries will get the same id: 1. SELECT 1 FROM table WHERE column='Value'; 2. SELECT 1 FROM table where column='value'; """ sql = COMMENTS_REGEX.sub("", sql) sql = "".join(sql.split()).lower() return hashlib.md5(sql.encode("utf-8")).hexdigest() def generate_token(length): chars = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" rand = random.SystemRandom() return "".join(rand.choice(chars) for x in range(length)) class JSONEncoder(simplejson.JSONEncoder): """Adapter for `simplejson.dumps`.""" def default(self, o): # Some SQLAlchemy collections are lazy. if isinstance(o, Query): result = list(o) elif isinstance(o, decimal.Decimal): result = float(o) elif isinstance(o, (datetime.timedelta, uuid.UUID)): result = str(o) # See "Date Time String Format" in the ECMA-262 specification. elif isinstance(o, datetime.datetime): result = o.isoformat() if o.microsecond: result = result[:23] + result[26:] if result.endswith("+00:00"): result = result[:-6] + "Z" elif isinstance(o, datetime.date): result = o.isoformat() elif isinstance(o, datetime.time): if o.utcoffset() is not None: raise ValueError("JSON can't represent timezone-aware times.") result = o.isoformat() if o.microsecond: result = result[:12] elif isinstance(o, memoryview): result = binascii.hexlify(o).decode() elif isinstance(o, bytes): result = binascii.hexlify(o).decode() else: result = super(JSONEncoder, self).default(o) return result def json_loads(data, *args, **kwargs): """A custom JSON loading function which passes all parameters to the simplejson.loads function.""" return simplejson.loads(data, *args, **kwargs) def json_dumps(data, *args, **kwargs): """A custom JSON dumping function which passes all parameters to the simplejson.dumps function.""" kwargs.setdefault("cls", JSONEncoder) kwargs.setdefault("encoding", None) # Float value nan or inf in Python should be render to None or null in json. # Using ignore_nan = False will make Python render nan as NaN, leading to parse error in front-end kwargs.setdefault('ignore_nan', True) return simplejson.dumps(data, *args, **kwargs) def mustache_render(template, context=None, **kwargs): pystache.defaults.DELIMITERS = ('{{', '}}') # pystache.defaults.DELIMITERS = ('[[', ']]') renderer = pystache.Renderer(escape=lambda u: u) return renderer.render(template, context, **kwargs) def user_mustache_render(template, context=None, **kwargs): pystache.defaults.DELIMITERS = ('[[', ']]') renderer = pystache.Renderer(escape=lambda u: u) q = renderer.render(template, context, **kwargs) pystache.defaults.DELIMITERS = ('{{', '}}') return q def build_url(request, host, path): parts = request.host.split(":") if len(parts) > 1: port = parts[1] if (port, request.scheme) not in (("80", "http"), ("443", "https")): host = "{}:{}".format(host, port) return "{}://{}{}".format(request.scheme, host, path) class UnicodeWriter: """ A CSV writer which will write rows to CSV file "f", which is encoded in the given encoding. """ def __init__(self, f, dialect=csv.excel, encoding=WRITER_ENCODING, **kwds): # Redirect output to a queue self.queue = io.StringIO() self.writer = csv.writer(self.queue, dialect=dialect, **kwds) self.stream = f self.encoder = codecs.getincrementalencoder(encoding)() def _encode_utf8(self, val): if isinstance(val, str): return val.encode(WRITER_ENCODING, WRITER_ERRORS) return val def writerow(self, row): self.writer.writerow([self._encode_utf8(s) for s in row]) # Fetch UTF-8 output from the queue ... data = self.queue.getvalue() data = data.decode(WRITER_ENCODING) # ... and reencode it into the target encoding data = self.encoder.encode(data) # write to the target stream self.stream.write(data) # empty queue self.queue.truncate(0) def writerows(self, rows): for row in rows: self.writerow(row) def collect_parameters_from_request(args): parameters = {} for k, v in args.items(): if k.startswith("p_"): parameters[k[2:]] = v return parameters def base_url(org): if settings.MULTI_ORG: return "https://{}/{}".format(settings.HOST, org.slug) return settings.HOST def filter_none(d): return select_values(lambda v: v is not None, d) def to_filename(s): s = re.sub('[<>:"\\\/|?*]+', " ", s, flags=re.UNICODE) s = re.sub("\s+", "_", s, flags=re.UNICODE) return s.strip("_") def deprecated(): def wrapper(K): setattr(K, "deprecated", True) return K return wrapper def render_template(path, context): """ Render a template with context, without loading the entire app context. Using Flask's `render_template` function requires the entire app context to load, which in turn triggers any function decorated with the `context_processor` decorator, which is not explicitly required for rendering purposes. """ return current_app.jinja_env.get_template(path).render(**context) def query_is_select_no_limit(query): parsed_query = sqlparse.parse(query)[0] last_keyword_idx = find_last_keyword_idx(parsed_query) # Either invalid query or query that is not select if last_keyword_idx == -1 or parsed_query.tokens[0].value.upper() != "SELECT": return False no_limit = parsed_query.tokens[last_keyword_idx].value.upper() != "LIMIT" \ and parsed_query.tokens[last_keyword_idx].value.upper() != "OFFSET" return no_limit def find_last_keyword_idx(parsed_query): for i in reversed(range(len(parsed_query.tokens))): if parsed_query.tokens[i].ttype in sqlparse.tokens.Keyword: return i return -1 def add_limit_to_query(query): parsed_query = sqlparse.parse(query)[0] limit_tokens = sqlparse.parse(" LIMIT 1000")[0].tokens length = len(parsed_query.tokens) if parsed_query.tokens[length - 1].ttype == sqlparse.tokens.Punctuation: parsed_query.tokens[length - 1:length - 1] = limit_tokens else: parsed_query.tokens += limit_tokens return str(parsed_query)