scripts/extract_redis_commands_argcounts.php (301 lines of code) (raw):
#!/usr/bin/env php
<?php
/**
* @author Tyson Andre
*
* Heuristics to extract commands from redis-doc and determine what group of commands
* they'd fall under for twemproxy's request parsing logic.
*/
// Script prologue: validate the command line, load commands.json and order
// the command table so that the most recently introduced commands come first.
if (count($argv) !== 2) {
    // "{$...}" is used instead of "${...}", which is deprecated since PHP 8.2.
    echo "Usage: {$argv[0]} commands.json\n";
    echo "commands.json can be downloaded from https://github.com/redis/redis-doc\n";
    exit(1);
}
$path = $argv[1];
$contents = file_get_contents($path);
if (!is_string($contents)) {
    echo "Failed to read $path\n";
    exit(1);
}
$commands = json_decode($contents, true);
if (!is_array($commands)) {
    // json_decode() yields null on malformed input; without this guard the
    // uasort() call below would fail with a TypeError instead of a clear message.
    echo "Failed to parse $path as a JSON object of commands: " . json_last_error_msg() . "\n";
    exit(1);
}
// Descending by the "since" version: $b is compared against $a.
uasort($commands, fn($a, $b) => version_compare($b['since'], $a['since']));
/** Stand-in for "unbounded" when an argument may be repeated. */
const INFINITE_ARGS = 100000;

/**
 * Computes how many tokens a single argument spec contributes to a command.
 *
 * The "multiple" and "optional" flags are treated as false when absent, which
 * is how render_arg() in this file reads them too; reading them directly
 * raised an "Undefined array key" warning for every spec that omitted them.
 *
 * @param array  $arg         one entry of a command's "arguments" list
 * @param string $commandName name of the owning command (not used by the heuristic)
 * @return array ['min_key' => int, 'max_key' => int] when the spec's type is
 *               exactly 'key', otherwise ['min_arg' => int, 'max_arg' => int]
 */
function categorize_arg(array $arg, string $commandName): array
{
    $min = 1;
    $max = 1;
    if ($arg['multiple'] ?? false) {
        // A repeatable argument is counted as "zero up to unbounded".
        $min = 0;
        $max = INFINITE_ARGS;
    }
    if ($arg['optional'] ?? false) {
        $min = 0;
    }
    // Only a scalar type of 'key' counts as a key; anything else (including a
    // missing or non-string type) is counted as a plain argument.
    if (($arg['type'] ?? null) === 'key') {
        return ['min_key' => $min, 'max_key' => $max];
    }
    return ['min_arg' => $min, 'max_arg' => $max];
}
/**
 * Guesses the category label for one command from its argument specs.
 *
 * Key and non-key argument counts are summed over all specs via
 * categorize_arg(), then mapped to a label:
 *  - "keyn" for a fixed list of multi-key commands,
 *  - "key1_argx" whenever any count is variable (max > min),
 *  - "key1_arg<N>" when there are at least two keys plus other arguments
 *    (every key after the first is counted as an argument),
 *  - "key<K>_arg<N>" otherwise.
 *
 * @param array  $command     command description; only "arguments" is read
 * @param string $commandName command name, e.g. "GET"
 * @return string the category label
 */
function categorize(array $command, string $commandName): string
{
    $minKeyCount = 0;
    $maxKeyCount = 0;
    $minArgCount = 0;
    $maxArgCount = 0;
    foreach ($command['arguments'] ?? [] as $arg) {
        $data = categorize_arg($arg, $commandName);
        $minKeyCount += ($data['min_key'] ?? 0);
        $maxKeyCount += ($data['max_key'] ?? 0);
        $minArgCount += ($data['min_arg'] ?? 0);
        $maxArgCount += ($data['max_arg'] ?? 0);
    }
    if (in_array($commandName, ['DEL', 'MGET', 'MSET', 'TOUCH', 'UNLINK'], true)) {
        return 'keyn';
    }
    if ($maxKeyCount > $minKeyCount || $maxArgCount > $minArgCount) {
        // Variable-length commands all share one label; the key count is not encoded.
        return 'key1_argx';
    }
    // From here on min === max for both keys and arguments.
    if ($minArgCount > 0 && $minKeyCount >= 2) {
        return 'key1_arg' . ($minArgCount + $maxKeyCount - 1);
    }
    // "{$var}" replaces the "${var}" form, which is deprecated since PHP 8.2.
    return "key{$minKeyCount}_arg{$minArgCount}";
}
/**
 * Commands expected to be labelled "key1_arg0" (see EXPECTED_MAPS).
 * Order matches the original one-per-line listing.
 */
const KEY1 = [
    'PERSIST', 'PTTL', 'TTL', 'TYPE', 'DUMP',
    'DECR', 'GET', 'GETDEL', 'INCR', 'STRLEN',
    'HGETALL', 'HKEYS', 'HLEN', 'HVALS',
    'LLEN',
    'SCARD', 'SMEMBERS',
    'ZCARD',
    // 'AUTH',
];
/**
 * Commands expected to be labelled "key1_arg1" (see EXPECTED_MAPS).
 * Order matches the original one-per-line listing.
 */
const KEY1_ARG1 = [
    'EXPIRE', 'EXPIREAT', 'PEXPIRE', 'PEXPIREAT', 'MOVE',
    'APPEND', 'DECRBY', 'GETBIT', 'GETSET', 'INCRBY', 'INCRBYFLOAT', 'SETNX',
    'HEXISTS', 'HGET', 'HSTRLEN',
    'LINDEX', 'RPOPLPUSH',
    'SISMEMBER',
    'ZRANK', 'ZREVRANK', 'ZSCORE',
];
/**
 * Commands expected to be labelled "key1_arg2" (see EXPECTED_MAPS).
 * Order matches the original one-per-line listing.
 */
const KEY1_ARG2 = [
    'GETRANGE', 'PSETEX', 'SETBIT', 'SETEX', 'SETRANGE',
    'HINCRBY', 'HINCRBYFLOAT', 'HSETNX',
    'LRANGE', 'LREM', 'LSET', 'LTRIM',
    'SMOVE',
    'ZCOUNT', 'ZLEXCOUNT', 'ZINCRBY',
    'ZREMRANGEBYLEX', 'ZREMRANGEBYRANK', 'ZREMRANGEBYSCORE',
];
/** Commands expected to be labelled "key1_arg3" (see EXPECTED_MAPS). */
const KEY1_ARG3 = ['LINSERT', 'LMOVE'];
/**
 * Commands expected to be labelled "key1_argx" (see EXPECTED_MAPS).
 * Order matches the original one-per-line listing.
 */
const KEY1_ARGN = [
    'SORT',
    'BITCOUNT', 'BITPOS', 'BITFIELD', 'BITOP',
    'EXISTS', 'GETEX', 'SET',
    'HDEL', 'HMGET', 'HMSET', 'HSCAN', 'HSET', 'HRANDFIELD',
    'LPUSH', 'LPUSHX', 'RPUSH', 'RPUSHX', 'LPOP', 'RPOP', 'LPOS',
    'SADD', 'SDIFF', 'SDIFFSTORE', 'SINTER', 'SINTERSTORE', 'SREM',
    'SUNION', 'SUNIONSTORE', 'SRANDMEMBER', 'SSCAN', 'SPOP', 'SMISMEMBER',
    'PFADD', 'PFMERGE', 'PFCOUNT',
    'ZADD', 'ZDIFF', 'ZDIFFSTORE', 'ZINTER', 'ZINTERSTORE', 'ZMSCORE',
    'ZPOPMAX', 'ZPOPMIN', 'ZRANDMEMBER',
    'ZRANGE', 'ZRANGEBYLEX', 'ZRANGEBYSCORE', 'ZRANGESTORE', 'ZREM',
    'ZREVRANGE', 'ZREVRANGEBYLEX', 'ZREVRANGEBYSCORE',
    'ZSCAN', 'ZUNION', 'ZUNIONSTORE',
    'GEODIST', 'GEOPOS', 'GEOHASH', 'GEOADD', 'GEOSEARCH',
    'RESTORE',
];
/**
 * Maps each category label to the list of commands expected to receive it.
 *
 * The keys are label strings of the form produced by categorize(); the values
 * are the command-name lists declared above. Note that KEY1 is filed under
 * 'key1_arg0' and KEY1_ARGN under 'key1_argx'.
 * Read by dump_mismatched_argument_types() to report disagreements.
 */
const EXPECTED_MAPS = [
'key1_arg0' => KEY1,
'key1_arg1' => KEY1_ARG1,
'key1_arg2' => KEY1_ARG2,
'key1_arg3' => KEY1_ARG3,
'key1_argx' => KEY1_ARGN,
];
/**
 * Computes the category label of every command.
 *
 * @param array|null $commands command table keyed by command name; when omitted,
 *                             the script-level $commands variable is used so the
 *                             existing zero-argument call keeps working
 * @return array map of command name => label from categorize(), or an
 *               "unknown: ..." string when categorize() throws an Exception
 */
function compute_types(?array $commands = null): array
{
    // Prefer the explicit argument over global state; fall back to the global
    // table (or an empty one) for backward compatibility.
    $commands ??= $GLOBALS['commands'] ?? [];
    $types = [];
    foreach ($commands as $name => $cmd) {
        try {
            $type = categorize($cmd, $name);
        } catch (Exception $e) {
            // Keep going: record the failure alongside the offending command.
            $type = "unknown: {$e->getMessage()} " . json_encode($cmd);
        }
        $types[$name] = $type;
    }
    return $types;
}
/**
 * Prints every disagreement between the computed labels and EXPECTED_MAPS.
 *
 * For each expected label this reports (a) listed commands whose computed
 * label differs, (b) listed commands that are absent from $types, and
 * (c) commands that received the label but are not in the expected list.
 *
 * @param array $types    map of command name => computed label
 * @param array $commands command table keyed by command name; "arguments",
 *                        "group" and "summary" are read for the report
 */
function dump_mismatched_argument_types(array $types, array $commands): void
{
    foreach (EXPECTED_MAPS as $expected => $maps) {
        foreach ($maps as $key) {
            if (!isset($types[$key])) {
                // The input may predate (or have dropped) a listed command; say so
                // instead of emitting warnings and an empty "got" value.
                echo "Missing command $key: want $expected, but it is not in the input\n";
                continue;
            }
            $actual = $types[$key];
            if ($actual !== $expected) {
                echo "Unexpected type for $key: got $actual, want $expected: " . json_encode($commands[$key]['arguments'] ?? null) . "\n";
            }
        }
        foreach ($types as $other_name => $type) {
            // Array keys that look numeric come back as ints, so normalise before the strict lookup.
            if ($type === $expected && !in_array((string) $other_name, $maps, true)) {
                $command = $commands[$other_name];
                echo "Expected $other_name in $expected: " . json_encode($command['arguments'] ?? null) . "\n";
                echo "> " . $command['group'] . ": " . $command['summary'] . "\n\n";
            }
        }
    }
}
/**
 * Renders one argument spec as a usage-string fragment.
 *
 * Precedence: an optional spec is rendered in square brackets around its
 * non-optional form; otherwise a non-empty "enum" is joined with "|";
 * otherwise a non-empty "command" is returned as is; otherwise the "name"
 * (space-joined when it is an array) is used, and repeated as
 * "name [name …]" when the spec is flagged "multiple".
 *
 * @param array $argument one entry of a command's "arguments" list
 * @return string the rendered fragment
 */
function render_arg(array $argument): string
{
    $isOptional = $argument['optional'] ?? false;
    if ($isOptional) {
        // Render the same spec without the flag, then wrap it.
        $inner = $argument;
        unset($inner['optional']);
        return sprintf('[%s]', render_arg($inner));
    }

    $choices = $argument['enum'] ?? null;
    if ($choices) {
        return implode('|', $choices);
    }

    $keyword = $argument['command'] ?? null;
    if ($keyword) {
        return $keyword;
    }

    $label = $argument['name'];
    if (is_array($label)) {
        $label = implode(' ', $label);
    }

    $repeatable = $argument['multiple'] ?? false;
    return $repeatable ? $label . ' [' . $label . ' …]' : $label;
}
/**
 * Builds the usage string of a command: its name followed by each rendered
 * argument, separated by single spaces.
 *
 * @param string $name    command name
 * @param array  $command command description; only "arguments" is read
 * @return string the usage string
 */
function render_command(string $name, array $command): string
{
    $pieces = [$name];
    foreach ($command['arguments'] ?? [] as $argument) {
        $pieces[] = render_arg($argument);
    }
    return implode(' ', $pieces);
}
/**
 * Centres $name in a field of $len characters using spaces.
 *
 * Width is measured with mb_strlen(). When the padding is odd, the extra
 * space goes on the right. Strings already at least $len long are returned
 * unchanged.
 *
 * @param string $name text to centre
 * @param int    $len  target width in characters
 * @return string the padded text
 */
function center_pad(string $name, int $len)
{
    $gap = $len - mb_strlen($name);
    if ($gap <= 0) {
        return $name;
    }
    $left = intdiv($gap, 2);
    return str_repeat(' ', $left) . $name . str_repeat(' ', $gap - $left);
}
/**
 * Appends spaces to $name until it is $len characters wide.
 *
 * Width is measured with mb_strlen(); strings already at least $len long are
 * returned unchanged.
 *
 * @param string $name text to pad
 * @param int    $len  target width in characters
 * @return string the padded text
 */
function right_pad(string $name, int $len)
{
    $missing = $len - mb_strlen($name);
    return $missing > 0 ? $name . str_repeat(' ', $missing) : $name;
}
/**
 * Prints an ASCII table listing the given commands in key order.
 *
 * The three-line header is printed first; each command then gets one row
 * (name, a constant "Yes", and its usage string from render_command())
 * followed by a border line. A blank line ends the table.
 *
 * @param array $commands command table keyed by command name
 */
function dump_table(array $commands)
{
    $header = <<<EOT
+-------------------+------------+---------------------------------------------------------------------------------------------------------------------+
| Command | Supported? | Format |
+-------------------+------------+---------------------------------------------------------------------------------------------------------------------+
EOT;
    echo $header . "\n";
    // The top border doubles as the per-row separator and as the source of the
    // column widths: the dash runs between the '+' signs.
    $rowLine = explode("\n", $header)[0];
    ksort($commands);
    $parts = explode('+', $rowLine);
    $nameLen = strlen($parts[1]);
    $supportsLen = strlen($parts[2]);
    $commandLen = strlen($parts[3]);
    foreach ($commands as $name => $command) {
        // The former "$key = center_pad($name, 19)" local was never read and is gone.
        $commandRepr = render_command($name, $command);
        $supports = 'Yes';
        printf(" |%s|%s|%s|\n", center_pad($name, $nameLen), center_pad($supports, $supportsLen), right_pad(' ' . $commandRepr, $commandLen));
        echo $rowLine . "\n";
    }
    echo "\n";
}
/**
 * Prints one titled table per command group.
 *
 * Commands are bucketed by their "group" field in first-seen order; each
 * bucket is printed as a "### <group> Command" heading followed by
 * dump_table() for its members.
 *
 * @param array $commands command table keyed by command name
 */
function dump_table_groups(array $commands): void
{
    $byGroup = [];
    foreach ($commands as $name => $command) {
        $title = $command['group'];
        $byGroup[$title][$name] = $command;
    }

    foreach ($byGroup as $title => $members) {
        echo '### ', $title, " Command\n\n";
        dump_table($members);
    }
}
// Report, in order: every command with its computed label, the disagreements
// with the expected lists, and finally the per-group tables.
$types = compute_types();
foreach ($types as $name => $type) {
    echo "{$name}: {$type}\n";
}
dump_mismatched_argument_types($types, $commands);
dump_table_groups($commands);