Index: www/sites/all/modules/custom/np_scan/np_scan.analytics.inc
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.analytics.inc (revision 34222)
+++ www/sites/all/modules/custom/np_scan/np_scan.analytics.inc (working copy)
@@ -19,71 +19,6 @@
}
/**
- * Helper function to get the name of the velocity field.
- *
- * @param $timeframe time in hours 24, 48, 168,720
- * @param $collection_name the name of the mongo collection
- * @return string, the name of the velocity field
- */
-function _np_scan_analytics_get_velocity_field($timeframe, $collection_name) {
- $velocity_field = '';
-
- if ($collection_name == 'scanurl') {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity24';
- break;
- case 168: //week
- $velocity_field = 'days.velocity7';
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'hours.velocity';
- break;
- }
- }
- elseif ($collection_name == 'scan') {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity';
- break;
- case 168: //week
- $velocity_field = 'days.velocity7';
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'hours.velocity48';
- break;
- }
- }
- elseif (in_array($collection_name, array('hashtag', 'keyword', 'retweet'))) {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity';
- break;
- case 168: //week
- $velocity_field = 'days.velocity'; // no stats use month
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'days.velocity'; // no stats use month
- break;
- }
- }
-
- return $velocity_field;
-}
-
-/**
* Menu callback; property statistics.
*
* $page - which page are we displaying results for?
@@ -114,6 +49,7 @@
switch ($page) {
case 'views':
+ $velocity_field_2 = ($timeframe == 24) ? 'velocity.hours' : 'velocity.days';
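+ // 24h timeframe reads the hourly velocity bucket; longer timeframes fall back to the daily bucket.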
$args = array_merge(array($scan_status), $client_ids );
scan_api_set_active_shard('misc');
$key = 'vid';
@@ -135,36 +71,15 @@
break;
case 'links':
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, 'scanurl');
- $fields = array('scan_id' => 1);
- $query = array(
- 'scan.client_id' => array('$in' => $client_ids),
- 'scan.status' => $scan_status,
- );
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
- try {
- $results = $cursor
- ->find($query, $fields)
- ->sort(array($velocity_field => -1))
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($results as $row) {
- $scans[$row['scan_id']] = FALSE;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
- break;
-
default:
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, 'scan');
+ $velocity_field = ($page == 'links') ? 'velocity.hours_urls' : 'velocity.hours_scan';
+ $velocity_field_2 = 'velocity.days'; // @@@ V2 Maybe we should standardize on 48h across the board?
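+ // $velocity_field drives the sort on the scan query below; $velocity_field_2 is reused further down for the keyword/hashtag velocities.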
$fields = array('scan_id' => 1);
$query = array(
'scan.client_id' => array('$in' => $client_ids),
'scan.status' => $scan_status,
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan.client_id[], scan.status / velocity.hours_urls, velocity.hours_scan
try {
$results = $cursor
->find($query, $fields)
@@ -203,24 +118,23 @@
// pour together hashtag and keyword velocities
foreach (array('keyword', 'hashtag') as $collection_name) {
if ($words[$collection_name]) {
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, $collection_name);
- list($interval, $tmp) = explode('.', $velocity_field);
- $fields = array('word' => 1, $velocity_field => 1);
+ list($tmp, $interval) = explode('.', $velocity_field_2);
+ $fields = array('word' => 1, $velocity_field_2 => 1);
$query = array(
'scan_id' => intval($statistics->scan_id),
'word' => array('$in' => array_map('strtolower', $words[$collection_name])),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id, word[] / velocity.hours:-1, velocity.days:-1
try {
$results = $cursor
->find($query, $fields)
- ->sort(array($velocity_field => -1))
+ ->sort(array($velocity_field_2 => -1))
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if ($collection_name == 'hashtag') {
$row['word'] = '#' . $row['word'];
};
- $word_velocities[$row['word']] = isset($row[$interval]['velocity']) ? round($row[$interval]['velocity'], VELOCITY_PRECISION) : 0;
+ $word_velocities[$row['word']] = isset($row['velocity'][$interval]) ? round($row['velocity'][$interval], VELOCITY_PRECISION) : 0;
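+ // V2 documents keep velocities in a velocity.<interval> subdocument instead of the old <interval>.velocity layout.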
}
}
catch (MongoCursorTimeoutException $e) {
@@ -248,60 +162,43 @@
'timestamp' => strtotime('-1 day'),
);
- // fill up velocity and difference
+ // Fill up velocity, difference and the per-category url stats.
$statistics->velocity = 0;
$statistics->difference = 0;
- $fields = array('minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
- $query = array(
- 'scan_id' => intval($statistics->scan_id),
- );
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- try {
- $results = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- if ($results->hasNext()) {
- $row = $results->getNext();
- $velocity = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? $row['minutes']['prev_velocity'] * 12 : 0;
- $statistics->velocity = $velocity;
- $statistics->difference = $velocity - $prev_velocity;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
-
- // Fill up the url uniq stats part. Initialize to 0.
- $statistics->velocity = 0;
$statistics->general = 0;
$statistics->photo = 0;
$statistics->video = 0;
-
$statistics->url_velocity = 0;
-
$fields = array(
'scan_id' => 1,
- 'hours.velocity' => 1,
- 'hours.general.velocity' => 1,
- 'hours.photo.velocity' => 1,
- 'hours.video.velocity' => 1,
+ // @@@ V2 Previous code disagrees on whether to use minutes or hours here.
+ //'velocity.minutes' => 1,
+ 'velocity.hours' => 1,
+ 'velocity.hours_general' => 1,
+ 'velocity.hours_photo' => 1,
+ 'velocity.hours_video' => 1,
+ 'velocity.hours_urls' => 1,
+ 'increasing' => 1,
);
$query = array(
'scan_id' => intval($statistics->scan_id),
);
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none
try {
$results = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
- foreach ($results as $row) {
- $statistics->velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
- $statistics->general = isset($row['hours']['general']['velocity']) ? $row['hours']['general']['velocity'] : 0;
- $statistics->photo = isset($row['hours']['photo']['velocity']) ? $row['hours']['photo']['velocity'] : 0;
- $statistics->video = isset($row['hours']['video']['velocity']) ? $row['hours']['video']['velocity'] : 0;
-
- $statistics->url_velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
+ if ($results->hasNext()) {
+ $row = $results->getNext();
+ // @@@ V2 Previous code disagrees on whether to use minutes or hours here.
+ //$statistics->velocity = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
+ $statistics->velocity = isset($row['velocity']['hours']) ? $row['velocity']['hours'] : 0;
+ $statistics->general = isset($row['velocity']['hours_general']) ? $row['velocity']['hours_general'] : 0;
+ $statistics->photo = isset($row['velocity']['hours_photo']) ? $row['velocity']['hours_photo'] : 0;
+ $statistics->video = isset($row['velocity']['hours_video']) ? $row['velocity']['hours_video'] : 0;
+ $statistics->url_velocity = isset($row['velocity']['hours_urls']) ? $row['velocity']['hours_urls'] : 0;
+ // @@@ V2 Teach everything about "increasing".
+ $statistics->difference = !empty($row['increasing']) ? 1 : -1;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -365,15 +262,18 @@
$max = 0;
$data = array();
- $order_field = ($order == 'trending') ? 'trending' : 'minutes.velocity';
+ $order_field = ($order == 'trending') ? 'trending' : 'velocity.minutes';
$ordering_data = array();
foreach ($scan_ids as $scan_id) {
- $fields = array('minutes.velocity' => 1, 'trending' => 1, 'word' => 1);
+ $fields = array(
+ 'velocity.minutes' => 1,
+ 'trending' => 1,
+ 'word' => 1,
+ );
$query = array(
'scan_id' => $scan_id,
- 'word' => array('$exists' => TRUE),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id / trending:-1, velocity.minutes:-1
try {
$results = $cursor
->find($query, $fields)
@@ -382,11 +282,12 @@
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if (!isset($row['word'])) {
+ assert(!'Worker was speechless!');
// bug in workers. they write empty word records... bad
continue;
}
- $velocity = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $velocity = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
$order_data = $order != 'trending' ? $velocity : $row['trending'];
if (!isset($ordering_data[$row['word']]) || ($ordering_data[$row['word']] < $order_data)) {
$data[$row['word']] = array(
@@ -445,16 +346,16 @@
$query = array(
'scan_id' => array('$in' => $scan_ids),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id[] / velocity.minutes:-1 limit 1
try {
$results = $cursor
- ->find($query, array('minutes.velocity' => 1))
- ->sort(array('minutes.velocity' => -1))
+ ->find($query, array('velocity.minutes' => 1))
+ ->sort(array('velocity.minutes' => -1))
->limit(1)
->timeout(scan_api_get_mongo_timeout());
if ($results->hasNext()) {
$row = $results->getNext();
- $max = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $max = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -462,15 +363,13 @@
}
}
else {
- // MAX(ABS(trending)
$max_trending = $min_trending = 0;
$query = array(
'scan_id' => array('$in' => $scan_ids),
- 'trending' => array('$ne' => -1000),
);
// max trending
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id[] / trending:-1, trending:1 limit 1 (two queries)
try {
$results = $cursor
->find($query, array('trending' => 1))
@@ -531,11 +430,10 @@
foreach (array('keyword', 'hashtag') as $collection_name) {
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
- // max trending
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / trending:-1, velocity.minutes:-1 count 1 two queries
try {
+ // max trending
$results = $cursor
->find($query, array('trending' => 1))
->sort(array('trending' => -1))
@@ -548,13 +446,13 @@
// max velocity
$results = $cursor
- ->find($query, array('minutes.velocity' => 1))
- ->sort(array('minutes.velocity' => -1))
+ ->find($query, array('velocity.minutes' => 1))
+ ->sort(array('velocity.minutes' => -1))
->limit(1)
->timeout(scan_api_get_mongo_timeout());
if ($results->hasNext()) {
$row = $results->getNext();
- $result[$collection_name]['velocity'] = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $result[$collection_name]['velocity'] = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -575,7 +473,7 @@
/**
* Helper function to get the pager's current page number.
*/
-function _np_scan_analytics_get_page_number($db, $collection, $limit, $query = array(), $element = 0) {
+function _np_scan_analytics_get_page_number($collection, $limit, $query = array(), $element = 0) {
global $pager_page_array, $pager_total, $pager_total_items;
// Initialize pager, see pager.inc.
@@ -583,7 +481,7 @@
$page = isset($_GET['page']) ? $_GET['page'] : '';
$pager_page_array = explode(',', $page);
$pager_total_items[$element] = 0;
- if ($cursor = scan_api_get_mongo($db, $collection)) { // keyword, hashtag, (url -- disabled)
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / keyword, hashtag, (url -- disabled) / varies(_np_scan_analytics_get_page_number) / none (count)
try {
$pager_total_items[$element] = $cursor->find($query)
->timeout(scan_api_get_mongo_timeout())
@@ -605,13 +503,12 @@
$keyword_origin_access = user_access('access keyword origin');
$collection_name = $hashtag ? 'hashtag' : 'keyword';
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / trending (paged query)
$fields = array('scan_id' => 1, 'trending' => 1, 'word' => 1);
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- $pagenumber = $full ? _np_scan_analytics_get_page_number('scan_stats', $collection_name, $limit, $query) : 0;
+ $pagenumber = $full ? _np_scan_analytics_get_page_number($collection_name, $limit, $query) : 0; // V2r15 / hashtag, keyword / scan_id=0 / none (count)
try {
$results = $cursor
->find($query, $fields)
@@ -659,26 +556,25 @@
$keyword_origin_access = user_access('access keyword origin');
$collection_name = $hashtag ? 'hashtag' : 'keyword';
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / velocity.minutes:-1 paged query
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- $pagenumber = $full ? _np_scan_analytics_get_page_number('scan_stats', $collection_name, $limit, $query) : 0;
+ $pagenumber = $full ? _np_scan_analytics_get_page_number($collection_name, $limit, $query) : 0; // V2r15 / keyword, hashtag / scan_id=0 / none (count)
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'word' => 1);
+ $fields = array('velocity.minutes' => 1, 'word' => 1);
try {
$results = $cursor
->find($query, $fields)
- ->sort(array('minutes.velocity' => -1))
+ ->sort(array('velocity.minutes' => -1))
->skip($pagenumber * $limit)
->limit($limit)
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
$tweets[$row['word']] = array(
- 'scan_id' => $row['scan_id'],
- 'velocity' => isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0,
+ 'scan_id' => 0,
+ 'velocity' => isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0,
'word' => $row['word'],
);
}
@@ -715,13 +611,17 @@
}
if ($tweets) {
-
- $fields = array('word' => 1, 'hours' => 1, 'updated' => 1);
$query = array(
'scan_id' => 0,
'word' => array('$in' => array_map('strtolower', $words)),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ $fields = array(
+ 'word' => 1,
+ 'hours' => 1,
+ 'updated' => 1,
+ );
+
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0, word[] / none
try {
$results = $cursor
->find($query, $fields)
@@ -753,7 +653,7 @@
'word' => array('$in' => $words),
'updated' => array('$gte' => $date),
);
- $cursor = scan_api_get_mongo('urls', 'url')
+ $cursor = scan_api_get_mongo('urls', 'url') // !V2r15 not converted -- commented out code
->find($query, $fields)
->sort(array('updated' => -1));
$i = 0;
@@ -872,22 +772,22 @@
//$pagenumber = $paged ? _np_scan_analytics_get_page_number('urls', 'url', $limit, array('scan_id' => array('$in' => $scan_ids), 'category' => $category)) : 0;
$pagenumber = 0; //@todo: fixme
foreach ($scan_ids as $scan_id) {
- $fields = array('hours.velocity' => 1, 'url_id' => 1);
+ $fields = array('velocity.hours' => 1, 'url_id' => 1);
$query = array(
- 'scan_id' => $scan_id,
- 'category' => $category,
+ 'scan_id' => intval($scan_id),
+ 'category' => intval($category),
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / velocity.hours:-1 paged query
try {
$results = $cursor
->find($query, $fields)
- ->sort(array('hours.velocity' => -1))
+ ->sort(array('velocity.hours' => -1))
->skip($pagenumber * $limit)
->limit($limit)
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
$url_ids[$row['url_id']] = $row['url_id'];
- $velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
+ $velocity = isset($row['velocity']['hours']) ? $row['velocity']['hours'] : 0;
if (!isset($order_data[$row['url_id']])) {
$order_data[$row['url_id']] = $velocity;
}
Index: www/sites/all/modules/custom/np_scan/np_scan.module
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.module (revision 34222)
+++ www/sites/all/modules/custom/np_scan/np_scan.module (working copy)
@@ -437,7 +437,6 @@
// Refuse to update to a lower vid.
db_query('UPDATE {scan_settings} SET active_vid = %d WHERE nid = %d AND active_vid < %d', $scan->vid, $scan->nid, $scan->vid);
if (db_affected_rows()) {
- np_scan_denorm($scan->scan_id, 1, 'active');
watchdog('np_scan', 'Scan id %scan_id for revision %vid promoted to active on node %nid.', array('%scan_id' => $scan->scan_id, '%vid' => $scan->vid, '%nid' => $scan->nid));
// Get any non archived/deleted scans older than the one being promoted to...
$result = db_query("SELECT scan_id, nid, vid, archived FROM {scan} WHERE nid = %d AND scan_id < %d AND archived = 0", $scan->nid, $scan->scan_id);
@@ -967,9 +966,6 @@
'auto_unyaml' => false,
));
- // Mark scan as inactive in mongo.
- np_scan_denorm($scan_id, 0, 'active');
-
// insert matching marker into DB for deletion worker to double check
db_query("INSERT INTO {scan_delete} (scan_id) VALUES (%d)", $scan_id);
@@ -1913,15 +1909,14 @@
*/
function np_scan_denorm($scan_id, $new_value, $key = 'status') {
$collections = array('scan', 'keyword', 'hashtag', 'location', 'retweet', 'url', 'scanurl');
- if ($key == 'active' && $new_value == 0) {
- // Don't bother propogating the active status fully for something that's about
- // to get deleted by terminator.
- $collections = array('scan', 'scanurl');
+ if ($key == 'active') {
+ assert(!'Someone tried to denorm scan.active!');
+ return FALSE;
}
$key = "scan.". $key;
$set = array('$set' => array($key => intval($new_value)));
foreach ($collections as $collection) {
- if ($cursor = scan_api_get_mongo('statistics', $collection)) { // denorm
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / * / scan_id / none (update np_scan_denorm)
$cursor
->update(array('scan_id' => intval($scan_id)), $set, array('multiple' => TRUE));
}
Index: www/sites/all/modules/custom/np_scan/np_scan.archive.inc
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.archive.inc (revision 34222)
+++ www/sites/all/modules/custom/np_scan/np_scan.archive.inc (working copy)
@@ -8,6 +8,7 @@
* @return TRUE on success, FALSE if details could not be found for this scan in scan and node DB tables
*/
function _np_scan_snapshot($scan_id, $title) {
+ $scan_id = intval($scan_id);
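+ // scan_id is stored as an integer in Mongo, so cast once here instead of at each query below.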
$scan = db_fetch_object(db_query_range('SELECT r.timestamp, s.* FROM {scan} s INNER JOIN {node_revisions} r USING(vid) WHERE scan_id = %d', $scan_id, 0, 1));
if (!empty($scan)) {
@@ -21,9 +22,9 @@
$url_stats->photo = 0;
$url_stats->video = 0;
try {
- if ($cursor = scan_api_get_mongo('scan_stats', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id
$query = array(
- 'scan_id' => intval($scan_id),
+ 'scan_id' => $scan_id,
);
$fields = array('days.general' => 1, 'days.photo' => 1, 'days.video' => 1, 'updated' => 1);
$cursor = $cursor->find($query, $fields)
@@ -36,9 +37,9 @@
$last_update = scan_api_bucket_index('scanurl', 'days', $result['updated']->sec);
$sum = 0;
foreach ($result['days'][$key] as $index => $value) {
+ // @@@ V2 This is completely wrong and probably kills kittens.
// ignore velocity values and keys which have not been updated recently enough
- // if $index is int(0), it will return true for in_array($index, array('velocity', 'velocity7')
- if ((!is_int($index) && in_array($index, array('velocity', 'velocity7'))) || ($index > $last_update)) {
+ if ($index > $last_update) {
continue;
}
else {
@@ -89,7 +90,7 @@
*
*/
function _np_scan_archive_get_urls_cursor($scan_id, $category) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / count:-1 (limit 1000)
$query = array(
'scan_id' => intval($scan_id),
'category' => intval($category),
@@ -370,12 +371,10 @@
* @param $snapshot_id
*/
function np_scan_archive_stats($scan, $snapshot_id) {
-
-
$insert_params = array();
$rows = 0;
$max = array('count' => 0, 'start_time' => '2009-01-01');
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none (single)
$query = array(
'scan_id' => intval($scan->scan_id),
);
@@ -387,27 +386,22 @@
$last_update_start = $result['updated']->sec;
$last_update_index = scan_api_bucket_index('scan', 'days', $result['updated']->sec);
foreach ($result['days'] as $index => $count) {
- if (!is_int($index) && in_array($index, array('velocity', 'prev_velocity', 'velocity7'))) {
- continue;
+ if ($index <= $last_update_index) {
+ // anything which is <= last-update-index = last updated - (last-updated-index - bucket index) * 86400
+ $start_time = gmdate('Y-m-d', $last_update_start - (($last_update_index - $index) * 86400));
}
else {
- if ($index <= $last_update_index) {
- // anything which is <= last-update-index = last updated - (last-updated-index - bucket index) * 86400
- $start_time = gmdate('Y-m-d', $last_update_start - (($last_update_index - $index) * 86400));
- }
- else {
- // @TODO decide whether to drop the older stats or not, index in mongo here is 0-29
- // anything > last-update-index is the same as above + another 30 days or so? ... or just ignore these as too old?
- $start_time = gmdate('Y-m-d', $last_update_start - ((($last_update_index - $index) + 30) * 86400));
- }
- $insert_params[] = $snapshot_id;
- $insert_params[] = $start_time;
- $insert_params[] = $count;
- ++$rows;
- if ($count > $max['count']) {
- $max = array('count' => $count, 'start_time' => $start_time); // @TODO fill in the start time
- }
+ // @TODO decide whether to drop the older stats or not, index in mongo here is 0-29
+ // anything > last-update-index is the same as above + another 30 days or so? ... or just ignore these as too old?
+ $start_time = gmdate('Y-m-d', $last_update_start - ((($last_update_index - $index) + 30) * 86400));
}
+ $insert_params[] = $snapshot_id;
+ $insert_params[] = $start_time;
+ $insert_params[] = $count;
+ ++$rows;
+ if ($count > $max['count']) {
+ $max = array('count' => $count, 'start_time' => $start_time); // @TODO fill in the start time
+ }
}
}
}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module (working copy)
@@ -1,101 +0,0 @@
- t('Scan statistics'),
- 'description' => t('View scan, keyword, hashtag, and url statistics'),
- 'access callback' => 'user_access',
- 'access arguments' => array('view scan stats'),
- 'page callback' => 'np_scan_stats_proto_view',
- 'page arguments' => array(3, 4, 5),
- 'type' => MENU_NORMAL_ITEM,
- 'file' => 'np_scan_stats.proto.inc',
- );
-
- return $items;
-}
-
-/**
- * Implementation of hook_cron
- */
-function np_scan_stats_cron() {
- $mc = dmemcache_object('cache');
- // aggregated stat maintanence, compressed stat maintanence and fresh stat maintanence should not run together
- while (!$mc->add('np_scan_stats_maintanence', 1, FALSE, 1800)) {
- sleep(120);
- }
- $last_run = variable_get('np_scan_stats_cron_run', 0);
- $time = time();
- // dont run more than once
- if (date('H') != date('H', $last_run) || $lastrun + 3600 < $time) {
- include_once drupal_get_path('module', 'np_scan_stats') . '/np_scan_stats.cron.inc';
- $last_hour = mktime(date('H') - 1, 0, 0);
- // cleanup
- $cleanup_times = array();
- // stats
- // scan_statistics, keyword_statistics, hashtag_statistics, retweet_statistics, url_statistics, url_uniq_statistics, location_statistics
- foreach (array('scan', 'keyword', 'hashtag', 'retweet', 'url', 'url_uniq', 'location') as $table) {
- $start = time();
- $mongo_date = new MongoDate($last_hour - 7 * 86400);
- scan_api_get_mongo('statistics', $table)->remove(array('updated' => array('$lte' => $mongo_date))); // scan, keyword, hashtag, retweet, url, url_uniq, location
- $cleanup_times[] = $table . ' ' . (time() - $start);
- }
- scan_api_set_active_shard('misc');
- // urls
- $start = time();
- db_query("DELETE su FROM {source_urls} su INNER JOIN {urls} u ON su.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'source_urls ' . (time() - $start);
- $start = time();
- db_query("DELETE su FROM {scan_urls} su INNER JOIN {urls} u ON su.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_urls ' . (time() - $start);
-
- $start = time();
- $mongo_date = new MongoDate($last_hour - 30 * 86400);
- scan_api_get_mongo('statistics', 'url')->remove(array('updated' => array('$lte' => $mongo_date)));
- $cleanup_times[] = 'url_statistics_all_time ' . (time() - $start);
-
- $start = time();
- db_query("DELETE se FROM {scan_embeds} se INNER JOIN {urls} u ON se.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_embeds ' . (time() - $start);
- $start = time();
- db_query("DELETE si FROM {scan_images} si INNER JOIN {urls} u ON si.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_images ' . (time() - $start);
- $start = time();
- // make sure we don't delete a url that is blocked, as they are blocked by ID and we still need the url table entry to get that blocked url
- db_query("DELETE u FROM {urls} u LEFT JOIN {group_blocked_urls} gbu ON u.id = gbu.url_id WHERE last_occurrence < '%s' AND gbu.url_id IS NULL", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'urls ' . (time() - $start);
- scan_api_set_active_shard();
- watchdog('cron', 'np_scan_stat cleanup times in seconds: ' . implode("\n", $cleanup_times));
- variable_set('np_scan_stats_cron_run', $time);
- }
- $mc->delete('np_scan_stats_maintanence');
- if (date('d', variable_get('np_scan_views_24_cleanup', 0)) != date('d')) {
- variable_set('np_scan_views_24_cleanup', time());
- db_query('UPDATE {node} SET np_views_24 = 0 WHERE np_views_24 != 0');
- db_query('UPDATE {node} SET np_views_widget_24 = 0 WHERE np_views_widget_24 != 0');
- }
-}
-
-function np_scan_stats_perm() {
- return array('view scan stats');
-}
-
-function np_scan_stats_theme() {
- return array(
- 'mongo_query' => array('arguments' => array('collection' => NULL, 'find' => NULL, 'fields' => array(), 'sort' => array(), 'limit' => 0)),
- 'mongo_json' => array('arguments' => array('json' => array())),
- 'mongo_short_datetime' => array('arguments' => array('dt' => NULL)),
- 'mongo_datetime' => array('arguments' => array('dt' => NULL)),
- 'mongo_scan_details' => array('arguments' => array('record' => array())),
- 'mongo_word_details' => array('arguments' => array('collection' => 'keyword', 'record' => array())),
- 'mongo_location_details' => array('arguments' => array('record' => array())),
- 'mongo_url_details' => array('arguments' => array('record' => array())),
- 'twitter_word' => array('arguments' => array('collection' => NULL, 'word' => NULL)),
- );
-}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module (working copy)
@@ -56,19 +56,28 @@
function np_scan_stats_mail_cron() {
$mail_params = array('time' => time(), 'keyword' => array(), 'hashtag' => array(),);
$collections = array('keyword' => 15, 'hashtag' => 10);
+ $query = array(
+ 'scan_id' => 0,
+ );
+ $fields = array(
+ 'word' => 1,
+ 'velocity.minutes' => 1,
+ 'velocity.hours' => 1,
+ 'trending' => 1,
+ );
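+ // Only project what the digest needs; the query below sorts on trending.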
foreach ($collections as $collection => $default_length) {
- $cursor = scan_api_get_mongo('statistics', $collection); // keyword, hashtag
- if ($cursor) {
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / keyword, hashtag / scan_id=0 / trending:-1
try {
$result = $cursor
- ->find(array('scan_id' => 0))
+ ->find($query, $fields)
->sort(array('trending' => -1))
->limit(variable_get('np_scan_stats_mail_' . $collection . '_list_length', $default_length))
->timeout(scan_api_get_mongo_timeout());
foreach($result as $document) {
$mail_params[$collection][] = array(
'word' => $document['word'],
- 'velocity' => $document['minutes']['velocity']? $document['minutes']['velocity'] : $document['hours']['velocity'],
+ // @@@ V2 This fallback is kinda stupid.
+ 'velocity' => $document['velocity']['minutes'] ? $document['velocity']['minutes'] : $document['velocity']['hours'],
'trending' => $document['trending'],
);
}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc (working copy)
@@ -1,264 +0,0 @@
- 1, 'biday' => 2,) as $timeslice => $multiplier) {
- db_query("
- UPDATE {scan_statistics_aggregated} dst
- INNER JOIN {scan_statistics} src ON dst.scan_id = src.scan_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 86400), $time + $i);
- }
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron scan runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain keyword/hashtag aggregated statistics table
- */
-function np_scan_stats_cron_word($table_type, $last_hour = 0) {
- if ($table_type != 'keyword' && $table_type != 'hashtag') {
- return;
- }
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- $max = db_result(db_query('SELECT COUNT(*) FROM {' . $table_type . '_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_' . $table_type, ($table_type == 'keyword' ? 25000 : 100000));
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // last hour + age
- // keyword_statistics_aggregated, hashtag_statistics_aggregated, keyword_statistics, hashtag_statistics
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated} dst
- LEFT JOIN {" . $table_type . "_statistics} src ON dst.scan_id = src.scan_id AND dst.word = src.word AND src.start_time = '%s'
- SET dst.velocity_recent = IFNULL(src.count, 0), dst.age = dst.age + 1
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour), $time + $i);
- // remove out of interval ones
- foreach (array('6hour' => 6,) as $timeslice => $multiplier) {
- // keyword_statistics_aggregated, hashtag_statistics_aggregated, keyword_statistics, hashtag_statistics
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated} dst
- INNER JOIN {" . $table_type . "_statistics} src ON dst.scan_id = src.scan_id AND src.word = dst.word AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 3600), $time + $i);
- }
- // trending
- // velocity_6hour / (velocity_month / 30 days)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated}
- SET trending = IF(velocity_6hour < 18 OR velocity_month = 0 OR age = 0, 0, (velocity_6hour/6) / ( velocity_month / IF(age > 744, 744, age) ))
- WHERE timestamp = %d
- ", $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron ' . $table_type . ' runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain location aggregated statistics table
- */
-function np_scan_stats_cron_location($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- // location_statistics_aggregated
- $max = db_result(db_query('SELECT COUNT(*) FROM {location_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_location', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {location_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // last hour + age
- // location_statistics_aggregated, location_statistics
- db_query("
- UPDATE {location_statistics_aggregated} dst
- LEFT JOIN {location_statistics} src ON dst.scan_id = src.scan_id AND dst.location_id = src.location_id AND src.start_time = '%s'
- SET dst.velocity_recent = IFNULL(src.count, 0), dst.age = dst.age + 1
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour), $time + $i);
- // remove out of interval ones
- foreach (array('6hour' => 6, 'biday' => 48,) as $timeslice => $multiplier) {
- // location_statistics_aggregated, location_statistics
- db_query("
- UPDATE {location_statistics_aggregated} dst
- INNER JOIN {location_statistics} src ON dst.scan_id = src.scan_id AND src.location_id = dst.location_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 3600), $time + $i);
- }
- // trending
- // velocity_6hour / (velocity_month / 30 days)
- // location_statistics_aggregated
- db_query("
- UPDATE {location_statistics_aggregated}
- SET trending = IF(velocity_6hour < 18 OR velocity_month = 0 OR age = 0, 0, (velocity_6hour/6) / ( velocity_month / IF(age > 744, 744, age) ))
- WHERE timestamp = %d
- ", $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron location runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain url aggregated statistics table
- */
-function np_scan_stats_cron_url($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- $max = db_result(db_query('SELECT COUNT(*) FROM {url_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_url', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- db_query("
- UPDATE {url_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // remove out of interval ones
- db_query("
- UPDATE {url_statistics_aggregated} dst
- INNER JOIN {url_statistics} src ON dst.scan_id = src.scan_id AND src.url_id = dst.url_id AND src.start_time = '%s'
- SET dst.velocity = IF(dst.velocity > src.count, dst.velocity - src.count, 0), dst.cleanup = IF(dst.velocity > src.count, 0, 1)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - 6 * 3600), $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron url runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- // cleanup
- db_query("DELETE FROM {url_statistics_aggregated} WHERE cleanup = 1");
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain Uniq url aggregated statistics table
- */
-function np_scan_stats_cron_url_uniq($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- $max = db_result(db_query('SELECT COUNT(*) FROM {url_uniq_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_url_unique', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- db_query("
- UPDATE {url_uniq_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // remove out of interval ones
- foreach (array('day' => 1, 'biday' => 2,) as $timeslice => $multiplier) {
- db_query("
- UPDATE {url_uniq_statistics_aggregated} dst
- INNER JOIN {url_uniq_statistics} src ON dst.scan_id = src.scan_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.general + src.photo + src.video, dst.velocity_" . $timeslice . " - src.general - src.photo - src.video, 0),
- dst.general_" . $timeslice . " = IF(dst.general_" . $timeslice . " > src.general, dst.general_" . $timeslice . " - src.general, 0),
- dst.photo_" . $timeslice . " = IF(dst.photo_" . $timeslice . " > src.photo, dst.photo_" . $timeslice . " - src.photo, 0),
- dst.video_" . $timeslice . " = IF(dst.video_" . $timeslice . " > src.video, dst.video_" . $timeslice . " - src.video, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 86400), $time + $i);
- }
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron uniq url runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css (working copy)
@@ -1,30 +0,0 @@
-#proto-page th.title { width: 99% }
-#proto-page th.word { min-width: 100px; max-width: 100px }
-#proto-page th.location { min-width: 99%; max-width: 100px }
-#proto-page th.url-link { min-width: 200px; max-width: 200px }
-#proto-page th.url-title { min-width: 50% }
-#proto-page th.id { min-width: 75px; max-width: 75px }
-#proto-page th.updated { min-width: 100px; max-width: 100px }
-#proto-page th.created { min-width: 50px; max-width: 50px }
-#proto-page th.trending { min-width: 75px; max-width: 75px }
-#proto-page th.velocity { min-width: 75px; max-width: 75px }
-#proto-page th.prev_velocity { min-width: 75px; max-width: 75px }
-#proto-page th.general { min-width: 75px; max-width: 75px }
-#proto-page th.video { min-width: 75px; max-width: 75px }
-#proto-page th.photo { min-width: 75px; max-width: 75px }
-#proto-page th.category { min-width: 75px; max-width: 75px }
-#proto-page th.count { min-width: 75px; max-width: 75px }
-#proto-page th.source { min-width: 75px; max-width: 75px }
-#proto-page h3 { padding: 10px 0 0 0; text-align: center}
-#proto-page h4 { text-align: center }
-#proto-page hr { padding: 10px 0 10px 0; clear: both }
-#proto-page div#toc { line-height: 100% }
-#proto-page div#toc > div.item-list > ul > li { float: left }
-#proto-page div#option-form { clear: both }
-#proto-page div#option-form form { margin-bottom: 0 }
-#proto-page div#option-form div, #proto-page div#option-form > div, #proto-page div#option-form label { display: inline }
-#proto-page div#help { clear: both; padding: 5px 0 0 0; line-height: 100%; font-size: 90% }
-#proto-page div.scan-list { float: right; clear: both }
-#proto-page div.scan-list-keyword,
-#proto-page div.scan-list-hashtag,
-#proto-page div.scan-list-retweet { float: right; clear: none }
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info (working copy)
@@ -1,6 +0,0 @@
-; $Id: $
-name = Np Scan Stats
-description = Scan Stats Cron / Admin functions
-dependencies[] = scan_api
-package = NP
-core = 6.x
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc (revision 34222)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc (working copy)
@@ -1,985 +0,0 @@
- array());
- if (isset($_GET['limit'])) {
- $options['query']['limit'] = $_GET['limit'];
- }
- if (isset($_GET['fields'])) {
- $options['query']['fields'] = $_GET['fields'];
- }
-
- $current_url = 'admin/reports/scan_stats/';
- $toc = array(
- array(
- 'data' => l('Scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan'))),
- 'children' => array(
- l('Most active online scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_online'))),
- l('Most active offline scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_offline'))),
- l('Most active scans by link', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_bylink'))),
- l('Top trending scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_trending'))),
- )
- ),
- array(
- 'data' => l('Keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword'))),
- 'children' => array(
- l('Most mentioned keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword_velocity'))),
- l('Top trending keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword_trending'))),
- ),
- ),
- array(
- 'data' => l('Hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag'))),
- 'children' => array(
- l('Most mentioned hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag_velocity'))),
- l('Top trending hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag_trending'))),
- ),
- ),
-/* array(
- 'data' => l('Retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet'))),
- 'children' => array(
- l('Most mentioned retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet_velocity'))),
- l('Top trending retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet_trending'))),
- ),
- ), */
- array(
- 'data' => l('Links', $current_url . 'link', array_merge($options, array('fragment' => 'link'))),
- 'children' => array(
- l('Top links', $current_url . 'link', array_merge($options, array('fragment' => 'link_general'))),
- l('Top photos', $current_url . 'link', array_merge($options, array('fragment' => 'link_photo'))),
- l('Top videos', $current_url . 'link', array_merge($options, array('fragment' => 'link_video'))),
- l('Top links by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_general'))),
- l('Top photos by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_photo'))),
- l('Top videos by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_video'))),
- ),
- ),
- array(
- 'data' => l('Locations', $current_url . 'location', array_merge($options, array('fragment' => 'location'))),
- 'children' => array(
- l('Most used locations', $current_url . 'location', array_merge($options, array('fragment' => 'location_velocity'))),
- l('Top trending locations', $current_url . 'location', array_merge($options, array('fragment' => 'location_trending'))),
- ),
- ),
- );
- $out = '
' . theme('item_list', $toc) . '
';
-
- if (!empty($heading)) {
- $out .= '' . drupal_get_form('np_scan_stats_proto_options_form') . '
';
- }
-
- $help = array();
- $help[] = t('Trending measures current momentum usng the normalized ratio of the current velocity (5 minute increments) to the long term velocity (30 day).');
- $help[] .= t('See Mongo Schema for more help.', array('@wiki-url' => url('https://apps.d2.nowpublic.com/trac/wiki/Scan/MongoSchema', array('absolute' => TRUE))));
- $out .= '' . theme('item_list', $help) . '
';
-
- $query_active_and_online = array('scan_id' => array('$gt' => 0), 'scan.status' => 0, 'scan.active' => 1);
- $query_no_scan = array('scan_id' => 0);
- $query_scan = array('scan_id' => array('$gt' => 0));
-
- if ($heading == 'scan') {
- if (empty($detail)) {
- $out .= '
' . t('Scans') . '
';
- $out .= '' . t('Most active online scans') . '
';
- $out .= _np_scan_stats_scan_top_velocity($query_active_and_online);
- $out .= '' . t('Most active offline scans') . '
';
- $out .= _np_scan_stats_scan_top_velocity(array('scan.status' => 1, 'scan.active' => 1));
- $out .= '' . t('Most active scans by link') . '
';
- $out .= _np_scan_stats_top_unique_links($query_active_and_online);
- $out .= '' . t('Top trending scans') . '
';
- $out .= _np_scan_stats_word_top_sort('scan', array('trending' => -1), $query_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_scan_details_header($detail);
- $out .= _np_scan_stats_scan_title($detail, false);
- $out .= _np_scan_stats_scan_details($detail);
- }
- }
- else if ($heading == 'keyword') {
- if (empty($detail)) {
- $out .= '
' . t('Keywords') . '
';
- $out .= '' . t('Most mentioned keywords') . '
';
- $out .= _np_scan_stats_word_top_velocity('keyword', $query_no_scan);
- $out .= '' . t('Top trending keywords') . '
';
- $out .= _np_scan_stats_word_top_sort('keyword', array('trending' => -1), $query_no_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Keyword: %word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('keyword', $detail, $scan_id);
- }
- }
- else if ($heading == 'hashtag') {
- if (empty($detail)) {
- $out .= '
' . t('Hashtag') . '
';
- $out .= '' . t('Most mentioned hashtags') . '
';
- $out .= _np_scan_stats_word_top_velocity('hashtag', $query_no_scan);
- $out .= '' . t('Top trending hashtags') . '
';
- $out .= _np_scan_stats_word_top_sort('hashtag', array('trending' => -1), $query_no_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Hashtag: #%word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('hashtag', $detail, $scan_id);
- }
- }
- else if ($heading == 'retweet') {
-/* if (empty($detail)) {
- $out .= '
' . t('Retweet') . '
';
- $out .= '' . t('Most mentioned retweets') . '
';
- $out .= _np_scan_stats_word_top_velocity('retweet', $query_no_scan);
- $out .= '' . t('Top trending retweets') . '
';
- $out .= _np_scan_stats_word_top_sort('retweet', array('trending' => -1), $query_no_scan);
- }
- else { */
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Retweet: %word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('retweet', $detail, $scan_id);
-// }
- }
- else if ($heading == 'location') {
- if (empty($detail)) {
- $out .= '
' . t('Locations') . '
';
- $out .= '' . t('Most used locations') . '
';
- $out .= _np_scan_stats_word_top_velocity('location', $query_no_scan);
- $out .= '' . t('Top trending locations') . '
';
- $out .= _np_scan_stats_word_top_sort('location', array('trending' => -1), $query_no_scan);
- }
- else {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $detail));
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, intval($detail));
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Location: %location_id: %title', array('%location_id' => $detail, '%title' => $name)) . '
';
- $out .= _np_scan_stats_location_details($detail, $scan_id);
- }
- }
- else if ($heading == 'link') {
- $out .= '
' . t('Links') . '
';
-
- $categories = array(
- "general" => 1,
- "photo" => 2,
- "video" => 3,
- );
- foreach ($categories as $category_name => $category_id) {
- $out .= '' . t('Top %category links by velocity', array('%category' => $category_name)) . '
';
- $out .= _np_scan_stats_top_links(array('scan_id' => 0, 'category' => $category_id), array('hours.velocity' => -1));
- $out .= '' . t('Top %category links by count', array('%category' => $category_name)) . '
';
- $out .= _np_scan_stats_top_links(array('scan_id' => 0, 'category' => $category_id), array('count' => -1));
- }
- }
- else if ($heading == 'url' && !empty($detail)) {
- $title = db_result(db_query("SELECT title FROM {urls} WHERE id = %d", $detail));
- $out .= '
';
-// $out .= _np_scan_stats_word_details_header($heading, intval($detail));
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Url: %url_id: %title', array('%url_id' => $detail, '%title' => $title)) . '
';
- $out .= _np_scan_stats_url_details($detail, $scan_id);
- }
-
- db_set_active();
- return '' . $out . '
';
-}
-
-function _np_scan_stats_scan_title($scan_id, $link = true) {
- if ($scan_id) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $scan_id));
- $out = t('Scan: %scan_id: %title', array('%scan_id' => $scan_id, '%title' => $title));
- return '' . ($link ? l($out, 'admin/reports/scan_stats/scan/' . $scan_id, array('html' => 1)) : $out) . '
';
- }
-}
-
-function _np_scan_stats_scan_top_velocity($query = array()) {
- $output = array();
- if (!($cursor = scan_api_get_mongo('scan_stats', 'scan'))) {
- _np_scan_stats_mongo_error();
- }
- else {
- $buckets = array(
- 'hour' => array('bucket' => 'minutes', 'velocity' => 'velocity', 'prev_velocity' => 1),
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'biday' => array('bucket' => 'hours', 'velocity' => 'velocity48', 'prev_velocity' => 1),
- 'week' => array('bucket' => 'days', 'velocity' => 'velocity7'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity', 'prev_velocity' => 1),
- );
- $caption = array();
- foreach ($buckets as $timeslice => $mongo) {
- $output[$timeslice] = array();
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $fields = array('scan_id' => 1, $velocity_field => 1, 'trending' => 1, 'updated' => 1, 'created' => 1);
- if (isset($mongo['prev_velocity'])) {
- $fields[$bucket . '.prev_velocity'] = 1;
- }
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $hint = array($velocity_field => -1);
- $sort = array($velocity_field => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption[$timeslice] = theme('mongo_query', 'scan', $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- $output[$timeslice][] = array(
- l($row['scan_id'], 'admin/reports/scan_stats/scan/'. $row['scan_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title ? l($title, 'admin/reports/scan_stats/scan/' . $row['scan_id']) : '-',
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($mongo['prev_velocity']) ? round($row[$bucket]['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- }
-
- $headers = array(
- array('data' => t('id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'title'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Prev'), 'class' => 'prev_velocity'),
- );
-
- $out = '';
- $out .= '' . t('Last 60 Minutes') . '
';
- $out .= theme('table', $headers, $output['hour'], array(), $caption['hour']);
-
- $out .= '' . t('Last 24 Hours') . '
';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '' . t('Last 48 Hours') . '
';
- $out .= theme('table', $headers, $output['biday'], array(), $caption['biday']);
-
- $out .= '' . t('Last 7 Days') . '
';
- $out .= theme('table', $headers, $output['week'], array(), $caption['week']);
-
- $out .= '' . t('Last 30 Days') . '
';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_top_unique_links($query = array()) {
- if (!($cursor = scan_api_get_mongo('scan_stats', 'scanurl'))) {
- _np_scan_stats_mongo_error();
- }
- else {
- // most active scans by number of unique links
- $buckets = array(
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity24'),
- 'biday' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'week' => array('bucket' => 'days', 'velocity' => 'velocity7'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity'),
- );
- $output = array();
- $caption = array();
- foreach ($buckets as $timeslice => $mongo) {
- $output[$timeslice] = array();
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $fields = array(
- 'scan_id' => 1,
- $velocity_field => 1,
- 'updated' => 1,
- 'created' => 1,
- $bucket . '.general.' . $mongo['velocity'] => 1,
- $bucket . '.video.' . $mongo['velocity'] => 1,
- $bucket . '.photo.' . $mongo['velocity'] => 1,
- );
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $sort = array($velocity_field => -1);
- $hint = array('scan_id' => 1, $velocity_field => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption[$timeslice] = theme('mongo_query', 'scanurl', $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- $output[$timeslice][] = array(
- l($row['scan_id'], 'admin/reports/scan_stats/scan/' . $row['scan_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title ? l($title, 'admin/reports/scan_stats/scan/' . $row['scan_id']) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($row[$bucket]['general'][$mongo['velocity']]) ? round($row[$bucket]['general'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- isset($row[$bucket]['video'][$mongo['velocity']]) ? round($row[$bucket]['video'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- isset($row[$bucket]['photo'][$mongo['velocity']]) ? round($row[$bucket]['photo'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- return;
- }
- }
- }
-
- $headers = array(
- array('data' => t('id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'title'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('General'), 'class' => 'general'),
- array('data' => t('Video'), 'class' => 'video'),
- array('data' => t('Photo'), 'class' => 'photo'),
- );
-
- $out = '';
- $out .= '' . t('Last 24 Hours') . '';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '' . t('Last 48 Hours') . '';
- $out .= theme('table', $headers, $output['biday'], array(), $caption['biday']);
-
- $out .= '' . t('Last 7 Days') . '';
- $out .= theme('table', $headers, $output['week'], array(), $caption['week']);
-
- $out .= '' . t('Last 30 Days') . '';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_top_links($query = array(), $sort = array()) {
- $output = array();
- if (!($cursor = scan_api_get_mongo('scan_stats', 'url'))) {
- _np_scan_stats_mongo_error();
- $caption = theme('mongo_query', 'url', array(), array(), array(), LIMIT);
- }
- else {
- $fields = array(
- 'category' => 1,
- 'url_id' => 1,
- 'hours.velocity' => 1,
- 'count' => 1,
- 'updated' => 1,
- 'created' => 1,
- );
- if (isset($sort['hours.velocity'])) {
- $query['hours.velocity'] = array('$gt' => 0);
- $hint = array('scan_id' => 1, 'category' => 1, 'hours.velocity' => -1);
- }
- else {
- $hint = array('scan_id' => 1, 'category' => 1, 'count' => -1);
- }
- // Add category to query so we can use the index
- if (!isset($query['category'])) {
- $query['category'] = array('$in' => array(1, 2, 3));
- }
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- try {
- $results = $cursor->find($query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $caption = theme('mongo_query', 'url', $query, $fields, $sort, $limit);
- foreach ($results as $row) {
- db_set_active('misc');
- $url = db_fetch_array(db_query("
- SELECT u.title, u.resolved,
- si.thumb_path,
- se.domain, se.video_id
- FROM {urls} u
- LEFT JOIN {scan_images} si ON si.url_id = u.id
- LEFT JOIN {scan_embeds} se ON se.url_id = u.id
- WHERE u.id = %d
- ", $row['url_id']));
- $title = empty($url['title']) ? '-' : $url['title'];
- if ($row['category'] == 2 && !empty($url['thumb_path'])) {
- $path = basename($url['thumb_path']);
- $full_path = 'http://media.scan.nowpublic.com/'. substr($path, 0, 1) . '/' . substr($path, 1, 1) . substr($path, 2, 1) . '/'. $path;
- $title = theme('image', $full_path, $title, $title, NULL, FALSE);
- }
- elseif ($row['category'] == 3 && !empty($url['video_id'])) {
- $title = _api_embed_code($url['domain'], $url['video_id'], 280, 200);
- }
- if (!empty($url['resolved'])) {
- $url['host'] = parse_url($url['resolved'], PHP_URL_HOST);
- }
- db_set_active();
- $output[] = array(
- l($row['url_id'], 'admin/reports/scan_stats/url/' . $row['url_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title,
- empty($url['resolved']) ? '-' : l($url['host'], $url['resolved']),
- round($row['hours']['velocity'], STATS_VELOCITY_PRECISION),
- $row['count'],
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
-
- $headers = array(
- array('data' => t('Id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'url-title'),
- array('data' => t('Link'), 'class' => 'url-link'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Count'), 'class' => 'count'),
- );
-
- return theme('table', $headers, $output, array(), $caption);
-}
-
-function _np_scan_stats_scan_details($scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- $query = array('scan_id' => intval($scan_id));
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_scan_details', _np_scan_stats_get_record('scan', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_word_details_header($collection, $value) {
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // keyword, hashtag, keyword, location, (url -- disabled)
- _np_scan_stats_mongo_error();
- return;
- }
- $collections = array(
- 'keyword' => 'word',
- 'hashtag' => 'word',
- 'retweet' => 'word',
- 'location' => 'location_id',
- 'url' => 'url_id',
- );
- $query = array($collections[$collection] => $value, 'scan_id' => array('$gt' => 0));
- $hint = array('scan_id' => 1, $collections[$collection] => 1, 'hours.velocity' => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- try {
- $results = $cursor
- ->find($query, array('scan_id' => 1, 'hours.velocity' => 1))
- ->sort(array('hours.velocity' => -1))
- ->hint($hint)
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $rows = array();
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- if (strlen($title) > 40) {
- $title = substr($title, 0, 40) . "...";
- }
- $rows[] = round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) . ': ' . l($title ? $title : $row['scan_id'], 'admin/reports/scan_stats/scan/' . $row['scan_id']);
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- if ($rows) {
- return '' . theme('item_list', $rows, 'scans', 'ol') . '';
- }
-}
-
-function _np_scan_stats_scan_details_header($scan_id) {
- $query = array('scan_id' => intval($scan_id));
- $collections = array(
- 'keyword' => 'word',
- 'hashtag' => 'word',
- 'retweet' => 'word',
- 'location' => 'location_id',
- 'url' => 'url_id',
- );
- $toc = array();
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- foreach ($collections as $collection => $field) {
- if ($cursor = scan_api_get_mongo('scan_stats', $collection)) { // keyword, hashtag, retweet, location, url
- try {
- $results = $cursor
- ->find($query, array($field => 1, 'hours.velocity' => 1))
- ->sort(array('hours.velocity' => -1))
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $rows = array();
- foreach ($results as $row) {
- $title = $row[$field];
- if ($collection == 'location') {
- if ($name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$field]))) {
- $title = $name . ' (' . $row[$field] . ')';
- }
- }
- else if ($collection == 'url') {
- if ($title = db_result(db_query("SELECT title FROM {urls} WHERE id = %d", $row[$field]))) {
- $title = preg_replace('/^YouTube\s+-\s+/', '', $title);
- if (strlen($title) > 40) {
- $title = substr($title, 0, 40) . "...";
- }
- $title .= ' (' . $row[$field] . ')';
- }
- }
- $rows[] = round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) . ': ' . l($title, 'admin/reports/scan_stats/'. $collection . '/' . $row[$field] . '/' . $scan_id);
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
- if ($rows) {
- return '' . theme('item_list', $rows, $collection . 's', 'ol') . '';
- }
- }
-}
-
-function _np_scan_stats_word_top_velocity($collection, $query = array()) {
- $output = array('hour' => array(), 'day' => array(), 'month' => array());
- $key = $collection == 'location' ? 'location_id' : 'word';
-
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // keyword, hashtag, (retweet -- disabled), location
- _np_scan_stats_mongo_error();
- $caption_title = theme('mongo_query', $collection, array(), array(), array(), LIMIT);
- $caption = array('hour' => $caption_title, 'day' => $caption_title, 'month' => $caption_title);
- }
- else {
- $buckets = array(
- 'hour' => array('bucket' => 'minutes', 'velocity' => 'velocity'),
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity'),
- );
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- foreach ($buckets as $timeslice => $mongo) {
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $hint = array('scan_id' => 1, $velocity_field => -1);
- $sort = array($velocity_field => -1);
- $fields = array($velocity_field => 1, 'trending' => 1, 'created' => 1, 'updated' => 1, $bucket . '.prev_velocity', $key => 1);
- $caption[$timeslice] = theme('mongo_query', $collection, $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- if ($collection == 'location') {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$key]));
- $key_link = l($row[$key], 'admin/reports/scan_stats/location/' . $row[$key]);
- $key_value = empty($name) ? $key_link : l($name, 'admin/reports/scan_stats/location/' . $row[$key]) . ' (' . $key_link . ')';
- }
- else {
- $key_value = theme('twitter_word', $collection, $row[$key]);
- }
- $output_row = array(
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $key_value,
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($mongo['prev_velocity']) ? round($row[$bucket]['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- if ($collection != 'location') {
- $whitelist = db_result(db_query("SELECT IFNULL(type, -1) as whitelist FROM {keyword_whitelist} WHERE word = '%s'", $row['word']));
- $output_row[] = $whitelist == -1 ? "NER" : (($whitelist == 5) ? "whitelist-user" : "whitelist");
- }
- $output[$timeslice][] = $output_row;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- }
-
- $headers = array(
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => $collection == 'location' ? t('Location') : t('Word'), 'class' => $collection == 'location' ? 'location' : 'word'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Prev'), 'class' => 'prev_velocity'),
- );
- if ($collection != 'location') {
- $headers[] = array('data' => t('Source'), 'class' => 'source');
- }
-
- $out = '';
- $out .= '' . t('Last 60 Minutes') . '';
- $out .= theme('table', $headers, $output['hour'], array(), $caption['hour']);
-
- $out .= '' . t('Last 24 Hours') . '';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '' . t('Last 30 Days') . '';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_word_top_sort($collection, $sort, $query = array()) {
- $output = array();
- $keys = array('location' => 'location_id', 'scan' => 'scan_id');
- $key = isset($keys[$collection]) ? $keys[$collection] : 'word';
- $sort_keys = array_keys($sort);
- $sort_key = $sort_keys[0];
-
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // scan, keyword, hashtag, (retweet -- disabled), location
- _np_scan_stats_mongo_error();
- $caption = theme('mongo_query', $collection, array(), array(), array(), LIMIT);
- }
- else {
- $fields = array('trending' => 1, 'created' => 1, 'updated' => 1, 'minutes.velocity' => 1, 'minutes.prev_velocity' => 1, 'hours.velocity' => 1, 'hours.prev_velocity' => 1, 'days.velocity' => 1, 'days.prev_velocity' => 1, $key => 1);
- $hint = ($collection == 'scan') ? array() : array('scan_id' => 1);
- $hint = array_merge($hint, $sort);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption = theme('mongo_query', $collection, $query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- if ($collection == 'location') {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$key]));
- $path = 'admin/reports/scan_stats/location/' . $row[$key];
- $key_path = l($row[$key], $path);
- $key_value = empty($name) ? $key_path : l($name, $path) . ' (' . $key_path . ')';
- }
- else if ($collection == 'scan') {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row[$key]));
- $key_value = l($title, 'admin/reports/scan_stats/scan/' . $row[$key]);
- }
- else {
- $key_value = theme('twitter_word', $collection, $row[$key]);
- }
- $output_row = array(
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $key_value,
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- isset($row['minutes']['velocity']) ? round($row['minutes']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['minutes']['prev_velocity']) ? round($row['minutes']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['hours']['velocity']) ? round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['hours']['prev_velocity']) ? round($row['hours']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['days']['velocity']) ? round($row['days']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['days']['prev_velocity']) ? round($row['days']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- if ($key == 'word') {
- $whitelist = db_result(db_query("SELECT IFNULL(type, -1) as whitelist FROM {keyword_whitelist} WHERE word = '%s'", $row['word']));
- $output_row[] = $whitelist == -1 ? "NER" : (($whitelist == 5) ? "whitelist-user" : "whitelist");
- }
- if (floatval($row[$sort_key]) > 0) {
- $output[] = $output_row;
- }
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
-
- $headers = array(
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => $collection == 'location' ? t('Location') : t('Word'), 'class' => $collection == 'location' ? 'location' : 'word'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity hr'), 'class' => 'velocity'),
- array('data' => t('Prev hr'), 'class' => 'prev_velocity'),
- array('data' => t('Velocity day'), 'class' => 'velocity'),
- array('data' => t('Prev day'), 'class' => 'prev_velocity'),
- array('data' => t('Velocity mon'), 'class' => 'velocity'),
- array('data' => t('Prev mon'), 'class' => 'prev_velocity'),
- );
- if ($key == 'word') {
- $headers[] = array('data' => t('Source'), 'class' => 'source');
- }
-
- return theme('table', $headers, $output, array(), $caption);
-}
-
-function _np_scan_stats_word_details($collection, $word, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', $collection)) { // keyword, hashtag, retweet
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'word' => $word);
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_word_details', $collection, _np_scan_stats_get_record($collection, $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_location_details($location_id, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'location')) {
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'location_id' => intval($location_id));
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_location_details', _np_scan_stats_get_record('location', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_url_details($url_id, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'url')) {
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'url_id' => intval($url_id));
- $hint = array('scan_id' => 1, 'url_id' => 1, 'hours.velocity' => -1);
- try {
- $results = $cursor
- ->find($query)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_url_details', _np_scan_stats_get_record('url', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_get_record($collection, $cursor) {
- $record = $cursor->getNext();
- if (!$record) {
- return array('error' => t('can not read collection'));
- }
- $updated = $record['updated']->sec;
- foreach (array('minutes', 'hours', 'days') as $type) {
- if (isset($record[$type])) {
- $index = scan_api_bucket_index($collection, $type, $updated);
- if (isset($record[$type][$index])) {
- $record[$type][$index] .= ' (*' . t('updated') . ')';
- }
- else {
-// drupal_set_message(t('current %type index of %index missing', array('%type' => $type, '%index' => $index)));
- }
- // Validate that the velocities.
- $size = scan_api_interval_count($collection, $type);
- $velocity = 0;
- foreach ($record[$type] as $key => $value) {
- if (is_numeric($key)) {
- $velocity += $value;
- }
- }
- $field = 'velocity';
- if ($collection == 'scan' && $type == 'hours') {
- $field .= $size;
- }
- if (intval($record[$type][$field] * $size + 0.0001 /*account for rounding error*/) != $velocity) {
- drupal_set_message(t('%type %field is %old, but should be %new', array('%type' => $type, '%field' => $field, '%old' => round($record[$type]['velocity'], 4), '%new' => round($velocity / $size, 4))), 'error');
- }
- }
- }
- foreach (array('updated', 'created') as $field) {
- if (isset($record[$field])) {
- $dt = $record[$field];
- $record[$field] = (array) $dt;
- $record[$field]['sec'] .= ' (*' . theme('mongo_datetime', $dt) . ')';
- }
- }
- if (isset($record['scan'])) {
- foreach (array('client_id', 'active', 'status') as $key) {
- if (!isset($record['scan'][$key])) {
- drupal_set_message(t('scan.%key missing', array('%key' => $key)), 'error');
- }
- }
- }
- return $record;
-}
-
-function np_scan_stats_proto_options_form($form_state) {
- $form = array();
- $form['limit'] = array(
- '#type' => 'select',
- '#title' => t('Limit'),
- '#default_value' => isset($_GET['limit']) ? $_GET['limit'] : LIMIT,
- '#options' => drupal_map_assoc(array(5, 10, 15, 20, 30, 40, 50, 60, 100, 200, 300, 400, 500)),
- );
- $form['fields'] = array(
- '#type' => 'checkbox',
- '#title' => t('Show Fields in JSON'),
- '#default_value' => isset($_GET['fields']) ? $_GET['fields'] : 0,
- );
- $form['submit'] = array(
- '#type' => 'submit',
- '#value' => t('Change'),
- );
- return $form;
-}
-
-function np_scan_stats_proto_options_form_submit($form, &$form_state) {
- $options = array();
- if ($form_state['values']['limit'] != LIMIT) {
- $options['limit'] = $form_state['values']['limit'];
- }
- if ($form_state['values']['fields'] == 1) {
- $options['fields'] = $form_state['values']['fields'];
- }
- if (count($options)) {
- drupal_goto($_GET['q'], $options);
- }
-}
-
-function theme_mongo_query($collection, $query, $fields, $sort, $limit) {
- $out = 'mongo> db.' . $collection;
- $out .= '.find(' . theme('mongo_json', $query);
- if (count($fields) > 0 && isset($_GET['fields'])) {
- $out .= ',' . theme('mongo_json', $fields);
- }
- $out .= ')';
- if (count($sort) > 0) {
- $out .= '.sort(' . theme('mongo_json', $sort) . ')';
- }
- if ($limit) {
- $out .= '.limit(' . $limit . ')';
- }
- $out .= ".timeout(" . scan_api_get_mongo_timeout() . ")";
- return $out;
-}
-
-function theme_mongo_json($json) {
- return str_replace(array(',', ':'), array(', ', ': '), json_encode($json));
-}
-
-function theme_mongo_short_datetime($dt) {
- return date(SHORT_FORMAT, $dt->sec);
-}
-
-function theme_mongo_datetime($dt) {
- if (empty($dt)) {
- return '-';
- }
- static $now;
- if (!isset($now)) {
- $now = time();
- }
- if ($now <= $dt->sec + 60) {
- return t('last minute');
- }
- $ago = $now - $dt->sec;
- if ($ago <= 3600) {
- return t('%mins minutes', array('%mins' => round($ago / 60, $ago <= 300 ? 1 : 0)));
- }
- return date(MEDIUM_FORMAT, $dt->sec);
-}
-
-// @TODO: Here's a quick implementation for writing detail records,
-// but would be nice to see some real theming.
-
-function theme_mongo_scan_details($record) {
- return '' . _mongo_record_to_string($record) . '';
-}
-
-function theme_mongo_word_details($collection, $record) {
- $output = '';
- if (isset($record['word'])) {
- $output .= theme('twitter_word', $collection, $record['word']);
- }
- $output .= '' . _mongo_record_to_string($record) . '';
- return $output;
-}
-
-function theme_mongo_location_details($record) {
- return '' . _mongo_record_to_string($record) . '';
-}
-
-function theme_mongo_url_details($record) {
- return '' . _mongo_record_to_string($record) . '';
-}
-
-function _np_scan_stats_mongo_timeout_error() {
- static $once;
- if (!isset($once)) {
- drupal_set_message(t('Mongo timed out, use "mongo> db.currentOp()" to find the long running query'), 'error');
- $once = TRUE;
- }
-}
-
-function _np_scan_stats_mongo_error() {
- static $once;
- if (!isset($once)) {
- drupal_set_message(t('Mongo server is probably down.'), 'error');
- $once = TRUE;
- }
-}
-
-function theme_twitter_word($collection, $word) {
- $output = '';
- if ($collection == 'keyword' || $collection == 'hashtag') {
- $output .= l('T', 'http://twitter.com/#search?q="' . $word . '"') . ' ';
- }
- $pre_word = $collection == 'hashtag' ? '#' : '';
- $output .= l($pre_word . $word, 'admin/reports/scan_stats/' . $collection . '/' . $word);
- return $output;
-}
-
-function _mongo_record_to_string($record) {
- return print_r($record, 1);
-}
Index: www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc
===================================================================
--- www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc (revision 34222)
+++ www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc (working copy)
@@ -247,8 +247,17 @@
$placeholders = array_fill(0, count($add), "'%s'");
// $add comes from a form submission. data size assumed to always be sane ( < 1000) so we are not splitting this into chunked operation.
- scan_api_get_mongo('statistics', 'keyword')->remove(array('word' => array('$in' => $add)));
- scan_api_get_mongo('statistics', 'hashtag')->remove(array('word' => array('$in' => $add)));
+ $query = array('word' => array('$in' => $add));
+ try {
+ if ($cursor = scan_api_get_mongo('keyword')) { // V2r15 / keyword / word[] / none (remove query)
+ $cursor->remove($query);
+ }
+ if ($cursor = scan_api_get_mongo('hashtag')) { // V2r15 / hashtag / word[] / none (remove query)
+ $cursor->remove($query);
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
}
if (!empty($remove)) {
// remove from the blacklist
Index: www/sites/all/modules/custom/np_scan_import/np_scan_import.module
===================================================================
--- www/sites/all/modules/custom/np_scan_import/np_scan_import.module (revision 34222)
+++ www/sites/all/modules/custom/np_scan_import/np_scan_import.module (working copy)
@@ -99,14 +99,18 @@
scan_api_set_active_shard();
if (isset($nodes)) {
// fill up scan stat
- $result = scan_api_get_mongo('statistics', 'scan')
- ->find(
- array('scan_id' => array('$in' => $scan_ids)),
- array('scan_id'=> 1, 'minutes.velocity' => 1)
- );
- while($result->hasNext()) {
- $row = $result->getNext();
- $nodes[$row['scan_id']]->velocity = $row['minutes']['velocity'];
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
+ try {
+ $query = array('scan_id' => array('$in' => $scan_ids));
+ $fields = array('scan_id' => 1, 'velocity.minutes' => 1);
+ $result = $cursor->find($query, $fields)
+ ->timeout(scan_api_get_mongo_timeout());
+ foreach ($result as $row) {
+ $nodes[$row['scan_id']]->velocity = $row['velocity']['minutes'];
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
}
$notify = FALSE;
Index: view/sites/all/modules/scan_api/scan_api.module
===================================================================
--- view/sites/all/modules/scan_api/scan_api.module (revision 34222)
+++ view/sites/all/modules/scan_api/scan_api.module (working copy)
@@ -753,7 +753,7 @@
function _scan_top_keywords(&$velocity, &$trending, $type, $scan_ids, $order, $count, $related, $related_keywords, $related_hashtags) {
$interval = ($type == 1) ? 'minutes' : 'hours';
$multiplier = ($type == 1) ? 12 : 1;
- $velocity_field = $interval . '.velocity';
+ $velocity_field = 'velocity.' . $interval;
foreach (array('keyword', 'hashtag') as $collection_name) {
if ($related) {
$words = ($collection_name == 'keyword') ? $related_keywords : $related_hashtags;
@@ -764,7 +764,7 @@
if ($scan_ids && (!$related || ($related && isset($query['word'])))) {
$order_field = ($order == 'velocity') ? $velocity_field : 'trending';
- $cursor = scan_api_get_mongo('scan_stats', $collection_name); // keyword, hashtag
+ $cursor = scan_api_get_mongo($collection_name); // V2r15 / keyword, hashtag / scan_id[], word[] / velocity.minutes, velocity.hours, trending
if (!$cursor) {
return;
}
@@ -779,6 +779,7 @@
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if (!isset($row['word'])) {
+ assert(FALSE); // Worker bug: keyword / hashtag is speechless!
// bug in workers. they write empty word records... bad
continue;
}
@@ -786,7 +787,7 @@
$row['word'] = '#' . $row['word'];
}
if ($order == 'velocity') {
- $order_data = isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] : 0;
+ $order_data = isset($row['velocity'][$interval]) ? $row['velocity'][$interval] : 0;
}
else {
$order_data = $row['trending'];
@@ -794,7 +795,7 @@
if (!isset($ordering_data[$row['word']]) || ($ordering_data[$row['word']] < $order_data)) {
$ordering_data[$row['word']] = $order_data;
$data[$row['word']] = array(
- 'velocity' => isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] * $multiplier: 0,
+ 'velocity' => isset($row['velocity'][$interval]) ? $row['velocity'][$interval] * $multiplier: 0,
'trending' => $row['trending'],
'word' => $row['word'],
);
@@ -837,19 +838,146 @@
* @param $type
* int, 1 - fresh stats, 2 - stats based on 6 hr timeslice
*/
-function _scan_keyword_velocity($scan_id, $keywords, $is_hashtag, $multirow, $type) {
+function _scan_keyword_velocity($scan_id, $words) {
+ if (!is_array($words)) {
+ $words = array($words);
+ }
+ $words = array_map('strtolower', $words);
+
+ $criteria['hashtag'] = array();
+ $criteria['keyword'] = array();
+
+ foreach ($words as $k) {
+ if (substr($k, 0, 1) == '#') {
+ $criteria['hashtag'][] = substr($k, 1);
+ }
+ else {
+ $criteria['keyword'][] = $k;
+ }
+ }
+
+ $args = array(
+ 'collection' => '', // placeholder
+ 'key' => 'word',
+ 'query' => array(
+ 'scan_id' => $scan_id,
+ 'word' => array(), // placeholder
+ ),
+ 'fields' => array(
+ 'word' => 1,
+ 'velocity.minutes' => 1,
+ 'velocity.hours' => 1,
+ 'trending' => 1,
+ ),
+ 'empty' => array(
+ 'word' => '',
+ 'velocity' => 0,
+ 'velocity.hours' => 0,
+ 'trending' => 0,
+ ),
+ 'emptykeys' => array(), // placeholder
+ 'flatten' => TRUE,
+ 'zeromap' => array(
+ 'velocity.hours' => 'velocity.minutes',
+ ),
+ 'remap' => array(
+ 'velocity.minutes' => 'velocity',
+ ),
+ );
+
+ $results = array('hashtag' => array(), 'keyword' => array());
+ foreach (array('keyword', 'hashtag') as $collection) {
+ $args['collection'] = $collection;
+ $args['query']['word'] = (count($criteria[$collection]) > 1) ? array('$in' => $criteria[$collection]) : $criteria[$collection][0];
+ $args['emptykeys'] = $criteria[$collection];
+ $results[$collection] = scan_api_mongo_doquery($args); // V2r15 / keyword, hashtag / scan_id, word[] / none
+ }
+ foreach ($results['hashtag'] as $k => $v) {
+ $results['hashtag'][$k]['word'] = '#' . $results['hashtag'][$k]['word'];
+ }
+ foreach ($results['hashtag'] as $k => $v) {
+ $results['hashtag']['#' . $k] =& $results['hashtag'][$k];
+ unset($results['hashtag'][$k]);
+ }
+
+
+ if (!empty($keywords)) {
+ $query['collection'] = 'keyword';
+ $args['query']['word'] = (count($keywords) > 1) ? array('$in' => $keywords) : $keywords[0];
+ $args['emptykeys'] = $keywords;
+ $data = scan_api_mongo_doquery($args); // V2r15 / keyword / scan_id, word[] / none
+ $results['keywords'] = $data;
+ }
+ if (!empty($hashtags)) {
+ $query['collection'] = 'hashtag';
+ $args['query']['word'] = (count($hashtags) > 1) ? array('$in' => $hashtags) : $hashtags[0];
+ $data = scan_api_mongo_doquery($args); // V2r15 / hashtag / scan_id, word[] / none
+ foreach ($data as $key => $value) {
+ $results['keywords'][$key] = $value;
+ $results['hashtags']['#' . $key] = $value;
+ }
+ }
+
+ $collection_name = $hashtag ? 'hashtag' : 'keyword';
+
+
+ $data = scan_api_mongo_doquery(array( // V2r15 / keyword, hashtag / scan_id[2], word / none
+ 'collection' => $collection_name,
+ 'key' => 'scan_id',
+ 'query' => $query,
+ 'fields' => $fields,
+ 'empty' => $return,
+ 'emptykeys' => $scan_id ? $query['scan_id']['$in'] : array(0),
+ 'flatten' => TRUE,
+ ));
+
+ $return = $data[0];
+ // Fall back to hours if minutes is 0 (i.e. twitter might be down?). @@@ V2 Should we just go with minutes?
+ $return['velocity'] = $return['velocity.minutes'] ? $return['velocity.minutes'] : $return['velocity.hours'];
+
+ if ($scan_id) {
+ $scan = $data[$scan_id];
+ $scan['velocity'] = $scan['velocity.minutes'] ? $scan['velocity.minutes'] : $scan['velocity.hours'];
+ if ($scan['velocity'] > $return['velocity']) {
+ // Use per scan stats if it is better than global stats.
+ // Scans can be higher than global because searchapi mentions do not end up in global stats.
+ $return = $scan;
+ }
+ }
+
+ unset($return['velocity.minutes']);
+ unset($return['velocity.hours']);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
$result = array();
$interval = ($type == 1) ? 'minutes' : 'hours';
$multiplier = ($type == 1) ? 12 : 1;
- $velocity_field = $interval . '.velocity';
+ $velocity_field = 'velocity.' . $interval;
$collection_name = $is_hashtag ? 'hashtag' : 'keyword';
- $fields = array($velocity_field => 1, 'trending' => 1, 'word' => 1);
+ $fields = array('velocity.minutes' => 1, $velocity_field => 1, 'word' => 1);
$query = array(
'scan_id' => intval($scan_id),
'word' => array('$in' => array_map('strtolower', $keywords)),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+
+
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id, word[] / velocity.minutes, velocity.hours
try {
$cursor = $cursor
->find($query, $fields)
@@ -860,13 +988,13 @@
if ($is_hashtag) {
$row['word'] = '#' . $row['word'];
}
- $result[$row['word']] = isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] * $multiplier : 0;
+ $result[$row['word']] = isset($row['velocity'][$interval]) ? $row['velocity'][$interval] * $multiplier : 0;
}
}
elseif ($cursor->hasNext()) {
$result = $cursor->getNext();
- if (isset($result[$interval]['velocity'])) {
- $result['velocity'] = $result[$interval]['velocity'] * $multiplier;
+ if (isset($result['velocity'][$interval])) {
+ $result['velocity'] = $result['velocity'][$interval] * $multiplier;
}
}
}
@@ -887,32 +1015,50 @@
$hashtag = TRUE;
$keyword = substr($keyword, 1);
}
- // get global stat
- $return = _scan_keyword_velocity(0, array($keyword), $hashtag, FALSE, 1);
- // if twitter have gone sleeping fall back to 6 hr timeslice
- if (!$return || !$return['velocity']) {
- $return = _scan_keyword_velocity(0, array($keyword), $hashtag, FALSE, 2);
- }
- // get per scan stat if we have scan_id (searchapi mentions do not get into global stat
- // making global stat too low for what we actually show...
- $per_scan = array();
+ $collection_name = $hashtag ? 'hashtag' : 'keyword';
+ $fields = array(
+ 'scan_id' => 1,
+ 'velocity.minutes' => 1,
+ 'velocity.hours' => 1,
+ 'trending' => 1,
+ );
+ $query = array(
+ 'scan_id' => $scan_id ? array('$in' => array(0, $scan_id)) : 0,
+ 'word' => strtolower($keyword),
+ );
+ $empty = array(
+ 'velocity.minutes' => 0,
+ 'velocity.hours' => 0,
+ 'trending' => 0,
+ );
+
+ $data = scan_api_mongo_doquery(array( // V2r15 / keyword, hashtag / scan_id[2], word / none
+ 'collection' => $collection_name,
+ 'key' => 'scan_id',
+ 'query' => $query,
+ 'fields' => $fields,
+ 'empty' => $empty,
+ 'emptykeys' => $scan_id ? $query['scan_id']['$in'] : array(0),
+ 'flatten' => TRUE,
+ ));
+
+ $return = $data[0];
+ // Fall back to hours if minutes is 0 (i.e. twitter might be down?). @@@ V2 Should we just go with minutes?
+ $return['velocity'] = $return['velocity.minutes'] ? $return['velocity.minutes'] : $return['velocity.hours'];
+
if ($scan_id) {
- $per_scan = _scan_keyword_velocity($scan_id, array($keyword), $hashtag, FALSE, 1);
- if (!$per_scan || !$per_scan['velocity']) {
- $per_scan = _scan_keyword_velocity($scan_id, array($keyword), $hashtag, FALSE, 2);
+ $scan = $data[$scan_id];
+ $scan['velocity'] = $scan['velocity.minutes'] ? $scan['velocity.minutes'] : $scan['velocity.hours'];
+ if ($scan['velocity'] > $return['velocity']) {
+ // Use per scan stats if it is better than global stats.
+ // Scans can be higher than global because searchapi mentions do not end up in global stats.
+ $return = $scan;
}
- if (!$return && $per_scan) {
- $return = $per_scan;
- }
}
- // if we have per scan stat as well then return the one that's higher
- if ($per_scan && $per_scan['velocity'] > $return['velocity']) {
- $return = $per_scan;
- }
- // if not found in DB
- if (!$return) {
- $return = array('velocity' => 0, 'trending' => 0);
- }
+
+ unset($return['velocity.minutes']);
+ unset($return['velocity.hours']);
+
print _scan_api_format($return, $format);
}
@@ -983,22 +1129,20 @@
'velocity' => 0,
'difference' => 0,
);
- $fields = array('minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
+ $fields = array('velocity.minutes_scan' => 1, 'increasing' => 1);
$query = array(
'scan_id' => intval($scan_id),
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none (single)
try {
$cursor = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
if ($cursor->hasNext()) {
$row = $cursor->getNext();
- $velocity = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? round(12 * $row['minutes']['prev_velocity']) : 0;
$return = array(
- 'velocity' => $velocity,
- 'difference' => $velocity - $prev_velocity,
+ 'velocity' => isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0,
+ 'difference' => $row['increasing'] ? 1 : -1,
);
}
}
@@ -1011,7 +1155,7 @@
/**
* Determine a scan's number of uniq links
*/
-function scan_scan_uniq_links() {
+function scan_scan_uniq_links() { // @@@ V2 performance?
// $scan_id, $format
extract(_scan_get_args());
$return = array(
@@ -1025,7 +1169,8 @@
SCAN_CATEGORY_PHOTOS => 'photo',
SCAN_CATEGORY_VIDEOS => 'video',
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / none (count)
+ // @@@V2 performance Switch to single query grouped by category?
foreach ($categories as $category => $return_key) {
$query = array(
'scan_id' => intval($scan_id),
@@ -1104,7 +1249,7 @@
'category' => $category,
);
$range = $blocked_url_ids ? sizeof($blocked_url_ids) : 0;
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id[], category / count:-1 (query is paged!)
try {
$cursor = $cursor
->find($query, $fields)
@@ -1140,7 +1285,7 @@
'scan_id' => array('$in' => $scan_ids),
'category' => $category,
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id[], category, (url_id[]) / none (count)
try {
$count = $cursor
->find($query)
@@ -1447,6 +1592,69 @@
}
/**
+ * Get bucket data for rendering a sparkline, etc.
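+ *
+ * Returns one entry per value in $values. With hourly buckets the shape is
+ * roughly (values here are illustrative):
+ *   array(123 => array('size' => 24, 'offset' => <current bucket index>,
+ *     'data' => array(<current hour>, <1 hour ago>, ...)))
+ * 'data' is reordered newest-first so callers can render it directly.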
+ */
+function scan_get_bucket_data($collection, $time_type, $key = 'scan_id', $values, $query = array(), $when = FALSE) {
+ if (!$when) {
+ $when = time();
+ }
+ $interval_size_ = scan_api_interval_size($collection, $time_type);
+ $interval_count_ = scan_api_interval_count($collection, $time_type);
+ $bucket_cycle_time = $interval_size_ * $interval_count_;
+ $index = scan_api_bucket_index($collection, $time_type, $when);
+
+ if (is_array($values)) {
+ $query[$key] = array('$in' => $values);
+ }
+ else {
+ $query[$key] = $values;
+ $values = array($values);
+ }
+
+ // Initialize the output array.
+ $data = array();
+ foreach ($values as $v) {
+ $data[$v] = array(
+ 'size' => $interval_count_,
+ 'offset' => $index,
+// 'cutoff' => 0, // @@@ Point where we run out of data
+// 'range' => 0, // @@@ Hours in range
+ 'data' => array_fill(0, $interval_count_, 0),
+ );
+ }
+
+ $fields = array(
+ $key => 1,
+ 'created' => 1,
+ 'updated' => 1,
+ $time_type => 1,
+ );
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / * / scan_id[] / none (sparkline data analyzer)
+ try {
+ $cursor = $cursor->find($query, $fields)
+ ->timeout(scan_api_get_mongo_timeout());
+ foreach ($cursor as $row) {
+ // If this scan hasn't been touched for more than a cycle, continue.
+ // We already initialized everything to 0 above.
+ if ($when > ($bucket_cycle_time + $row['updated'])) {
+ continue;
+ }
+
+ for ($i = 0; $i < $interval_count_; $i++) {
+ // OK, so we need to walk forwards on one array while walking backwards and wrapping around on another.
+ // Adding $interval_count_ to $index on the right is done because PHP's modulus handles negatives in the
+ // equally-correct-but-not-as-useful-as-the-other-way-around fashion.
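+ // e.g. with $interval_count_ = 24 and $index = 3: $i = 0 reads bucket 3 (the current one),
+ // $i = 1 reads bucket 2, and $i = 4 wraps around to bucket (24 + 3 - 4) % 24 = 23.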
+ $data[$row[$key]]['data'][$i] = $row[$time_type][($interval_count_ + $index - $i) % $interval_count_];
+ }
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
+ }
+ return $data;
+}
+
+/**
* Produce list of buckets in order from newest to oldest
* scan buckets do not maintain that order: below the current index we have new items, above we have old ones,
* which makes them awkward to manage directly. Here's an API for that.
@@ -1490,42 +1698,22 @@
return $bucket;
}
function scan_stats_velocity() {
+ $when = time();
// $scan_id,, $count, $max_age, $format
extract(_scan_get_args());
+ $scan_id = intval($scan_id);
- $epoch = gmdate('c', 0);
- $return = array_fill(0, $count, array('count' => 0, 'start_time' => $epoch, 'last_occurence' => $epoch + 3600));
-
- if ($cursor = scan_api_get_mongo('statistics', 'scan')) {
- $fields = array('hours' => 1, 'updated' => 1);
- $query = array(
- 'scan_id' => intval($scan_id),
+ $data = scan_get_bucket_data('scan', 'hours', 'scan_id', $scan_id, array(), $when);
+ $return = array();
+ foreach ($data[$scan_id]['data'] as $k => $v) {
+ $return[$k] = array(
+ 'count' => $v,
+ //@@@V2 This is horribly inefficient, data transfer wise. Would be much better to
+ // pass a single time and have the JS code do offsets from that itself.
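+ // start_time is the top of the current hour minus $k hours,
+ // e.g. "2011-06-01T14:00:00+00:00", "2011-06-01T13:00:00+00:00", ...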
+ 'start_time' => gmdate('c', ($when - $when % 3600) - ($k * 3600)),
);
- try {
- $cursor = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout());
+ }
- if ($cursor->hasNext()) {
- $row = $cursor->getNext();
- $updated = explode(' ', (string)$row['updated']);
- $bucket = scan_api_reorder_scan_time_buckets($row['hours'], 'scan', 'hours', $updated[1]);
-
- $now = mktime(date('H'), 0, 0 );
- foreach($bucket as $index => $value ) {
- if ( $count == $index ) {
- break;
- }
- $return[$index] = array(
- 'count' => $value,
- 'start_time' => gmdate('c', $now - ($index * 60 * 60)),
- );
- }
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
print _scan_api_format($return, $format);
}
@@ -1565,17 +1753,17 @@
// fill up the stats
if ($return) {
$scan_ids = array_keys($return);
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'hours.velocity');
+ $fields = array('scan_id' => 1, 'velocity.minutes_scan' => 1);
$query = array(
'scan_id' => array('$in' => $scan_ids),
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
try {
$cursor = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
- $return[$row['scan_id']]['velocity'] = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
+ $return[$row['scan_id']]['velocity'] = isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -1586,25 +1774,33 @@
}
else {
// get a basic data structure in ordered form
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1);
+ $fields = array(
+ 'scan_id' => 1,
+ 'velocity.minutes_scan' => 1,
+ 'velocity.hours_general' => 1, // @@@V2 was days.general.velocity
+ 'velocity.hours_photo' => 1, //@@@V2 was days.photo.velocity
+ 'velocity.hours_video' => 1, //@@@V2 was days.video.velocity
+ 'velocity.hours_urls' => 1, //@@@V2 was days.velocity
+ );
$query = array(
- 'scan.client_id' => intval($client_id), 'scan.status' => 1, 'scan.active' => 1,
+ 'scan.client_id' => intval($client_id), 'scan.status' => 1
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan.client_id, scan.status / velocity.minutes_scan:-1
try {
$cursor = $cursor
->find($query, $fields)
- ->sort(array('minutes.velocity' => -1))
+ ->sort(array('velocity.minutes_scan' => -1))
->limit($count)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
$return[$row['scan_id']] = array(
'scan_id' => $row['scan_id'],
- 'velocity' => isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0,
- 'general' => 0,
- 'photo' => 0,
- 'video' => 0,
- 'summary' => 0,
+ // @@@ x12 multiplier is less accurate than summing the minutes.scan array.
+ 'velocity' => isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0,
+ 'general' => isset($row['velocity']['hours_general']) ? round($row['velocity']['hours_general']) : 0,
+ 'photo' => isset($row['velocity']['hours_photo']) ? round($row['velocity']['hours_photo']) : 0,
+ 'video' => isset($row['velocity']['hours_video']) ? round($row['velocity']['hours_video']) : 0,
+ 'summary' => isset($row['velocity']['hours_urls']) ? round($row['velocity']['hours_urls']) : 0,
);
}
}
@@ -1630,34 +1826,9 @@
}
}
}
- // common query for both cases: if we have data we need to fill up url uniq stats
if ($return) {
- $fields = array(
- 'scan_id' => 1,
- 'days.velocity' => 1,
- 'days.general.velocity' => 1,
- 'days.photo.velocity' => 1,
- 'days.video.velocity' => 1,
- );
- $query = array(
- 'scan_id' => array('$in' => $scan_ids),
- );
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
- try {
- $cursor = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($cursor as $row) {
- $return[$row['scan_id']]['summary'] = round($row['days']['velocity'] * 30);
- $return[$row['scan_id']]['general'] = round($row['days']['general']['velocity'] * 30);
- $return[$row['scan_id']]['photo'] = round($row['days']['photo']['velocity'] * 30);
- $return[$row['scan_id']]['video'] = round($row['days']['video']['velocity'] * 30);
- }
- $return = array_filter($return, '_scan_api_filter_nid');
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
+ // Filter out scans not backed by nodes.
+ $return = array_filter($return, '_scan_api_filter_nid');
}
scan_api_set_active_shard();
print _scan_api_format(array_values($return), $format);
@@ -1752,33 +1923,15 @@
if ($matches) {
$scan_ids = array_keys($matches);
// fill up the scan stats part
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
- $query = array(
- 'scan_id' => array('$in' => $scan_ids),
- );
- try {
- $cursor = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($cursor as $row) {
- $velocity = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? round(12 * $row['minutes']['prev_velocity']) : 0;
- $data[$row['scan_id']]['velocity'] = $velocity;
- $data[$row['scan_id']]['difference'] = $velocity - $prev_velocity;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
- // fill up the url uniq stats part
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
$fields = array(
'scan_id' => 1,
- 'days.velocity' => 1,
- 'days.general.velocity' => 1,
- 'days.photo.velocity' => 1,
- 'days.video.velocity' => 1,
+ 'velocity.minutes_scan' => 1,
+ 'velocity.increasing' => 1,
+ 'velocity.hours_general' => 1, // @@@V2 was days.general.velocity
+ 'velocity.hours_photo' => 1, //@@@V2 was days.photo.velocity
+ 'velocity.hours_video' => 1, //@@@V2 was days.video.velocity
+ 'velocity.hours_urls' => 1, //@@@V2 was days.velocity
);
$query = array(
'scan_id' => array('$in' => $scan_ids),
@@ -1788,10 +1941,13 @@
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
- $data[$row['scan_id']]['uniq_links_summary'] = round($row['days']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_general'] = round($row['days']['general']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_photo'] = round($row['days']['photo']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_video'] = round($row['days']['video']['velocity'] * 30);
+ $data[$row['scan_id']]['velocity'] = isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0;
+ //@@@V2 This is really a boolean now -- requires theme / js change to fix.
+ $data[$row['scan_id']]['difference'] = isset($row['velocity']['increasing']) ? $row['velocity']['increasing'] : 0;
+ $data[$row['scan_id']]['uniq_links_summary'] = round($row['velocity']['hours_urls']);
+ $data[$row['scan_id']]['uniq_links_general'] = round($row['velocity']['hours_general']);
+ $data[$row['scan_id']]['uniq_links_photo'] = round($row['velocity']['hours_photo']);
+ $data[$row['scan_id']]['uniq_links_video'] = round($row['velocity']['hours_video']);
}
}
catch (MongoCursorTimeoutException $e) {
@@ -2499,20 +2655,30 @@
* @return
* A mongoCollection.
*/
-function scan_api_get_mongo($db_name, $collection_name) {
+function scan_api_get_mongo($collection_name, $shard_key = FALSE) {
static $connections;
- if (!isset($connections[$db_name])) {
+ static $mongo_dbs;
+ if (!isset($mongo_dbs)) {
$mongo_dbs = variable_get('mongo_dbs', array());
- $mongo_db = $mongo_dbs[isset($mongo_dbs[$db_name]) ? $db_name : 'default'];
+ }
+ if (!$shard_key) {
+ $shard_key = $collection_name;
+ }
+ // Normalize shard key so we can reuse connections often.
+ if (!isset($mongo_dbs[$shard_key])) {
+ $shard_key = 'default';
+ }
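+ // e.g. scan_api_get_mongo('keyword') and scan_api_get_mongo('hashtag') both land on the
+ // 'default' connection unless mongo_dbs defines dedicated entries for those shard keys.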
+ if (!isset($connections[$shard_key])) {
+ $mongo_db = $mongo_dbs[$shard_key];
try {
$mongo = new mongo($mongo_db['host']);
- $connections[$db_name] = $mongo->selectDB($mongo_db['db']);
+ $connections[$shard_key] = $mongo->selectDB($mongo_db['db']);
}
catch (MongoConnectionException $e) {
return;
}
}
- return $connections[$db_name]->selectCollection($collection_name);
+ return $connections[$shard_key]->selectCollection($collection_name);
}
/**
@@ -2530,3 +2696,165 @@
return $timeout;
}
+/**
+ * One stop shop for getting a blob of stuff from mongo.
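+ *
+ * $args is extract()ed; recognized keys are roughly:
+ *   'collection' (required), 'query' (required), 'key' (FALSE => single row),
+ *   'fields' (array()), 'sort' (NULL), 'limit' (100), 'empty' (per-row defaults),
+ *   'emptykeys' (keys to pre-seed in the result), 'flatten' (FALSE, one level of dot-keys),
+ *   'zeromap' (src => dst fallback fields), 'remap' (src => dst field renames),
+ *   'stripmongoid' (TRUE).
+ * Returns rows keyed by 'key' (dotted keys supported), or a single row when 'key' is FALSE.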
+ */
+function scan_api_mongo_doquery($args) {
+ //$collection
+ $key = FALSE;
+ //$query
+ $fields = array();
+ $sort = NULL;
+ $limit = 100;
+ $empty = array();
+ $emptykeys = array();
+ $flatten = FALSE;
+ $zeromap = array();
+ $remap = array();
+ $stripmongoid = TRUE;
+ extract($args);
+
+ if (!$key) {
+ // No sense in reading multiple values if this will be single-result.
+ $limit = 1;
+ }
+
+ $return = array();
+ if (!empty($emptykeys)) {
+ foreach ($emptykeys as $k) {
+ $return[$k] = array();
+ }
+ }
+
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 metaquery
+ try {
+ $cursor = $cursor->find($query, $fields)
+ ->limit($limit);
+ if (isset($sort)) {
+ $cursor->sort($sort);
+ }
+ $query = $cursor->timeout(scan_api_get_mongo_timeout());
+ if (!$key) {
+ if ($query->hasNext()) {
+ // Switch to findOne() instead of find()?
+ $return[0] = $query->getNext();
+ }
+ else {
+ // No result.
+ return FALSE;
+ }
+ }
+ else if (strpos($key, '.') !== FALSE) {
+ $keyparts = explode('.', $key);
+ foreach ($query as $row) {
+ $r =& $row;
+ foreach ($keyparts as $part) {
+ $r =& $r[$part];
+ }
+ $rowkey = $r;
+ unset($r);
+ $return[$rowkey] = $row;
+ }
+ }
+ else {
+ foreach ($query as $row) {
+ // Assumes data from mongo is consistent.
+ $return[$row[$key]] = $row;
+ }
+ }
+ if ($stripmongoid) {
+ foreach ($return as $k => $v) {
+ unset($return[$k]['_id']);
+ }
+ }
+ if ($flatten) {
+ // Single level flattening. Doing it like this because recursion sucks.
+ // Not gonna bother with more than one dot for now.
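+ // e.g. array('velocity' => array('minutes' => 2)) becomes array('velocity.minutes' => 2).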
+ foreach ($return as $k => $v) {
+ if (is_array($v)) {
+ foreach ($return[$k] as $kk => $vv) {
+ if (is_array($vv)) {
+ foreach ($return[$k][$kk] as $kkk => $vvv) {
+ $return[$k]["$kk.$kkk"] =& $return[$k][$kk][$kkk];
+ }
+ unset($return[$k][$kk]);
+ }
+ }
+ }
+ }
+ }
+ // Do a single level initialization of defaults.
+ if (!empty($empty)) {
+ foreach ($return as $k => $v) {
+ foreach ($empty as $kk => $vv) {
+ if (!isset($return[$k][$kk])) {
+ $return[$k][$kk] = $vv;
+ }
+ }
+ }
+ }
+ // Do zero mapping for fallbacks.
+ // Note: This only applies if every entry in that field is 0.
+ if (!empty($zeromap)) {
+ foreach ($zeromap as $src => $dst) {
+ $fallback = TRUE;
+ foreach ($return as $k => $v) {
+ if ($return[$k][$src]) {
+ $fallback = FALSE;
+ break;
+ }
+ }
+ foreach ($return as $k => $v) {
+ if ($fallback) {
+ $return[$k][$dst] =& $return[$k][$src];
+ }
+ // Always unset src, even if not falling back.
+ unset($return[$k][$src]);
+ }
+ }
+ }
+ // Perform output remapping to adapt the result array on behalf
+ // of the caller.
+ if (!empty($remap)) {
+ foreach ($remap as $src => $dst) {
+ foreach ($return as $k => $v) {
+ $return[$k][$dst] =& $return[$k][$src];
+ unset($return[$k][$src]);
+ }
+ }
+ }
+ if (!$key) {
+ return $return[0];
+ }
+ return $return;
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
+ }
+}
+
+
+/**
+ * Set query result keys.
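+ *
+ * Re-keys a result set by $key, e.g. scan_api_mongo_keyresult($cursor, 'scan_id') returns
+ * array(<scan_id> => <row>, ...). Dotted keys such as 'scan.client_id' walk into nested arrays.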
+ */
+function scan_api_mongo_keyresult($query, $key) {
+ $return = array();
+ if (strpos($key, '.') !== FALSE) {
+ $keyparts = explode('.', $key);
+ foreach ($query as $row) {
+ $r =& $row;
+ foreach ($keyparts as $part) {
+ $r =& $r[$part];
+ }
+ $rowkey = $r;
+ unset($r);
+ $return[$rowkey] = $row;
+ }
+ }
+ else {
+ foreach ($query as $row) {
+ $return[$row[$key]] = $row;
+ }
+ }
+ return $return;
+}