Index: www/sites/all/modules/custom/np_scan/np_scan.analytics.inc
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.analytics.inc (revision 34236)
+++ www/sites/all/modules/custom/np_scan/np_scan.analytics.inc (working copy)
@@ -19,71 +19,6 @@
}
/**
- * Helper function to get the name of the velocity field.
- *
- * @param $timeframe time in hours 24, 48, 168,720
- * @param $collection_name the name of the mongo collection
- * @return string, the name of the velocity field
- */
-function _np_scan_analytics_get_velocity_field($timeframe, $collection_name) {
- $velocity_field = '';
-
- if ($collection_name == 'scanurl') {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity24';
- break;
- case 168: //week
- $velocity_field = 'days.velocity7';
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'hours.velocity';
- break;
- }
- }
- elseif ($collection_name == 'scan') {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity';
- break;
- case 168: //week
- $velocity_field = 'days.velocity7';
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'hours.velocity48';
- break;
- }
- }
- elseif (in_array($collection_name, array('hashtag', 'keyword', 'retweet'))) {
- switch ($timeframe) {
- case 24: // day
- $velocity_field = 'hours.velocity';
- break;
- case 168: //week
- $velocity_field = 'days.velocity'; // no stats use month
- break;
- case 720: // month
- case 'all':
- $velocity_field = 'days.velocity';
- break;
- default: // biday
- $velocity_field = 'days.velocity'; // no stats use month
- break;
- }
- }
-
- return $velocity_field;
-}
-
-/**
* Menu callback; property statistics.
*
* $page - which page are we displaying results for?
@@ -104,7 +39,7 @@
$scans = array();
// this variable gonna be handy later
$key = 'scan_id';
-
+
if ($page == 'views') {
$timeframe = !empty($_GET['timeframe']) ? $_GET['timeframe'] : 'all';
}
@@ -114,6 +49,7 @@
switch ($page) {
case 'views':
+ $velocity_field_2 = ($timeframe == 24) ? 'velocity.hours' : 'velocity.days';
$args = array_merge(array($scan_status), $client_ids );
scan_api_set_active_shard('misc');
$key = 'vid';
@@ -135,36 +71,15 @@
break;
case 'links':
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, 'scanurl');
- $fields = array('scan_id' => 1);
- $query = array(
- 'scan.client_id' => array('$in' => $client_ids),
- 'scan.status' => $scan_status,
- );
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
- try {
- $results = $cursor
- ->find($query, $fields)
- ->sort(array($velocity_field => -1))
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($results as $row) {
- $scans[$row['scan_id']] = FALSE;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
- break;
-
default:
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, 'scan');
+ $velocity_field = ($page == 'links') ? 'velocity.hours_urls' : 'velocity.hours_scan';
+ $velocity_field_2 = 'velocity.days'; // @@@ V2 Maybe we should standardize on 48h across the board?
$fields = array('scan_id' => 1);
$query = array(
'scan.client_id' => array('$in' => $client_ids),
'scan.status' => $scan_status,
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan.client_id[], scan.status / velocity.hours_urls, velocity.hours_scan
try {
$results = $cursor
->find($query, $fields)
@@ -203,24 +118,23 @@
// pour together hashtag and keyword velocities
foreach (array('keyword', 'hashtag') as $collection_name) {
if ($words[$collection_name]) {
- $velocity_field = _np_scan_analytics_get_velocity_field($timeframe, $collection_name);
- list($interval, $tmp) = explode('.', $velocity_field);
- $fields = array('word' => 1, $velocity_field => 1);
+ list($tmp, $interval) = explode('.', $velocity_field_2);
+ $fields = array('word' => 1, $velocity_field_2 => 1);
$query = array(
'scan_id' => intval($statistics->scan_id),
'word' => array('$in' => array_map('strtolower', $words[$collection_name])),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id, word[] / velocity.hours:-1, velocity.days:-1
try {
$results = $cursor
->find($query, $fields)
- ->sort(array($velocity_field => -1))
+ ->sort(array($velocity_field_2 => -1))
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if ($collection_name == 'hashtag') {
$row['word'] = '#' . $row['word'];
};
- $word_velocities[$row['word']] = isset($row[$interval]['velocity']) ? round($row[$interval]['velocity'], VELOCITY_PRECISION) : 0;
+ $word_velocities[$row['word']] = isset($row['velocity'][$interval]) ? round($row['velocity'][$interval], VELOCITY_PRECISION) : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -248,60 +162,43 @@
'timestamp' => strtotime('-1 day'),
);
- // fill up velocity and difference
+ // fill up velocity and difference / stats.
$statistics->velocity = 0;
$statistics->difference = 0;
- $fields = array('minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
- $query = array(
- 'scan_id' => intval($statistics->scan_id),
- );
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- try {
- $results = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- if ($results->hasNext()) {
- $row = $results->getNext();
- $velocity = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? $row['minutes']['prev_velocity'] * 12 : 0;
- $statistics->velocity = $velocity;
- $statistics->difference = $velocity - $prev_velocity;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
-
- // Fill up the url uniq stats part. Initialize to 0.
- $statistics->velocity = 0;
$statistics->general = 0;
$statistics->photo = 0;
$statistics->video = 0;
-
$statistics->url_velocity = 0;
-
$fields = array(
'scan_id' => 1,
- 'hours.velocity' => 1,
- 'hours.general.velocity' => 1,
- 'hours.photo.velocity' => 1,
- 'hours.video.velocity' => 1,
+ // @@@ V2 Previous code disagrees on whether to use minutes or hours here.
+ //'velocity.minutes' => 1,
+ 'velocity.hours_scan' => 1,
+ 'velocity.hours_general' => 1,
+ 'velocity.hours_photo' => 1,
+ 'velocity.hours_video' => 1,
+ 'velocity.hours_urls' => 1,
+ 'increasing' => 1,
);
$query = array(
'scan_id' => intval($statistics->scan_id),
);
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none
try {
$results = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
- foreach ($results as $row) {
- $statistics->velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
- $statistics->general = isset($row['hours']['general']['velocity']) ? $row['hours']['general']['velocity'] : 0;
- $statistics->photo = isset($row['hours']['photo']['velocity']) ? $row['hours']['photo']['velocity'] : 0;
- $statistics->video = isset($row['hours']['video']['velocity']) ? $row['hours']['video']['velocity'] : 0;
-
- $statistics->url_velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
+ if ($results->hasNext()) {
+ $row = $results->getNext();
+ // @@@ V2 Previous code disagrees on whether to use minutes or hours here.
+ //$statistics->velocity = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
+ $statistics->velocity = isset($row['velocity']['hours_scan']) ? $row['velocity']['hours_scan'] : 0;
+ $statistics->general = isset($row['velocity']['hours_general']) ? $row['velocity']['hours_general'] : 0;
+ $statistics->photo = isset($row['velocity']['hours_photo']) ? $row['velocity']['hours_photo'] : 0;
+ $statistics->video = isset($row['velocity']['hours_video']) ? $row['velocity']['hours_video'] : 0;
+ $statistics->url_velocity = isset($row['velocity']['hours_urls']) ? $row['velocity']['hours_urls'] : 0;
+ // @@@ V2 Teach everything about "increasing".
+          $statistics->difference = !empty($row['increasing']) ? 1 : -1;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -365,15 +262,18 @@
$max = 0;
$data = array();
- $order_field = ($order == 'trending') ? 'trending' : 'minutes.velocity';
+ $order_field = ($order == 'trending') ? 'trending' : 'velocity.minutes';
$ordering_data = array();
foreach ($scan_ids as $scan_id) {
- $fields = array('minutes.velocity' => 1, 'trending' => 1, 'word' => 1);
+ $fields = array(
+ 'velocity.minutes' => 1,
+ 'trending' => 1,
+ 'word' => 1,
+ );
$query = array(
'scan_id' => $scan_id,
- 'word' => array('$exists' => TRUE),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id / trending:-1, velocity.minutes:-1
try {
$results = $cursor
->find($query, $fields)
@@ -382,11 +282,12 @@
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if (!isset($row['word'])) {
+              assert(FALSE); // Worker was speechless!
// bug in workers. they write empty word records... bad
continue;
}
- $velocity = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $velocity = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
$order_data = $order != 'trending' ? $velocity : $row['trending'];
if (!isset($ordering_data[$row['word']]) || ($ordering_data[$row['word']] < $order_data)) {
$data[$row['word']] = array(
@@ -445,16 +346,16 @@
$query = array(
'scan_id' => array('$in' => $scan_ids),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id[] / velocity.minutes:-1 limit 1
try {
$results = $cursor
- ->find($query, array('minutes.velocity' => 1))
- ->sort(array('minutes.velocity' => -1))
+ ->find($query, array('velocity.minutes' => 1))
+ ->sort(array('velocity.minutes' => -1))
->limit(1)
->timeout(scan_api_get_mongo_timeout());
if ($results->hasNext()) {
$row = $results->getNext();
- $max = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $max = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -462,15 +363,13 @@
}
}
else {
- // MAX(ABS(trending)
$max_trending = $min_trending = 0;
$query = array(
'scan_id' => array('$in' => $scan_ids),
- 'trending' => array('$ne' => -1000),
);
// max trending
- if ($cursor = scan_api_get_mongo('scan_stats', $table_type)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($table_type)) { // V2r15 / keyword, hashtag / scan_id[] / trending:-1, trending:1 limit 1 (two queries)
try {
$results = $cursor
->find($query, array('trending' => 1))
@@ -531,11 +430,10 @@
foreach (array('keyword', 'hashtag') as $collection_name) {
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
- // max trending
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / trending:-1, velocity.minutes:-1 count 1 two queries
try {
+ // max trending
$results = $cursor
->find($query, array('trending' => 1))
->sort(array('trending' => -1))
@@ -548,13 +446,13 @@
// max velocity
$results = $cursor
- ->find($query, array('minutes.velocity' => 1))
- ->sort(array('minutes.velocity' => -1))
+ ->find($query, array('velocity.minutes' => 1))
+ ->sort(array('velocity.minutes' => -1))
->limit(1)
->timeout(scan_api_get_mongo_timeout());
if ($results->hasNext()) {
$row = $results->getNext();
- $result[$collection_name]['velocity'] = isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0;
+ $result[$collection_name]['velocity'] = isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -575,7 +473,7 @@
/**
* Helper function to get the pager's current page number.
*/
-function _np_scan_analytics_get_page_number($db, $collection, $limit, $query = array(), $element = 0) {
+function _np_scan_analytics_get_page_number($collection, $limit, $query = array(), $element = 0) {
global $pager_page_array, $pager_total, $pager_total_items;
// Initialize pager, see pager.inc.
@@ -583,7 +481,7 @@
$page = isset($_GET['page']) ? $_GET['page'] : '';
$pager_page_array = explode(',', $page);
$pager_total_items[$element] = 0;
- if ($cursor = scan_api_get_mongo($db, $collection)) { // keyword, hashtag, (url -- disabled)
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / keyword, hashtag, (url -- disabled) / varies(_np_scan_analytics_get_page_number) / none (count)
try {
$pager_total_items[$element] = $cursor->find($query)
->timeout(scan_api_get_mongo_timeout())
@@ -605,13 +503,12 @@
$keyword_origin_access = user_access('access keyword origin');
$collection_name = $hashtag ? 'hashtag' : 'keyword';
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / trending (paged query)
$fields = array('scan_id' => 1, 'trending' => 1, 'word' => 1);
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- $pagenumber = $full ? _np_scan_analytics_get_page_number('scan_stats', $collection_name, $limit, $query) : 0;
+ $pagenumber = $full ? _np_scan_analytics_get_page_number($collection_name, $limit, $query) : 0; // V2r15 / hashtag, keyword / scan_id=0 / none (count)
try {
$results = $cursor
->find($query, $fields)
@@ -659,26 +556,25 @@
$keyword_origin_access = user_access('access keyword origin');
$collection_name = $hashtag ? 'hashtag' : 'keyword';
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0 / velocity.minutes:-1 paged query
$query = array(
'scan_id' => 0,
- 'word' => array('$exists' => TRUE),
);
- $pagenumber = $full ? _np_scan_analytics_get_page_number('scan_stats', $collection_name, $limit, $query) : 0;
+ $pagenumber = $full ? _np_scan_analytics_get_page_number($collection_name, $limit, $query) : 0; // V2r15 / keyword, hashtag / scan_id=0 / none (count)
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'word' => 1);
+ $fields = array('velocity.minutes' => 1, 'word' => 1);
try {
$results = $cursor
->find($query, $fields)
- ->sort(array('minutes.velocity' => -1))
+ ->sort(array('velocity.minutes' => -1))
->skip($pagenumber * $limit)
->limit($limit)
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
$tweets[$row['word']] = array(
- 'scan_id' => $row['scan_id'],
- 'velocity' => isset($row['minutes']['velocity']) ? $row['minutes']['velocity'] * 12 : 0,
+ 'scan_id' => 0,
+ 'velocity' => isset($row['velocity']['minutes']) ? $row['velocity']['minutes'] * 12 : 0,
'word' => $row['word'],
);
}
@@ -715,13 +611,17 @@
}
if ($tweets) {
-
- $fields = array('word' => 1, 'hours' => 1, 'updated' => 1);
$query = array(
'scan_id' => 0,
'word' => array('$in' => array_map('strtolower', $words)),
);
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
+ $fields = array(
+ 'word' => 1,
+ 'hours' => 1,
+ 'updated' => 1,
+ );
+
+    if ($cursor = scan_api_get_mongo($collection_name)) { // V2r15 / keyword, hashtag / scan_id=0, word[] / none
try {
$results = $cursor
->find($query, $fields)
@@ -753,7 +653,7 @@
'word' => array('$in' => $words),
'updated' => array('$gte' => $date),
);
- $cursor = scan_api_get_mongo('urls', 'url')
+ $cursor = scan_api_get_mongo('urls', 'url') // !V2r15 not converted -- commented out code
->find($query, $fields)
->sort(array('updated' => -1));
$i = 0;
@@ -872,22 +772,22 @@
//$pagenumber = $paged ? _np_scan_analytics_get_page_number('urls', 'url', $limit, array('scan_id' => array('$in' => $scan_ids), 'category' => $category)) : 0;
$pagenumber = 0; //@todo: fixme
foreach ($scan_ids as $scan_id) {
- $fields = array('hours.velocity' => 1, 'url_id' => 1);
+ $fields = array('velocity' => 1, 'url_id' => 1);
$query = array(
- 'scan_id' => $scan_id,
- 'category' => $category,
+ 'scan_id' => intval($scan_id),
+ 'category' => intval($category),
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / velocity:-1 paged query
try {
$results = $cursor
->find($query, $fields)
- ->sort(array('hours.velocity' => -1))
+ ->sort(array('velocity' => -1))
->skip($pagenumber * $limit)
->limit($limit)
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
$url_ids[$row['url_id']] = $row['url_id'];
- $velocity = isset($row['hours']['velocity']) ? $row['hours']['velocity'] : 0;
+ $velocity = isset($row['velocity']) ? $row['velocity'] : 0;
if (!isset($order_data[$row['url_id']])) {
$order_data[$row['url_id']] = $velocity;
}
@@ -904,6 +804,44 @@
}
/**
+ * FASTER Helper function to get url statistics for different categories.
+ */
+function _np_scan_analytics_group_category_velocity($group_ids, $category, $limit) {
+ $url_ids = array(); // @@@ Use PHP better. This can be done with a single array
+ $order_data = array(); // assuming the callers know how to preserve a sorted array.
+ $fields = array(
+ 'velocity' => 1,
+ 'url_id' => 1,
+ );
+ $query = array(
+ 'scan.client_id' => array('$in' => array_map('intval', $group_ids)),
+ 'category' => intval($category),
+ );
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / velocity:-1 paged query
+ try {
+ $results = $cursor->find($query, $fields)
+ ->sort(array('velocity' => -1))
+ // Set limit large enough so that we can generally satisfy the $limit requested.
+ // This is relatively "cheap".
+ ->limit(150)
+ ->timeout(scan_api_get_mongo_timeout());
+ $found = 0;
+ while ($found < $limit && $results->hasNext()) {
+ $curr = $results->getNext();
+ if (!isset($url_ids[$curr['url_id']])) {
+ $found++;
+ $url_ids[$curr['url_id']] = $curr['url_id'];
+ $order_data[$curr['url_id']] = $curr['velocity'];
+ }
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
+ }
+ return array($url_ids, $order_data);
+}
+
+/**
* param $page - one of 'links', 'images', 'videos' or NULL for all results.
* param $twitter - if TRUE then find popular links across all of twitter,
* otherwise just find results within the current users' groups.
@@ -917,37 +855,22 @@
$videos_list = array();
$scan_ids = array(0);
+ $group_ids = array();
if ($page_type == 'property') {
- // when we have several scan_ids we need to do some magic, otherwise we
- // force the sql to do filesort to order the resultset and
- // temporary table to handle the distinct clause since several scans can
- // have the same url with diff velocity, so...
- // we will fetch in these cases a diff resultset for each scan id and comb them together ourselves
- // killing paging
$paged = FALSE;
- $scan_ids = array(-1);
if ($GLOBALS['user']->og_groups) {
- scan_api_set_active_shard('misc');
- $rs = db_query("
- SELECT s.scan_id
- FROM {og_ancestry} o
- INNER JOIN {scan_settings} ss on ss.nid = o.nid
- INNER JOIN {scan} s on s.vid = ss.active_vid
- WHERE o.group_nid IN (" . implode(',', array_fill(0, count($GLOBALS['user']->og_groups), '%d')) . ") ORDER BY o.nid DESC LIMIT 100", array_keys($GLOBALS['user']->og_groups));
- $scan_ids = array();
- while ($row = db_fetch_object($rs)) {
- $scan_ids[] = intval($row->scan_id);
- }
- scan_api_set_active_shard();
- if (!$scan_ids) {
- $scan_ids = array(-1);
- }
+ $group_ids = array_keys($GLOBALS['user']->og_groups);
}
}
// TOP LINKS
if ($page == 'links' || is_null($page)) {
- list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 1, $paged, $limit);
+ if (!empty($group_ids)) {
+ list($url_ids, $order_data) = _np_scan_analytics_group_category_velocity($group_ids, 1, $limit);
+ }
+ else {
+ list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 1, $paged, $limit);
+ }
$data = array();
if ($url_ids) {
@@ -993,8 +916,14 @@
// IMAGES
if ($page == 'images' || is_null($page)) {
$limit_override = is_null($page) ? 12 : $limit;
- list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 2, $paged, $limit_override);
+ if (!empty($group_ids)) {
+ list($url_ids, $order_data) = _np_scan_analytics_group_category_velocity($group_ids, 2, $limit_override);
+ }
+ else {
+ list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 2, $paged, $limit_override);
+ }
+
$data = array();
if ($url_ids) {
$url_id_placeholders = implode(',', array_fill(0, count($url_ids), '%d'));
@@ -1041,7 +970,12 @@
// VIDEOS
if ($page == 'videos' || is_null($page)) {
$limit_override = is_null($page) ? 4 : $limit;
- list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 3, $paged, $limit_override);
+ if (!empty($group_ids)) {
+ list($url_ids, $order_data) = _np_scan_analytics_group_category_velocity($group_ids, 3, $limit_override);
+ }
+ else {
+ list($url_ids, $order_data) = _np_scan_analytics_get_url_statistics_all_time($scan_ids, 3, $paged, $limit_override);
+ }
$data = array();
if ($url_ids) {
Index: www/sites/all/modules/custom/np_scan/np_scan.module
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.module (revision 34236)
+++ www/sites/all/modules/custom/np_scan/np_scan.module (working copy)
@@ -435,9 +435,11 @@
$scan = db_fetch_object(db_query('SELECT scan_id, vid, nid FROM {scan} WHERE scan_id = %d', $scan_id));
if ($scan) {
// Refuse to update to a lower vid.
- db_query('UPDATE {scan_settings} SET active_vid = %d WHERE nid = %d AND active_vid < %d', $scan->vid, $scan->nid, $scan->vid);
+ db_query('UPDATE {scan_settings} SET active_vid = %d WHERE nid = %d AND active_vid <= %d', $scan->vid, $scan->nid, $scan->vid);
if (db_affected_rows()) {
- np_scan_denorm($scan->scan_id, 1, 'active');
+ $status = db_result(db_query('SELECT status FROM {scan_settings} WHERE nid = %d', $scan->nid));
+ // Denorm status at this point so scans appear in the appropriate aggregates.
+ np_scan_denorm($scan_id, $status, 'status');
watchdog('np_scan', 'Scan id %scan_id for revision %vid promoted to active on node %nid.', array('%scan_id' => $scan->scan_id, '%vid' => $scan->vid, '%nid' => $scan->nid));
// Get any non archived/deleted scans older than the one being promoted to...
$result = db_query("SELECT scan_id, nid, vid, archived FROM {scan} WHERE nid = %d AND scan_id < %d AND archived = 0", $scan->nid, $scan->scan_id);
@@ -967,9 +969,6 @@
'auto_unyaml' => false,
));
- // Mark scan as inactive in mongo.
- np_scan_denorm($scan_id, 0, 'active');
-
// insert matching marker into DB for deletion worker to double check
db_query("INSERT INTO {scan_delete} (scan_id) VALUES (%d)", $scan_id);
@@ -1912,16 +1911,16 @@
* @param $new_status new scan status 1=online, 0=offline
*/
function np_scan_denorm($scan_id, $new_value, $key = 'status') {
- $collections = array('scan', 'keyword', 'hashtag', 'location', 'retweet', 'url', 'scanurl');
- if ($key == 'active' && $new_value == 0) {
- // Don't bother propogating the active status fully for something that's about
- // to get deleted by terminator.
- $collections = array('scan', 'scanurl');
+ // @@@ Todo: Do this processing on the c++ side.
+ $collections = array('scan', 'keyword', 'hashtag', 'location', 'retweet', 'url');
+ if ($key == 'active') {
+    assert(FALSE); // Someone tried to denorm scan.active!
+ return FALSE;
}
$key = "scan.". $key;
$set = array('$set' => array($key => intval($new_value)));
foreach ($collections as $collection) {
- if ($cursor = scan_api_get_mongo('statistics', $collection)) { // denorm
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / * / scan_id / none (update np_scan_denorm)
$cursor
->update(array('scan_id' => intval($scan_id)), $set, array('multiple' => TRUE));
}
Index: www/sites/all/modules/custom/np_scan/np_scan.archive.inc
===================================================================
--- www/sites/all/modules/custom/np_scan/np_scan.archive.inc (revision 34236)
+++ www/sites/all/modules/custom/np_scan/np_scan.archive.inc (working copy)
@@ -8,6 +8,7 @@
* @return TRUE on success, FALSE if details could not be found for this scan in scan and node DB tables
*/
function _np_scan_snapshot($scan_id, $title) {
+ $scan_id = intval($scan_id);
$scan = db_fetch_object(db_query_range('SELECT r.timestamp, s.* FROM {scan} s INNER JOIN {node_revisions} r USING(vid) WHERE scan_id = %d', $scan_id, 0, 1));
if (!empty($scan)) {
@@ -21,9 +22,9 @@
$url_stats->photo = 0;
$url_stats->video = 0;
try {
- if ($cursor = scan_api_get_mongo('scan_stats', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none (ALL)
$query = array(
- 'scan_id' => intval($scan_id),
+ 'scan_id' => $scan_id,
);
$fields = array('days.general' => 1, 'days.photo' => 1, 'days.video' => 1, 'updated' => 1);
$cursor = $cursor->find($query, $fields)
@@ -33,12 +34,12 @@
foreach (array('general', 'photo', 'video') as $key) {
if (isset($result['days'][$key])) {
// get the index of the last updated bucket
- $last_update = scan_api_bucket_index('scanurl', 'days', $result['updated']->sec);
+ $last_update = scan_api_bucket_index('scanurl', 'days', $result['updated']);
$sum = 0;
foreach ($result['days'][$key] as $index => $value) {
+          // @@@ V2 This is completely wrong and probably kills kittens.
// ignore velocity values and keys which have not been updated recently enough
- // if $index is int(0), it will return true for in_array($index, array('velocity', 'velocity7')
- if ((!is_int($index) && in_array($index, array('velocity', 'velocity7'))) || ($index > $last_update)) {
+ if ($index > $last_update) {
continue;
}
else {
@@ -89,7 +90,7 @@
*
*/
function _np_scan_archive_get_urls_cursor($scan_id, $category) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / count:-1 (limit 1000)
$query = array(
'scan_id' => intval($scan_id),
'category' => intval($category),
@@ -370,12 +371,10 @@
* @param $snapshot_id
*/
function np_scan_archive_stats($scan, $snapshot_id) {
-
-
$insert_params = array();
$rows = 0;
$max = array('count' => 0, 'start_time' => '2009-01-01');
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none (single)
$query = array(
'scan_id' => intval($scan->scan_id),
);
@@ -384,30 +383,25 @@
->timeout(scan_api_get_mongo_timeout());
if ($cursor->hasNext()) {
$result = $cursor->getNext();
- $last_update_start = $result['updated']->sec;
- $last_update_index = scan_api_bucket_index('scan', 'days', $result['updated']->sec);
+ $last_update_start = $result['updated'];
+ $last_update_index = scan_api_bucket_index('scan', 'days', $result['updated']);
foreach ($result['days'] as $index => $count) {
- if (!is_int($index) && in_array($index, array('velocity', 'prev_velocity', 'velocity7'))) {
- continue;
+ if ($index <= $last_update_index) {
+ // anything which is <= last-update-index = last updated - (last-updated-index - bucket index) * 86400
+ $start_time = gmdate('Y-m-d', $last_update_start - (($last_update_index - $index) * 86400));
}
else {
- if ($index <= $last_update_index) {
- // anything which is <= last-update-index = last updated - (last-updated-index - bucket index) * 86400
- $start_time = gmdate('Y-m-d', $last_update_start - (($last_update_index - $index) * 86400));
- }
- else {
- // @TODO decide whether to drop the older stats or not, index in mongo here is 0-29
- // anything > last-update-index is the same as above + another 30 days or so? ... or just ignore these as too old?
- $start_time = gmdate('Y-m-d', $last_update_start - ((($last_update_index - $index) + 30) * 86400));
- }
- $insert_params[] = $snapshot_id;
- $insert_params[] = $start_time;
- $insert_params[] = $count;
- ++$rows;
- if ($count > $max['count']) {
- $max = array('count' => $count, 'start_time' => $start_time); // @TODO fill in the start time
- }
+ // @TODO decide whether to drop the older stats or not, index in mongo here is 0-29
+ // anything > last-update-index is the same as above + another 30 days or so? ... or just ignore these as too old?
+ $start_time = gmdate('Y-m-d', $last_update_start - ((($last_update_index - $index) + 30) * 86400));
}
+ $insert_params[] = $snapshot_id;
+ $insert_params[] = $start_time;
+ $insert_params[] = $count;
+ ++$rows;
+ if ($count > $max['count']) {
+ $max = array('count' => $count, 'start_time' => $start_time); // @TODO fill in the start time
+ }
}
}
}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.module (working copy)
@@ -1,101 +0,0 @@
- t('Scan statistics'),
- 'description' => t('View scan, keyword, hashtag, and url statistics'),
- 'access callback' => 'user_access',
- 'access arguments' => array('view scan stats'),
- 'page callback' => 'np_scan_stats_proto_view',
- 'page arguments' => array(3, 4, 5),
- 'type' => MENU_NORMAL_ITEM,
- 'file' => 'np_scan_stats.proto.inc',
- );
-
- return $items;
-}
-
-/**
- * Implementation of hook_cron
- */
-function np_scan_stats_cron() {
- $mc = dmemcache_object('cache');
- // aggregated stat maintanence, compressed stat maintanence and fresh stat maintanence should not run together
- while (!$mc->add('np_scan_stats_maintanence', 1, FALSE, 1800)) {
- sleep(120);
- }
- $last_run = variable_get('np_scan_stats_cron_run', 0);
- $time = time();
- // dont run more than once
- if (date('H') != date('H', $last_run) || $lastrun + 3600 < $time) {
- include_once drupal_get_path('module', 'np_scan_stats') . '/np_scan_stats.cron.inc';
- $last_hour = mktime(date('H') - 1, 0, 0);
- // cleanup
- $cleanup_times = array();
- // stats
- // scan_statistics, keyword_statistics, hashtag_statistics, retweet_statistics, url_statistics, url_uniq_statistics, location_statistics
- foreach (array('scan', 'keyword', 'hashtag', 'retweet', 'url', 'url_uniq', 'location') as $table) {
- $start = time();
- $mongo_date = new MongoDate($last_hour - 7 * 86400);
- scan_api_get_mongo('statistics', $table)->remove(array('updated' => array('$lte' => $mongo_date))); // scan, keyword, hashtag, retweet, url, url_uniq, location
- $cleanup_times[] = $table . ' ' . (time() - $start);
- }
- scan_api_set_active_shard('misc');
- // urls
- $start = time();
- db_query("DELETE su FROM {source_urls} su INNER JOIN {urls} u ON su.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'source_urls ' . (time() - $start);
- $start = time();
- db_query("DELETE su FROM {scan_urls} su INNER JOIN {urls} u ON su.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_urls ' . (time() - $start);
-
- $start = time();
- $mongo_date = new MongoDate($last_hour - 30 * 86400);
- scan_api_get_mongo('statistics', 'url')->remove(array('updated' => array('$lte' => $mongo_date)));
- $cleanup_times[] = 'url_statistics_all_time ' . (time() - $start);
-
- $start = time();
- db_query("DELETE se FROM {scan_embeds} se INNER JOIN {urls} u ON se.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_embeds ' . (time() - $start);
- $start = time();
- db_query("DELETE si FROM {scan_images} si INNER JOIN {urls} u ON si.url_id = u.id WHERE u.last_occurrence < '%s'", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'scan_images ' . (time() - $start);
- $start = time();
- // make sure we don't delete a url that is blocked, as they are blocked by ID and we still need the url table entry to get that blocked url
- db_query("DELETE u FROM {urls} u LEFT JOIN {group_blocked_urls} gbu ON u.id = gbu.url_id WHERE last_occurrence < '%s' AND gbu.url_id IS NULL", date('Y-m-d H:i:s', $last_hour - 30 * 86400));
- $cleanup_times[] = 'urls ' . (time() - $start);
- scan_api_set_active_shard();
- watchdog('cron', 'np_scan_stat cleanup times in seconds: ' . implode("\n", $cleanup_times));
- variable_set('np_scan_stats_cron_run', $time);
- }
- $mc->delete('np_scan_stats_maintanence');
- if (date('d', variable_get('np_scan_views_24_cleanup', 0)) != date('d')) {
- variable_set('np_scan_views_24_cleanup', time());
- db_query('UPDATE {node} SET np_views_24 = 0 WHERE np_views_24 != 0');
- db_query('UPDATE {node} SET np_views_widget_24 = 0 WHERE np_views_widget_24 != 0');
- }
-}
-
-function np_scan_stats_perm() {
- return array('view scan stats');
-}
-
-function np_scan_stats_theme() {
- return array(
- 'mongo_query' => array('arguments' => array('collection' => NULL, 'find' => NULL, 'fields' => array(), 'sort' => array(), 'limit' => 0)),
- 'mongo_json' => array('arguments' => array('json' => array())),
- 'mongo_short_datetime' => array('arguments' => array('dt' => NULL)),
- 'mongo_datetime' => array('arguments' => array('dt' => NULL)),
- 'mongo_scan_details' => array('arguments' => array('record' => array())),
- 'mongo_word_details' => array('arguments' => array('collection' => 'keyword', 'record' => array())),
- 'mongo_location_details' => array('arguments' => array('record' => array())),
- 'mongo_url_details' => array('arguments' => array('record' => array())),
- 'twitter_word' => array('arguments' => array('collection' => NULL, 'word' => NULL)),
- );
-}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats_mail.module (working copy)
@@ -56,19 +56,28 @@
function np_scan_stats_mail_cron() {
$mail_params = array('time' => time(), 'keyword' => array(), 'hashtag' => array(),);
$collections = array('keyword' => 15, 'hashtag' => 10);
+ $query = array(
+ 'scan_id' => 0,
+ );
+ $fields = array(
+ 'word' => 1,
+ 'velocity.mintues' => 1,
+ 'velocity.hours' => 1,
+ 'trending' => 1,
+ );
foreach ($collections as $collection => $default_length) {
- $cursor = scan_api_get_mongo('statistics', $collection); // keyword, hashtag
- if ($cursor) {
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / keyword, hashtag / scan_id=0 / trending:-1
try {
$result = $cursor
- ->find(array('scan_id' => 0))
+ ->find($query, $fields)
->sort(array('trending' => -1))
->limit(variable_get('np_scan_stats_mail_' . $collection . '_list_length', $default_length))
->timeout(scan_api_get_mongo_timeout());
foreach($result as $document) {
$mail_params[$collection][] = array(
'word' => $document['word'],
- 'velocity' => $document['minutes']['velocity']? $document['minutes']['velocity'] : $document['hours']['velocity'],
+ // @@@ V2 This fallback is kinda stupid.
+ 'velocity' => $document['velocity']['minutes'] ? $document['velocity']['minutes'] : $document['velocity']['hours'],
'trending' => $document['trending'],
);
}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.cron.inc (working copy)
@@ -1,264 +0,0 @@
- 1, 'biday' => 2,) as $timeslice => $multiplier) {
- db_query("
- UPDATE {scan_statistics_aggregated} dst
- INNER JOIN {scan_statistics} src ON dst.scan_id = src.scan_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 86400), $time + $i);
- }
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron scan runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain keyword/hashtag aggregated statistics table
- */
-function np_scan_stats_cron_word($table_type, $last_hour = 0) {
- if ($table_type != 'keyword' && $table_type != 'hashtag') {
- return;
- }
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- $max = db_result(db_query('SELECT COUNT(*) FROM {' . $table_type . '_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_' . $table_type, ($table_type == 'keyword' ? 25000 : 100000));
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // last hour + age
- // keyword_statistics_aggregated, hashtag_statistics_aggregated, keyword_statistics, hashtag_statistics
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated} dst
- LEFT JOIN {" . $table_type . "_statistics} src ON dst.scan_id = src.scan_id AND dst.word = src.word AND src.start_time = '%s'
- SET dst.velocity_recent = IFNULL(src.count, 0), dst.age = dst.age + 1
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour), $time + $i);
- // remove out of interval ones
- foreach (array('6hour' => 6,) as $timeslice => $multiplier) {
- // keyword_statistics_aggregated, hashtag_statistics_aggregated, keyword_statistics, hashtag_statistics
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated} dst
- INNER JOIN {" . $table_type . "_statistics} src ON dst.scan_id = src.scan_id AND src.word = dst.word AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 3600), $time + $i);
- }
- // trending
- // velocity_6hour / (velocity_month / 30 days)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {" . $table_type . "_statistics_aggregated}
- SET trending = IF(velocity_6hour < 18 OR velocity_month = 0 OR age = 0, 0, (velocity_6hour/6) / ( velocity_month / IF(age > 744, 744, age) ))
- WHERE timestamp = %d
- ", $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron ' . $table_type . ' runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain location aggregated statistics table
- */
-function np_scan_stats_cron_location($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- // location_statistics_aggregated
- $max = db_result(db_query('SELECT COUNT(*) FROM {location_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_location', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- // keyword_statistics_aggregated, hashtag_statistics_aggregated
- db_query("
- UPDATE {location_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // last hour + age
- // location_statistics_aggregated, location_statistics
- db_query("
- UPDATE {location_statistics_aggregated} dst
- LEFT JOIN {location_statistics} src ON dst.scan_id = src.scan_id AND dst.location_id = src.location_id AND src.start_time = '%s'
- SET dst.velocity_recent = IFNULL(src.count, 0), dst.age = dst.age + 1
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour), $time + $i);
- // remove out of interval ones
- foreach (array('6hour' => 6, 'biday' => 48,) as $timeslice => $multiplier) {
- // location_statistics_aggregated, location_statistics
- db_query("
- UPDATE {location_statistics_aggregated} dst
- INNER JOIN {location_statistics} src ON dst.scan_id = src.scan_id AND src.location_id = dst.location_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.count, dst.velocity_" . $timeslice . " - src.count, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 3600), $time + $i);
- }
- // trending
- // velocity_6hour / (velocity_month / 30 days)
- // location_statistics_aggregated
- db_query("
- UPDATE {location_statistics_aggregated}
- SET trending = IF(velocity_6hour < 18 OR velocity_month = 0 OR age = 0, 0, (velocity_6hour/6) / ( velocity_month / IF(age > 744, 744, age) ))
- WHERE timestamp = %d
- ", $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron location runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain url aggregated statistics table
- */
-function np_scan_stats_cron_url($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- $max = db_result(db_query('SELECT COUNT(*) FROM {url_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_url', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- db_query("
- UPDATE {url_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // remove out of interval ones
- db_query("
- UPDATE {url_statistics_aggregated} dst
- INNER JOIN {url_statistics} src ON dst.scan_id = src.scan_id AND src.url_id = dst.url_id AND src.start_time = '%s'
- SET dst.velocity = IF(dst.velocity > src.count, dst.velocity - src.count, 0), dst.cleanup = IF(dst.velocity > src.count, 0, 1)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - 6 * 3600), $time + $i);
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron url runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- // cleanup
- db_query("DELETE FROM {url_statistics_aggregated} WHERE cleanup = 1");
- if ($db_change) {
- db_set_active();
- }
-}
-
-/**
- * Maintain Uniq url aggregated statistics table
- */
-function np_scan_stats_cron_url_uniq($last_hour = 0) {
- $db_change = FALSE;
- if (!$last_hour) {
- $last_hour = mktime(date('H') - 1, 0, 0);
- $db_change = TRUE;
- db_set_active('scan');
- }
- $max = db_result(db_query('SELECT COUNT(*) FROM {url_uniq_statistics_aggregated}'));
- $run_length = variable_get('np_scan_stats_runlength_url_unique', 100000);
- $runs = ceil($max / $run_length);
- $time = time();
- $runtimes = array();
- for ($i = 0; $i < $runs; $i++) {
- $start = time();
- // flag the rows that needs to be updated (cant use limit with joined tables in update)
- db_query("
- UPDATE {url_uniq_statistics_aggregated}
- SET timestamp = %d
- WHERE timestamp < %d
- LIMIT %d
- ", $time + $i, $time, $run_length);
- // remove out of interval ones
- foreach (array('day' => 1, 'biday' => 2,) as $timeslice => $multiplier) {
- db_query("
- UPDATE {url_uniq_statistics_aggregated} dst
- INNER JOIN {url_uniq_statistics} src ON dst.scan_id = src.scan_id AND src.start_time = '%s'
- SET dst.velocity_" . $timeslice . " = IF(dst.velocity_" . $timeslice . " > src.general + src.photo + src.video, dst.velocity_" . $timeslice . " - src.general - src.photo - src.video, 0),
- dst.general_" . $timeslice . " = IF(dst.general_" . $timeslice . " > src.general, dst.general_" . $timeslice . " - src.general, 0),
- dst.photo_" . $timeslice . " = IF(dst.photo_" . $timeslice . " > src.photo, dst.photo_" . $timeslice . " - src.photo, 0),
- dst.video_" . $timeslice . " = IF(dst.video_" . $timeslice . " > src.video, dst.video_" . $timeslice . " - src.video, 0)
- WHERE dst.timestamp = %d
- ", date('Y-m-d H:i:s', $last_hour - $multiplier * 86400), $time + $i);
- }
- $runtimes[] = time() - $start;
- }
- watchdog('cron', 'np_scan_stat cron uniq url runtimes: ' . implode(', ', $runtimes) . ' (seconds)');
- if ($db_change) {
- db_set_active();
- }
-}
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.css (working copy)
@@ -1,30 +0,0 @@
-#proto-page th.title { width: 99% }
-#proto-page th.word { min-width: 100px; max-width: 100px }
-#proto-page th.location { min-width: 99%; max-width: 100px }
-#proto-page th.url-link { min-width: 200px; max-width: 200px }
-#proto-page th.url-title { min-width: 50% }
-#proto-page th.id { min-width: 75px; max-width: 75px }
-#proto-page th.updated { min-width: 100px; max-width: 100px }
-#proto-page th.created { min-width: 50px; max-width: 50px }
-#proto-page th.trending { min-width: 75px; max-width: 75px }
-#proto-page th.velocity { min-width: 75px; max-width: 75px }
-#proto-page th.prev_velocity { min-width: 75px; max-width: 75px }
-#proto-page th.general { min-width: 75px; max-width: 75px }
-#proto-page th.video { min-width: 75px; max-width: 75px }
-#proto-page th.photo { min-width: 75px; max-width: 75px }
-#proto-page th.category { min-width: 75px; max-width: 75px }
-#proto-page th.count { min-width: 75px; max-width: 75px }
-#proto-page th.source { min-width: 75px; max-width: 75px }
-#proto-page h3 { padding: 10px 0 0 0; text-align: center}
-#proto-page h4 { text-align: center }
-#proto-page hr { padding: 10px 0 10px 0; clear: both }
-#proto-page div#toc { line-height: 100% }
-#proto-page div#toc > div.item-list > ul > li { float: left }
-#proto-page div#option-form { clear: both }
-#proto-page div#option-form form { margin-bottom: 0 }
-#proto-page div#option-form div, #proto-page div#option-form > div, #proto-page div#option-form label { display: inline }
-#proto-page div#help { clear: both; padding: 5px 0 0 0; line-height: 100%; font-size: 90% }
-#proto-page div.scan-list { float: right; clear: both }
-#proto-page div.scan-list-keyword,
-#proto-page div.scan-list-hashtag,
-#proto-page div.scan-list-retweet { float: right; clear: none }
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.info (working copy)
@@ -1,6 +0,0 @@
-; $Id: $
-name = Np Scan Stats
-description = Scan Stats Cron / Admin functions
-dependencies[] = scan_api
-package = NP
-core = 6.x
Index: www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc
===================================================================
--- www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc (revision 34236)
+++ www/sites/all/modules/custom/np_scan_stats/np_scan_stats.proto.inc (working copy)
@@ -1,985 +0,0 @@
- array());
- if (isset($_GET['limit'])) {
- $options['query']['limit'] = $_GET['limit'];
- }
- if (isset($_GET['fields'])) {
- $options['query']['fields'] = $_GET['fields'];
- }
-
- $current_url = 'admin/reports/scan_stats/';
- $toc = array(
- array(
- 'data' => l('Scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan'))),
- 'children' => array(
- l('Most active online scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_online'))),
- l('Most active offline scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_offline'))),
- l('Most active scans by link', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_bylink'))),
- l('Top trending scans', $current_url . 'scan', array_merge($options, array('fragment' => 'scan_trending'))),
- )
- ),
- array(
- 'data' => l('Keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword'))),
- 'children' => array(
- l('Most mentioned keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword_velocity'))),
- l('Top trending keywords', $current_url . 'keyword', array_merge($options, array('fragment' => 'keyword_trending'))),
- ),
- ),
- array(
- 'data' => l('Hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag'))),
- 'children' => array(
- l('Most mentioned hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag_velocity'))),
- l('Top trending hashtags', $current_url . 'hashtag', array_merge($options, array('fragment' => 'hashtag_trending'))),
- ),
- ),
-/* array(
- 'data' => l('Retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet'))),
- 'children' => array(
- l('Most mentioned retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet_velocity'))),
- l('Top trending retweets', $current_url . 'retweet', array_merge($options, array('fragment' => 'retweet_trending'))),
- ),
- ), */
- array(
- 'data' => l('Links', $current_url . 'link', array_merge($options, array('fragment' => 'link'))),
- 'children' => array(
- l('Top links', $current_url . 'link', array_merge($options, array('fragment' => 'link_general'))),
- l('Top photos', $current_url . 'link', array_merge($options, array('fragment' => 'link_photo'))),
- l('Top videos', $current_url . 'link', array_merge($options, array('fragment' => 'link_video'))),
- l('Top links by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_general'))),
- l('Top photos by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_photo'))),
- l('Top videos by count', $current_url . 'link', array_merge($options, array('fragment' => 'link_count_video'))),
- ),
- ),
- array(
- 'data' => l('Locations', $current_url . 'location', array_merge($options, array('fragment' => 'location'))),
- 'children' => array(
- l('Most used locations', $current_url . 'location', array_merge($options, array('fragment' => 'location_velocity'))),
- l('Top trending locations', $current_url . 'location', array_merge($options, array('fragment' => 'location_trending'))),
- ),
- ),
- );
- $out = '
' . theme('item_list', $toc) . '
';
-
- if (!empty($heading)) {
- $out .= '' . drupal_get_form('np_scan_stats_proto_options_form') . '
';
- }
-
- $help = array();
- $help[] = t('Trending measures current momentum usng the normalized ratio of the current velocity (5 minute increments) to the long term velocity (30 day).');
- $help[] .= t('See Mongo Schema for more help.', array('@wiki-url' => url('https://apps.d2.nowpublic.com/trac/wiki/Scan/MongoSchema', array('absolute' => TRUE))));
- $out .= '' . theme('item_list', $help) . '
';
-
- $query_active_and_online = array('scan_id' => array('$gt' => 0), 'scan.status' => 0, 'scan.active' => 1);
- $query_no_scan = array('scan_id' => 0);
- $query_scan = array('scan_id' => array('$gt' => 0));
-
- if ($heading == 'scan') {
- if (empty($detail)) {
- $out .= '
' . t('Scans') . '
';
- $out .= '' . t('Most active online scans') . '
';
- $out .= _np_scan_stats_scan_top_velocity($query_active_and_online);
- $out .= '' . t('Most active offline scans') . '
';
- $out .= _np_scan_stats_scan_top_velocity(array('scan.status' => 1, 'scan.active' => 1));
- $out .= '' . t('Most active scans by link') . '
';
- $out .= _np_scan_stats_top_unique_links($query_active_and_online);
- $out .= '' . t('Top trending scans') . '
';
- $out .= _np_scan_stats_word_top_sort('scan', array('trending' => -1), $query_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_scan_details_header($detail);
- $out .= _np_scan_stats_scan_title($detail, false);
- $out .= _np_scan_stats_scan_details($detail);
- }
- }
- else if ($heading == 'keyword') {
- if (empty($detail)) {
- $out .= '
' . t('Keywords') . '
';
- $out .= '' . t('Most mentioned keywords') . '
';
- $out .= _np_scan_stats_word_top_velocity('keyword', $query_no_scan);
- $out .= '' . t('Top trending keywords') . '
';
- $out .= _np_scan_stats_word_top_sort('keyword', array('trending' => -1), $query_no_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Keyword: %word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('keyword', $detail, $scan_id);
- }
- }
- else if ($heading == 'hashtag') {
- if (empty($detail)) {
- $out .= '
' . t('Hashtag') . '
';
- $out .= '' . t('Most mentioned hashtags') . '
';
- $out .= _np_scan_stats_word_top_velocity('hashtag', $query_no_scan);
- $out .= '' . t('Top trending hashtags') . '
';
- $out .= _np_scan_stats_word_top_sort('hashtag', array('trending' => -1), $query_no_scan);
- }
- else {
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Hashtag: #%word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('hashtag', $detail, $scan_id);
- }
- }
- else if ($heading == 'retweet') {
-/* if (empty($detail)) {
- $out .= '
' . t('Retweet') . '
';
- $out .= '' . t('Most mentioned retweets') . '
';
- $out .= _np_scan_stats_word_top_velocity('retweet', $query_no_scan);
- $out .= '' . t('Top trending retweets') . '
';
- $out .= _np_scan_stats_word_top_sort('retweet', array('trending' => -1), $query_no_scan);
- }
- else { */
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, $detail);
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Retweet: %word', array('%word' => $detail)) . '
';
- $out .= _np_scan_stats_word_details('retweet', $detail, $scan_id);
-// }
- }
- else if ($heading == 'location') {
- if (empty($detail)) {
- $out .= '
' . t('Locations') . '
';
- $out .= '' . t('Most used locations') . '
';
- $out .= _np_scan_stats_word_top_velocity('location', $query_no_scan);
- $out .= '' . t('Top trending locations') . '
';
- $out .= _np_scan_stats_word_top_sort('location', array('trending' => -1), $query_no_scan);
- }
- else {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $detail));
- $out .= '
';
- $out .= _np_scan_stats_word_details_header($heading, intval($detail));
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Location: %location_id: %title', array('%location_id' => $detail, '%title' => $name)) . '
';
- $out .= _np_scan_stats_location_details($detail, $scan_id);
- }
- }
- else if ($heading == 'link') {
- $out .= '
' . t('Links') . '
';
-
- $categories = array(
- "general" => 1,
- "photo" => 2,
- "video" => 3,
- );
- foreach ($categories as $category_name => $category_id) {
- $out .= '' . t('Top %category links by velocity', array('%category' => $category_name)) . '
';
- $out .= _np_scan_stats_top_links(array('scan_id' => 0, 'category' => $category_id), array('hours.velocity' => -1));
- $out .= '' . t('Top %category links by count', array('%category' => $category_name)) . '
';
- $out .= _np_scan_stats_top_links(array('scan_id' => 0, 'category' => $category_id), array('count' => -1));
- }
- }
- else if ($heading == 'url' && !empty($detail)) {
- $title = db_result(db_query("SELECT title FROM {urls} WHERE id = %d", $detail));
- $out .= '
';
-// $out .= _np_scan_stats_word_details_header($heading, intval($detail));
- $out .= _np_scan_stats_scan_title($scan_id);
- $out .= '' . t('Url: %url_id: %title', array('%url_id' => $detail, '%title' => $title)) . '
';
- $out .= _np_scan_stats_url_details($detail, $scan_id);
- }
-
- db_set_active();
- return '' . $out . '
';
-}
-
-function _np_scan_stats_scan_title($scan_id, $link = true) {
- if ($scan_id) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $scan_id));
- $out = t('Scan: %scan_id: %title', array('%scan_id' => $scan_id, '%title' => $title));
- return '' . ($link ? l($out, 'admin/reports/scan_stats/scan/' . $scan_id, array('html' => 1)) : $out) . '
';
- }
-}
-
-function _np_scan_stats_scan_top_velocity($query = array()) {
- $output = array();
- if (!($cursor = scan_api_get_mongo('scan_stats', 'scan'))) {
- _np_scan_stats_mongo_error();
- }
- else {
- $buckets = array(
- 'hour' => array('bucket' => 'minutes', 'velocity' => 'velocity', 'prev_velocity' => 1),
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'biday' => array('bucket' => 'hours', 'velocity' => 'velocity48', 'prev_velocity' => 1),
- 'week' => array('bucket' => 'days', 'velocity' => 'velocity7'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity', 'prev_velocity' => 1),
- );
- $caption = array();
- foreach ($buckets as $timeslice => $mongo) {
- $output[$timeslice] = array();
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $fields = array('scan_id' => 1, $velocity_field => 1, 'trending' => 1, 'updated' => 1, 'created' => 1);
- if (isset($mongo['prev_velocity'])) {
- $fields[$bucket . '.prev_velocity'] = 1;
- }
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $hint = array($velocity_field => -1);
- $sort = array($velocity_field => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption[$timeslice] = theme('mongo_query', 'scan', $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- $output[$timeslice][] = array(
- l($row['scan_id'], 'admin/reports/scan_stats/scan/'. $row['scan_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title ? l($title, 'admin/reports/scan_stats/scan/' . $row['scan_id']) : '-',
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($mongo['prev_velocity']) ? round($row[$bucket]['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- }
-
- $headers = array(
- array('data' => t('id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'title'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Prev'), 'class' => 'prev_velocity'),
- );
-
- $out = '';
- $out .= '' . t('Last 60 Minutes') . '
';
- $out .= theme('table', $headers, $output['hour'], array(), $caption['hour']);
-
- $out .= '' . t('Last 24 Hours') . '
';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '' . t('Last 48 Hours') . '
';
- $out .= theme('table', $headers, $output['biday'], array(), $caption['biday']);
-
- $out .= '' . t('Last 7 Days') . '
';
- $out .= theme('table', $headers, $output['week'], array(), $caption['week']);
-
- $out .= '' . t('Last 30 Days') . '
';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_top_unique_links($query = array()) {
- if (!($cursor = scan_api_get_mongo('scan_stats', 'scanurl'))) {
- _np_scan_stats_mongo_error();
- }
- else {
- // most active scans by number of unique links
- $buckets = array(
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity24'),
- 'biday' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'week' => array('bucket' => 'days', 'velocity' => 'velocity7'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity'),
- );
- $output = array();
- $caption = array();
- foreach ($buckets as $timeslice => $mongo) {
- $output[$timeslice] = array();
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $fields = array(
- 'scan_id' => 1,
- $velocity_field => 1,
- 'updated' => 1,
- 'created' => 1,
- $bucket . '.general.' . $mongo['velocity'] => 1,
- $bucket . '.video.' . $mongo['velocity'] => 1,
- $bucket . '.photo.' . $mongo['velocity'] => 1,
- );
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $sort = array($velocity_field => -1);
- $hint = array('scan_id' => 1, $velocity_field => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption[$timeslice] = theme('mongo_query', 'scanurl', $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- $output[$timeslice][] = array(
- l($row['scan_id'], 'admin/reports/scan_stats/scan/' . $row['scan_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title ? l($title, 'admin/reports/scan_stats/scan/' . $row['scan_id']) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($row[$bucket]['general'][$mongo['velocity']]) ? round($row[$bucket]['general'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- isset($row[$bucket]['video'][$mongo['velocity']]) ? round($row[$bucket]['video'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- isset($row[$bucket]['photo'][$mongo['velocity']]) ? round($row[$bucket]['photo'][$mongo['velocity']], STATS_VELOCITY_PRECISION) : '-',
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- return;
- }
- }
- }
-
- $headers = array(
- array('data' => t('id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'title'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('General'), 'class' => 'general'),
- array('data' => t('Video'), 'class' => 'video'),
- array('data' => t('Photo'), 'class' => 'photo'),
- );
-
- $out = '';
- $out .= '' . t('Last 24 Hours') . '
';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '' . t('Last 48 Hours') . '
';
- $out .= theme('table', $headers, $output['biday'], array(), $caption['biday']);
-
- $out .= '' . t('Last 7 Days') . '
';
- $out .= theme('table', $headers, $output['week'], array(), $caption['week']);
-
- $out .= '' . t('Last 30 Days') . '
';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_top_links($query = array(), $sort = array()) {
- $output = array();
- if (!($cursor = scan_api_get_mongo('scan_stats', 'url'))) {
- _np_scan_stats_mongo_error();
- $caption = theme('mongo_query', 'url', array(), array(), array(), LIMIT);
- }
- else {
- $fields = array(
- 'category' => 1,
- 'url_id' => 1,
- 'hours.velocity' => 1,
- 'count' => 1,
- 'updated' => 1,
- 'created' => 1,
- );
- if (isset($sort['hours.velocity'])) {
- $query['hours.velocity'] = array('$gt' => 0);
- $hint = array('scan_id' => 1, 'category' => 1, 'hours.velocity' => -1);
- }
- else {
- $hint = array('scan_id' => 1, 'category' => 1, 'count' => -1);
- }
- // Add category to query so we can use the index
- if (!isset($query['category'])) {
- $query['category'] = array('$in' => array(1, 2, 3));
- }
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- try {
- $results = $cursor->find($query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $caption = theme('mongo_query', 'url', $query, $fields, $sort, $limit);
- foreach ($results as $row) {
- db_set_active('misc');
- $url = db_fetch_array(db_query("
- SELECT u.title, u.resolved,
- si.thumb_path,
- se.domain, se.video_id
- FROM {urls} u
- LEFT JOIN {scan_images} si ON si.url_id = u.id
- LEFT JOIN {scan_embeds} se ON se.url_id = u.id
- WHERE u.id = %d
- ", $row['url_id']));
- $title = empty($url['title']) ? '-' : $url['title'];
- if ($row['category'] == 2 && !empty($url['thumb_path'])) {
- $path = basename($url['thumb_path']);
- $full_path = 'http://media.scan.nowpublic.com/'. substr($path, 0, 1) . '/' . substr($path, 1, 1) . substr($path, 2, 1) . '/'. $path;
- $title = theme('image', $full_path, $title, $title, NULL, FALSE);
- }
- elseif ($row['category'] == 3 && !empty($url['video_id'])) {
- $title = _api_embed_code($url['domain'], $url['video_id'], 280, 200);
- }
- if (!empty($url['resolved'])) {
- $url['host'] = parse_url($url['resolved'], PHP_URL_HOST);
- }
- db_set_active();
- $output[] = array(
- l($row['url_id'], 'admin/reports/scan_stats/url/' . $row['url_id']),
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $title,
- empty($url['resolved']) ? '-' : l($url['host'], $url['resolved']),
- round($row['hours']['velocity'], STATS_VELOCITY_PRECISION),
- $row['count'],
- );
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
-
- $headers = array(
- array('data' => t('Id'), 'class' => 'id'),
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => t('Title'), 'class' => 'url-title'),
- array('data' => t('Link'), 'class' => 'url-link'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Count'), 'class' => 'count'),
- );
-
- return theme('table', $headers, $output, array(), $caption);
-}
-
-function _np_scan_stats_scan_details($scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- $query = array('scan_id' => intval($scan_id));
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_scan_details', _np_scan_stats_get_record('scan', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_word_details_header($collection, $value) {
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // keyword, hashtag, keyword, location, (url -- disabled)
- _np_scan_stats_mongo_error();
- return;
- }
- $collections = array(
- 'keyword' => 'word',
- 'hashtag' => 'word',
- 'retweet' => 'word',
- 'location' => 'location_id',
- 'url' => 'url_id',
- );
- $query = array($collections[$collection] => $value, 'scan_id' => array('$gt' => 0));
- $hint = array('scan_id' => 1, $collections[$collection] => 1, 'hours.velocity' => -1);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- try {
- $results = $cursor
- ->find($query, array('scan_id' => 1, 'hours.velocity' => 1))
- ->sort(array('hours.velocity' => -1))
- ->hint($hint)
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $rows = array();
- foreach ($results as $row) {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row['scan_id']));
- if (strlen($title) > 40) {
- $title = substr($title, 0, 40) . "...";
- }
- $rows[] = round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) . ': ' . l($title ? $title : $row['scan_id'], 'admin/reports/scan_stats/scan/' . $row['scan_id']);
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- if ($rows) {
- return '' . theme('item_list', $rows, 'scans', 'ol') . '
';
- }
-}
-
-function _np_scan_stats_scan_details_header($scan_id) {
- $query = array('scan_id' => intval($scan_id));
- $collections = array(
- 'keyword' => 'word',
- 'hashtag' => 'word',
- 'retweet' => 'word',
- 'location' => 'location_id',
- 'url' => 'url_id',
- );
- $toc = array();
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- foreach ($collections as $collection => $field) {
- if ($cursor = scan_api_get_mongo('scan_stats', $collection)) { // keyword, hashtag, retweet, location, url
- try {
- $results = $cursor
- ->find($query, array($field => 1, 'hours.velocity' => 1))
- ->sort(array('hours.velocity' => -1))
- ->limit($limit)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- $rows = array();
- foreach ($results as $row) {
- $title = $row[$field];
- if ($collection == 'location') {
- if ($name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$field]))) {
- $title = $name . ' (' . $row[$field] . ')';
- }
- }
- else if ($collection == 'url') {
- if ($title = db_result(db_query("SELECT title FROM {urls} WHERE id = %d", $row[$field]))) {
- $title = preg_replace('/^YouTube\s+-\s+/', '', $title);
- if (strlen($title) > 40) {
- $title = substr($title, 0, 40) . "...";
- }
- $title .= ' (' . $row[$field] . ')';
- }
- }
- $rows[] = round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) . ': ' . l($title, 'admin/reports/scan_stats/'. $collection . '/' . $row[$field] . '/' . $scan_id);
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
- if ($rows) {
- return '' . theme('item_list', $rows, $collection . 's', 'ol') . '
';
- }
- }
-}
-
-function _np_scan_stats_word_top_velocity($collection, $query = array()) {
- $output = array('hour' => array(), 'day' => array(), 'month' => array());
- $key = $collection == 'location' ? 'location_id' : 'word';
-
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // keyword, hashtag, (retweet -- disabled), location
- _np_scan_stats_mongo_error();
- $caption_title = theme('mongo_query', $collection, array(), array(), array(), LIMIT);
- $caption = array('hour' => $caption_title, 'day' => $caption_title, 'month' => $caption_title);
- }
- else {
- $buckets = array(
- 'hour' => array('bucket' => 'minutes', 'velocity' => 'velocity'),
- 'day' => array('bucket' => 'hours', 'velocity' => 'velocity'),
- 'month' => array('bucket' => 'days', 'velocity' => 'velocity'),
- );
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- foreach ($buckets as $timeslice => $mongo) {
- $bucket = $mongo['bucket'];
- $velocity_field = $bucket . '.' . $mongo['velocity'];
- $bucket_query = $query;
- $bucket_query[$velocity_field] = array('$gt' => 0);
- $hint = array('scan_id' => 1, $velocity_field => -1);
- $sort = array($velocity_field => -1);
- $fields = array($velocity_field => 1, 'trending' => 1, 'created' => 1, 'updated' => 1, $bucket . '.prev_velocity', $key => 1);
- $caption[$timeslice] = theme('mongo_query', $collection, $bucket_query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($bucket_query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- if ($collection == 'location') {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$key]));
- $key_link = l($row[$key], 'admin/reports/scan_stats/location/' . $row[$key]);
- $key_value = empty($name) ? $key_link : l($name, 'admin/reports/scan_stats/location/' . $row[$key]) . ' (' . $key_link . ')';
- }
- else {
- $key_value = theme('twitter_word', $collection, $row[$key]);
- }
- $output_row = array(
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $key_value,
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- round($row[$bucket][$mongo['velocity']], STATS_VELOCITY_PRECISION),
- isset($mongo['prev_velocity']) ? round($row[$bucket]['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- if ($collection != 'location') {
- $whitelist = db_result(db_query("SELECT IFNULL(type, -1) as whitelist FROM {keyword_whitelist} WHERE word = '%s'", $row['word']));
- $output_row[] = $whitelist == -1 ? "NER" : (($whitelist == 5) ? "whitelist-user" : "whitelist");
- }
- $output[$timeslice][] = $output_row;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- }
-
- $headers = array(
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => $collection == 'location' ? t('Location') : t('Word'), 'class' => $collection == 'location' ? 'location' : 'word'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity'), 'class' => 'velocity'),
- array('data' => t('Prev'), 'class' => 'prev_velocity'),
- );
- if ($collection != 'location') {
- $headers[] = array('data' => t('Source'), 'class' => 'source');
- }
-
- $out = '';
- $out .= '<h3>' . t('Last 60 Minutes') . '</h3>';
- $out .= theme('table', $headers, $output['hour'], array(), $caption['hour']);
-
- $out .= '<h3>' . t('Last 24 Hours') . '</h3>';
- $out .= theme('table', $headers, $output['day'], array(), $caption['day']);
-
- $out .= '<h3>' . t('Last 30 Days') . '</h3>';
- $out .= theme('table', $headers, $output['month'], array(), $caption['month']);
-
- return $out;
-}
-
-function _np_scan_stats_word_top_sort($collection, $sort, $query = array()) {
- $output = array();
- $keys = array('location' => 'location_id', 'scan' => 'scan_id');
- $key = isset($keys[$collection]) ? $keys[$collection] : 'word';
- $sort_keys = array_keys($sort);
- $sort_key = $sort_keys[0];
-
- if (!($cursor = scan_api_get_mongo('scan_stats', $collection))) { // scan, keyword, hashtag, (retweet -- disabled), location
- _np_scan_stats_mongo_error();
- $caption = theme('mongo_query', $collection, array(), array(), array(), LIMIT);
- }
- else {
- $fields = array('trending' => 1, 'created' => 1, 'updated' => 1, 'minutes.velocity' => 1, 'minutes.prev_velocity' => 1, 'hours.velocity' => 1, 'hours.prev_velocity' => 1, 'days.velocity' => 1, 'days.prev_velocity' => 1, $key => 1);
- $hint = ($collection == 'scan') ? array() : array('scan_id' => 1);
- $hint = array_merge($hint, $sort);
- $limit = isset($_GET['limit']) ? $_GET['limit'] : LIMIT;
- $caption = theme('mongo_query', $collection, $query, $fields, $sort, $limit);
- try {
- $results = $cursor
- ->find($query, $fields)
- ->sort($sort)
- ->limit($limit)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- foreach ($results as $row) {
- if ($collection == 'location') {
- $name = db_result(db_query("SELECT name FROM {geonames} WHERE geonameid = %d", $row[$key]));
- $path = 'admin/reports/scan_stats/location/' . $row[$key];
- $key_path = l($row[$key], $path);
- $key_value = empty($name) ? $key_path : l($name, $path) . ' (' . $key_path . ')';
- }
- else if ($collection == 'scan') {
- $title = db_result(db_query("SELECT title FROM {scan} INNER JOIN {node_revisions} using(vid) WHERE scan_id = %d", $row[$key]));
- $key_value = l($title, 'admin/reports/scan_stats/scan/' . $row[$key]);
- }
- else {
- $key_value = theme('twitter_word', $collection, $row[$key]);
- }
- $output_row = array(
- theme('mongo_short_datetime', $row['created']),
- theme('mongo_datetime', $row['updated']),
- $key_value,
- isset($row['trending']) ? round($row['trending'], STATS_TRENDING_PRECISION) : '-',
- isset($row['minutes']['velocity']) ? round($row['minutes']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['minutes']['prev_velocity']) ? round($row['minutes']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['hours']['velocity']) ? round($row['hours']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['hours']['prev_velocity']) ? round($row['hours']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['days']['velocity']) ? round($row['days']['velocity'], STATS_VELOCITY_PRECISION) : '-',
- isset($row['days']['prev_velocity']) ? round($row['days']['prev_velocity'], STATS_VELOCITY_PRECISION) : '-',
- );
- if ($key == 'word') {
- $whitelist = db_result(db_query("SELECT IFNULL(type, -1) as whitelist FROM {keyword_whitelist} WHERE word = '%s'", $row['word']));
- $output_row[] = $whitelist == -1 ? "NER" : (($whitelist == 5) ? "whitelist-user" : "whitelist");
- }
- if (floatval($row[$sort_key]) > 0) {
- $output[] = $output_row;
- }
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
-
- $headers = array(
- array('data' => t('Created'), 'class' => 'created'),
- array('data' => t('Updated'), 'class' => 'updated'),
- array('data' => $collection == 'location' ? t('Location') : t('Word'), 'class' => $collection == 'location' ? 'location' : 'word'),
- array('data' => t('Trending'), 'class' => 'trending'),
- array('data' => t('Velocity hr'), 'class' => 'velocity'),
- array('data' => t('Prev hr'), 'class' => 'prev_velocity'),
- array('data' => t('Velocity day'), 'class' => 'velocity'),
- array('data' => t('Prev day'), 'class' => 'prev_velocity'),
- array('data' => t('Velocity mon'), 'class' => 'velocity'),
- array('data' => t('Prev mon'), 'class' => 'prev_velocity'),
- );
- if ($key == 'word') {
- $headers[] = array('data' => t('Source'), 'class' => 'source');
- }
-
- return theme('table', $headers, $output, array(), $caption);
-}
-
-function _np_scan_stats_word_details($collection, $word, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', $collection)) { // keyword, hashtag, retweet
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'word' => $word);
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_word_details', $collection, _np_scan_stats_get_record($collection, $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_location_details($location_id, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'location')) {
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'location_id' => intval($location_id));
- try {
- $results = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_location_details', _np_scan_stats_get_record('location', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_url_details($url_id, $scan_id) {
- if ($cursor = scan_api_get_mongo('scan_stats', 'url')) {
- $query = array('scan_id' => empty($scan_id) ? 0 : intval($scan_id), 'url_id' => intval($url_id));
- $hint = array('scan_id' => 1, 'url_id' => 1, 'hours.velocity' => -1);
- try {
- $results = $cursor
- ->find($query)
- ->hint($hint)
- ->timeout(scan_api_get_mongo_timeout())
- ;
- return theme('mongo_url_details', _np_scan_stats_get_record('url', $results));
- }
- catch (MongoCursorTimeoutException $e) {
- _np_scan_stats_mongo_timeout_error();
- }
- }
- else {
- _np_scan_stats_mongo_error();
- }
-}
-
-function _np_scan_stats_get_record($collection, $cursor) {
- $record = $cursor->getNext();
- if (!$record) {
- return array('error' => t('can not read collection'));
- }
- $updated = $record['updated']->sec;
- foreach (array('minutes', 'hours', 'days') as $type) {
- if (isset($record[$type])) {
- $index = scan_api_bucket_index($collection, $type, $updated);
- if (isset($record[$type][$index])) {
- $record[$type][$index] .= ' (*' . t('updated') . ')';
- }
- else {
-// drupal_set_message(t('current %type index of %index missing', array('%type' => $type, '%index' => $index)));
- }
- // Validate that the velocities.
- $size = scan_api_interval_count($collection, $type);
- $velocity = 0;
- foreach ($record[$type] as $key => $value) {
- if (is_numeric($key)) {
- $velocity += $value;
- }
- }
- $field = 'velocity';
- if ($collection == 'scan' && $type == 'hours') {
- $field .= $size;
- }
- if (intval($record[$type][$field] * $size + 0.0001 /*account for rounding error*/) != $velocity) {
- drupal_set_message(t('%type %field is %old, but should be %new', array('%type' => $type, '%field' => $field, '%old' => round($record[$type]['velocity'], 4), '%new' => round($velocity / $size, 4))), 'error');
- }
- }
- }
- foreach (array('updated', 'created') as $field) {
- if (isset($record[$field])) {
- $dt = $record[$field];
- $record[$field] = (array) $dt;
- $record[$field]['sec'] .= ' (*' . theme('mongo_datetime', $dt) . ')';
- }
- }
- if (isset($record['scan'])) {
- foreach (array('client_id', 'active', 'status') as $key) {
- if (!isset($record['scan'][$key])) {
- drupal_set_message(t('scan.%key missing', array('%key' => $key)), 'error');
- }
- }
- }
- return $record;
-}
-
-function np_scan_stats_proto_options_form($form_state) {
- $form = array();
- $form['limit'] = array(
- '#type' => 'select',
- '#title' => t('Limit'),
- '#default_value' => isset($_GET['limit']) ? $_GET['limit'] : LIMIT,
- '#options' => drupal_map_assoc(array(5, 10, 15, 20, 30, 40, 50, 60, 100, 200, 300, 400, 500)),
- );
- $form['fields'] = array(
- '#type' => 'checkbox',
- '#title' => t('Show Fields in JSON'),
- '#default_value' => isset($_GET['fields']) ? $_GET['fields'] : 0,
- );
- $form['submit'] = array(
- '#type' => 'submit',
- '#value' => t('Change'),
- );
- return $form;
-}
-
-function np_scan_stats_proto_options_form_submit($form, &$form_state) {
- $options = array();
- if ($form_state['values']['limit'] != LIMIT) {
- $options['limit'] = $form_state['values']['limit'];
- }
- if ($form_state['values']['fields'] == 1) {
- $options['fields'] = $form_state['values']['fields'];
- }
- if (count($options)) {
- drupal_goto($_GET['q'], $options);
- }
-}
-
-function theme_mongo_query($collection, $query, $fields, $sort, $limit) {
- $out = 'mongo> db.' . $collection;
- $out .= '.find(' . theme('mongo_json', $query);
- if (count($fields) > 0 && isset($_GET['fields'])) {
- $out .= ',' . theme('mongo_json', $fields);
- }
- $out .= ')';
- if (count($sort) > 0) {
- $out .= '.sort(' . theme('mongo_json', $sort) . ')';
- }
- if ($limit) {
- $out .= '.limit(' . $limit . ')';
- }
- $out .= ".timeout(" . scan_api_get_mongo_timeout() . ")";
- return $out;
-}
-
-function theme_mongo_json($json) {
- return str_replace(array(',', ':'), array(', ', ': '), json_encode($json));
-}
-
-function theme_mongo_short_datetime($dt) {
- return date(SHORT_FORMAT, $dt->sec);
-}
-
-function theme_mongo_datetime($dt) {
- if (empty($dt)) {
- return '-';
- }
- static $now;
- if (!isset($now)) {
- $now = time();
- }
- if ($now <= $dt->sec + 60) {
- return t('last minute');
- }
- $ago = $now - $dt->sec;
- if ($ago <= 3600) {
- return t('%mins minutes', array('%mins' => round($ago / 60, $ago <= 300 ? 1 : 0)));
- }
- return date(MEDIUM_FORMAT, $dt->sec);
-}
-
-// @TODO: Here's a quick implementation for writing detail records,
-// but would be nice to see some real theming.
-
-function theme_mongo_scan_details($record) {
- return '<pre>' . _mongo_record_to_string($record) . '</pre>';
-}
-
-function theme_mongo_word_details($collection, $record) {
- $output = '';
- if (isset($record['word'])) {
- $output .= theme('twitter_word', $collection, $record['word']);
- }
- $output .= '<pre>' . _mongo_record_to_string($record) . '</pre>';
- return $output;
-}
-
-function theme_mongo_location_details($record) {
- return '<pre>' . _mongo_record_to_string($record) . '</pre>';
-}
-
-function theme_mongo_url_details($record) {
- return '<pre>' . _mongo_record_to_string($record) . '</pre>';
-}
-
-function _np_scan_stats_mongo_timeout_error() {
- static $once;
- if (!isset($once)) {
- drupal_set_message(t('Mongo timed out, use "mongo> db.currentOp()" to find the long running query'), 'error');
- $once = TRUE;
- }
-}
-
-function _np_scan_stats_mongo_error() {
- static $once;
- if (!isset($once)) {
- drupal_set_message(t('Mongo server is probably down.'), 'error');
- $once = TRUE;
- }
-}
-
-function theme_twitter_word($collection, $word) {
- $output = '';
- if ($collection == 'keyword' || $collection == 'hashtag') {
- $output .= l('T', 'http://twitter.com/#search?q="' . $word . '"') . ' ';
- }
- $pre_word = $collection == 'hashtag' ? '#' : '';
- $output .= l($pre_word . $word, 'admin/reports/scan_stats/' . $collection . '/' . $word);
- return $output;
-}
-
-function _mongo_record_to_string($record) {
- return print_r($record, 1);
-}
Index: www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc
===================================================================
--- www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc (revision 34236)
+++ www/sites/all/modules/custom/np_potpourri/np_potpourri.pages.inc (working copy)
@@ -247,8 +247,17 @@
$placeholders = array_fill(0, count($add), "'%s'");
// $add comes from a form submission. data size assumed to always be sane ( < 1000) so we are not splitting this into chunked operation.
- scan_api_get_mongo('statistics', 'keyword')->remove(array('word' => array('$in' => $add)));
- scan_api_get_mongo('statistics', 'hashtag')->remove(array('word' => array('$in' => $add)));
+ $query = array('word' => array('$in' => $add));
+ try {
+ if ($cursor = scan_api_get_mongo('keyword')) { // V2r15 / keyword / word[] / none (remove query)
+ $cursor->remove($query);
+ }
+ if ($cursor = scan_api_get_mongo('hashtag')) { // V2r15 / hashtag / word[] / none (remove query)
+ $cursor->remove($query);
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
}
if (!empty($remove)) {
// remove from the blacklist
Index: www/sites/all/modules/custom/np_views_sharding/np_views_sharding_query.inc
===================================================================
--- www/sites/all/modules/custom/np_views_sharding/np_views_sharding_query.inc (revision 34236)
+++ www/sites/all/modules/custom/np_views_sharding/np_views_sharding_query.inc (working copy)
@@ -45,30 +45,32 @@
* a field from mongo.
*/
function query($get_count=FALSE) {
- // Only make these changes when generating the COUNT queries.
- if ($get_count) {
- if (!empty($this->np_views_sharding['scan_statistics_fresh_scan'])) {
- $search = 'scan_scan_settings.scan_id IN (';
- foreach ($this->where[0]['clauses'] as $k => $v) {
- if (substr($v, 0, strlen($search)) == $search) {
- unset($this->where[0]['clauses'][$k]);
+ if (isset($this->where[0])) {
+ // Only make these changes when generating the COUNT queries.
+ if ($get_count) {
+ if (!empty($this->np_views_sharding['scan_statistics_fresh_scan'])) {
+ $search = 'scan_scan_settings.scan_id IN (';
+ foreach ($this->where[0]['clauses'] as $k => $v) {
+ if (substr($v, 0, strlen($search)) == $search) {
+ unset($this->where[0]['clauses'][$k]);
+ }
}
}
- }
- if (!empty($this->np_views_sharding['url_statistics_all_time'])) {
- $search = 'urls.id IN (';
- foreach ($this->where[0]['clauses'] as $k => $v) {
- if (substr($v, 0, strlen($search)) == $search) {
- unset($this->where[0]['clauses'][$k]);
+ if (!empty($this->np_views_sharding['url_statistics_all_time'])) {
+ $search = 'urls.id IN (';
+ foreach ($this->where[0]['clauses'] as $k => $v) {
+ if (substr($v, 0, strlen($search)) == $search) {
+ unset($this->where[0]['clauses'][$k]);
+ }
}
}
}
+ if (count($this->where[0]['clauses']) == 0) {
+ // If we ran out of clauses, unset the where array so views doesn't
+ // generate an empty WHERE.
+ unset($this->where[0]);
+ }
}
- if (count($this->where[0]['clauses']) == 0) {
- // If we ran out of clauses, unset the where array so views doesn't
- // generate an empty WHERE.
- unset($this->where[0]);
- }
return parent::query($get_count);
}
@@ -79,19 +81,17 @@
return parent::add_field($table, $field, $alias, $params);
case 'scan_statistics_fresh_scan':
if (empty($this->np_views_sharding[$table])) {
- $query = array(
- 'minutes.velocity' => array('$gt' => 0),
- 'scan_id' => array('$in' => array()),
- );
$this->np_views_sharding[$table] = array(
'table' => 'scan_scan_settings',
'field' => 'scan_id',
- 'mongo' => scan_api_get_mongo('scan', 'scan'),
- 'query' => $query,
+ 'mongo' => 'scan',
+ 'query' => array(
+ 'scan_id' => array('$in' => array()),
+ ),
'mongo_id_field' => 'scan_id',
'map' => array(
- 'scan_statistics_fresh_scan_difference' => array('minutes', 'prev_velocity'),
- 'scan_statistics_fresh_scan_velocity' => array('minutes', 'velocity'),
+ 'scan_statistics_fresh_scan_difference' => array('increasing'),
+ 'scan_statistics_fresh_scan_velocity' => array('velocity', 'minutes_scan'),
),
);
unset($this->table_queue['scan_statistics_fresh_scan']);
@@ -99,16 +99,16 @@
break;
case 'url_statistics_all_time':
if (empty($this->np_views_sharding[$table])) {
- $scan_id = db_result(db_query("SELECT MAX(scan_id) FROM {scan} WHERE nid = %d", arg(1)));
- $query = array(
- 'scan_id' => intval($scan_id),
- 'url_id' => array('$in' => array()),
- );
+ // @@@ NO! This is BAD!
+ $scan_id = db_result(db_query('SELECT s.scan_id FROM {scan} s INNER JOIN {scan_settings} ss ON s.vid = ss.active_vid WHERE ss.nid = %d', arg(1)));
$this->np_views_sharding[$table] = array(
'table' => 'scan_urls',
'field' => 'url_id',
- 'mongo' => scan_api_get_mongo('scan', 'url'),
- 'query' => $query,
+ 'mongo' => 'url',
+ 'query' => array(
+ 'scan_id' => intval($scan_id),
+ 'url_id' => array('$in' => array()),
+ ),
'mongo_id_field' => 'url_id',
'map' => array(
'url_statistics_all_time_count' => array('count'),
@@ -194,13 +194,13 @@
return; // Abort sort.
}
}
- if ($cursor = scan_api_get_mongo('scan', 'scan')) {
-
- // These were previously unconditional criteria applied to mongo instead of being applied to mysql first.
-// $query['minutes.velocity'] = array('$gt' => 0);
-
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan.status,scan.client_id[] / velocity.minutes:-1,velocity.minutes:1 paged query
+ $fields = array(
+ 'scan_id' => 1,
+ 'velocity.minutes_scan' => 1,
+ );
$sort = array(
- 'minutes.velocity' => $order == 'desc' ? -1 : +1,
+ 'velocity.minutes_scan' => $order == 'desc' ? -1 : +1,
);
$per_page = 20;
@@ -208,14 +208,14 @@
$cases = array();
try {
$result = $cursor
- ->find($query, array('scan_id' => 1, 'minutes.velocity' => 1, 'minutes.prev_velocity' => 1))
+ ->find($query, $fields)
->sort($sort)
->limit($per_page)
->skip($on_page * $per_page)
->timeout(scan_api_get_mongo_timeout());
foreach ($result as $a) {
$scan_id = $a['scan_id'];
- $cases[$scan_id] = "WHEN $scan_id THEN " . $a['minutes']['velocity'];
+ $cases[$scan_id] = "WHEN $scan_id THEN " . $a['velocity']['minutes_scan'];
}
}
catch (MongoCursorTimeoutException $e) {
@@ -229,7 +229,6 @@
$order_field = "FIELD($id_field, $scan_ids)";
$alias = 'np_views_sharding_order';
$this->add_field(NULL, $order_field, $alias);
- $this->add_groupby($id_field);
unset($this->table_queue['scan_statistics_fresh_scan']);
// Ordering is already done by Mongo, we just need to keep it, so desc
// would mean we order the opposite way as Mongo returned our IDs.
@@ -276,18 +275,22 @@
return; // Abort sort.
}
}
-
- $sort = array(
- 'count' => $order == 'desc' ? -1 : +1,
- );
- if ($cursor = scan_api_get_mongo('scan', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id,scan.client_id,category / count:-1,count:1 paged query
+ $fields = array(
+ 'url_id' => 1,
+ 'count' => 1,
+ );
+ $sort = array(
+ 'count' => $order == 'desc' ? -1 : +1,
+ );
+
$per_page = 20;
$on_page = isset($_GET['page']) ? $_GET['page'] : 0;
$cases = array();
try {
$result = $cursor
- ->find($query, array('url_id' => 1, 'count' => 1))
+ ->find($query, $fields)
->sort($sort)
->limit($per_page)
->skip($on_page * $per_page)
@@ -307,7 +310,6 @@
$order_field = "FIELD($id_field, $url_ids)";
$alias = 'np_views_sharding_order';
$this->add_field(NULL, $order_field, $alias);
- $this->add_groupby($id_field);
unset($this->table_queue['url_statistics_all_time']);
// Ordering is already done by Mongo, we just need to keep it, so desc
// would mean we order the opposite way as Mongo returned our IDs.
@@ -319,7 +321,12 @@
return parent::add_orderby($table, $field, $order, $alias);
}
}
+
+ /**
+ * Tack on id_field for sharding stuff.
+ */
function build(&$view) {
+ // @@@ I doubt if this is the best way to do this.
foreach ($this->np_views_sharding as $key => $data) {
$id_field = $this->add_field($data['table'], $data['field']);
$this->np_views_sharding[$key]['id_field'] = $id_field;
@@ -431,6 +438,7 @@
$fields[$mongo_id_field] = 1;
// Initialize the returned mongo field to 0.
foreach ($view->result as $view_result_key => $result) {
+ // Fill the ids with values found on the mysql side.
$data['query'][$mongo_id_field]['$in'][] = (int)$result->$id_field;
$id_map[$result->$id_field][] = $view_result_key;
foreach ($data['map'] as $mysql_field => $mongo_field) {
@@ -440,7 +448,7 @@
if ($data['mongo']) {
$per_page = 20;
try {
- $result = $data['mongo']
+ $result = scan_api_get_mongo($data['mongo']) // V2r15 / scan,url / @@@ / none (variable sharding query)
->find($data['query'], $fields)
->limit($per_page)
->timeout(scan_api_get_mongo_timeout());
@@ -450,16 +458,24 @@
}
$view_result_key = $id_map[$array[$mongo_id_field]];
unset($id_map[$array[$mongo_id_field]]);
- if (isset($array['minutes'])) {
- $array['minutes'] += array('prev_velocity' => 0, 'velocity' => 0);
- }
+
+ // Pull in
foreach ($data['map'] as $mysql_field => $mongo_field) {
+ // Special handling for subfields.
if (isset($mongo_field[1])) {
- if ($mongo_field[1] == 'prev_velocity') {
- $value = empty($array['minutes']['prev_velocity']) ? 0 : 100 * ($array['minutes']['velocity'] - $array['minutes']['prev_velocity']) / $array['minutes']['prev_velocity'];
+ if ($mongo_field[0] == 'velocity' && $mongo_field[1] == 'minutes') {
+ // Multiplier hack for velocity.minutes.
+ $value = $array['velocity']['minutes'] * 12;
}
else {
- $value = $array[$mongo_field[0]][$mongo_field[1]] * 12;
+ // Copy stuff out of arrays.
+ if (isset($mongo_field[2])) {
+ $value = $array[$mongo_field[0]][$mongo_field[1]][$mongo_field[2]];
+ assert("np_views_sharding_query: Someone is using a sub sub field?");
+ }
+ else {
+ $value = $array[$mongo_field[0]][$mongo_field[1]];
+ }
}
}
else {
Index: www/sites/all/modules/custom/np_scan_import/np_scan_import.module
===================================================================
--- www/sites/all/modules/custom/np_scan_import/np_scan_import.module (revision 34236)
+++ www/sites/all/modules/custom/np_scan_import/np_scan_import.module (working copy)
@@ -99,14 +99,18 @@
scan_api_set_active_shard();
if (isset($nodes)) {
// fill up scan stat
- $result = scan_api_get_mongo('statistics', 'scan')
- ->find(
- array('scan_id' => array('$in' => $scan_ids)),
- array('scan_id'=> 1, 'minutes.velocity' => 1)
- );
- while($result->hasNext()) {
- $row = $result->getNext();
- $nodes[$row['scan_id']]->velocity = $row['minutes']['velocity'];
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
+ try {
+ $query = array('scan_id' => array('$in' => $scan_ids));
+ $fields = array('scan_id' => 1, 'velocity.minutes' => 1);
+ $result = $cursor->find($query, $fields)
+ ->timeout(scan_api_get_mongo_timeout());
+ foreach ($result as $row) {
+ $nodes[$row['scan_id']]->velocity = $row['velocity']['minutes'];
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
}
$notify = FALSE;
Index: view/sites/all/modules/scan_api/scan_api.module
===================================================================
--- view/sites/all/modules/scan_api/scan_api.module (revision 34236)
+++ view/sites/all/modules/scan_api/scan_api.module (working copy)
@@ -686,6 +686,7 @@
if ($related) {
scan_api_set_active_shard('misc', 'scan');
// look for scans that has the words
+ // @@@ This storage format is stupid.
$rs = db_query('SELECT and_words, or_words FROM {scan}');
$related_result = array();
while ($row = db_fetch_object($rs)) {
@@ -753,7 +754,7 @@
function _scan_top_keywords(&$velocity, &$trending, $type, $scan_ids, $order, $count, $related, $related_keywords, $related_hashtags) {
$interval = ($type == 1) ? 'minutes' : 'hours';
$multiplier = ($type == 1) ? 12 : 1;
- $velocity_field = $interval . '.velocity';
+ $velocity_field = 'velocity.' . $interval;
foreach (array('keyword', 'hashtag') as $collection_name) {
if ($related) {
$words = ($collection_name == 'keyword') ? $related_keywords : $related_hashtags;
@@ -764,7 +765,7 @@
if ($scan_ids && (!$related || ($related && isset($query['word'])))) {
$order_field = ($order == 'velocity') ? $velocity_field : 'trending';
- $cursor = scan_api_get_mongo('scan_stats', $collection_name); // keyword, hashtag
+ $cursor = scan_api_get_mongo($collection_name); // V2r15 / keyword, hashtag / scan_id[], word[] / velocity.minutes, velocity.hours, trending
if (!$cursor) {
return;
}
@@ -779,6 +780,7 @@
->timeout(scan_api_get_mongo_timeout());
foreach ($results as $row) {
if (!isset($row['word'])) {
+ assert('Worker bug: keyword / hashtag is speechless!');
// bug in workers. they write empty word records... bad
continue;
}
@@ -786,7 +788,7 @@
$row['word'] = '#' . $row['word'];
}
if ($order == 'velocity') {
- $order_data = isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] : 0;
+ $order_data = isset($row['velocity'][$interval]) ? $row['velocity'][$interval] : 0;
}
else {
$order_data = $row['trending'];
@@ -794,7 +796,7 @@
if (!isset($ordering_data[$row['word']]) || ($ordering_data[$row['word']] < $order_data)) {
$ordering_data[$row['word']] = $order_data;
$data[$row['word']] = array(
- 'velocity' => isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] * $multiplier: 0,
+ 'velocity' => isset($row['velocity'][$interval]) ? $row['velocity'][$interval] * $multiplier: 0,
'trending' => $row['trending'],
'word' => $row['word'],
);
@@ -824,59 +826,6 @@
}
/**
- * Helper function for keyword / hashtag stat retrieval
- *
- * @param $scan_id
- * int - scan_id
- * @param $keywords
- * array of keywords
- * @param $is_hashtag
- * bool - are the keywords hashtags or normal keywords
- * @param $multirow
- * bool - TRUE if need order on sql, and a return of array of velocities
- * @param $type
- * int, 1 - fresh stats, 2 - stats based on 6 hr timeslice
- */
-function _scan_keyword_velocity($scan_id, $keywords, $is_hashtag, $multirow, $type) {
- $result = array();
-
- $interval = ($type == 1) ? 'minutes' : 'hours';
- $multiplier = ($type == 1) ? 12 : 1;
- $velocity_field = $interval . '.velocity';
- $collection_name = $is_hashtag ? 'hashtag' : 'keyword';
- $fields = array($velocity_field => 1, 'trending' => 1, 'word' => 1);
- $query = array(
- 'scan_id' => intval($scan_id),
- 'word' => array('$in' => array_map('strtolower', $keywords)),
- );
- if ($cursor = scan_api_get_mongo('scan_stats', $collection_name)) { // keyword, hashtag
- try {
- $cursor = $cursor
- ->find($query, $fields)
- ->sort(array($velocity_field => -1))
- ->timeout(scan_api_get_mongo_timeout());
- if ($multirow) {
- foreach ($cursor as $row) {
- if ($is_hashtag) {
- $row['word'] = '#' . $row['word'];
- }
- $result[$row['word']] = isset($row[$interval]['velocity']) ? $row[$interval]['velocity'] * $multiplier : 0;
- }
- }
- elseif ($cursor->hasNext()) {
- $result = $cursor->getNext();
- if (isset($result[$interval]['velocity'])) {
- $result['velocity'] = $result[$interval]['velocity'] * $multiplier;
- }
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
- return $result;
-}
-
-/**
* Determine a keyword's velocity
*/
function scan_keyword_velocity() {
@@ -887,32 +836,50 @@
$hashtag = TRUE;
$keyword = substr($keyword, 1);
}
- // get global stat
- $return = _scan_keyword_velocity(0, array($keyword), $hashtag, FALSE, 1);
- // if twitter have gone sleeping fall back to 6 hr timeslice
- if (!$return || !$return['velocity']) {
- $return = _scan_keyword_velocity(0, array($keyword), $hashtag, FALSE, 2);
- }
- // get per scan stat if we have scan_id (searchapi mentions do not get into global stat
- // making global stat too low for what we actually show...
- $per_scan = array();
+ $collection_name = $hashtag ? 'hashtag' : 'keyword';
+ $fields = array(
+ 'scan_id' => 1,
+ 'velocity.minutes' => 1,
+ 'velocity.hours' => 1,
+ 'trending' => 1,
+ );
+ $query = array(
+ 'scan_id' => $scan_id ? array('$in' => array(0, $scan_id)) : 0,
+ 'word' => strtolower($keyword),
+ );
+ $empty = array(
+ 'velocity.minutes' => 0,
+ 'velocity.hours' => 0,
+ 'trending' => 0,
+ );
+
+ $data = scan_api_mongo_doquery(array( // V2r15 / keyword, hashtag / scan_id[2], word / none
+ 'collection' => $collection_name,
+ 'key' => 'scan_id',
+ 'query' => $query,
+ 'fields' => $fields,
+ 'empty' => $empty,
+ 'emptykeys' => $scan_id ? $query['scan_id']['$in'] : array(0),
+ 'flatten' => TRUE,
+ ));
+
+ $return = $data[0];
+ // Fall back to hours if minutes is 0 (i.e. twitter might be down?). @@@ V2 Should we just go with minutes?
+ $return['velocity'] = $return['velocity.minutes'] ? $return['velocity.minutes'] : $return['velocity.hours'];
+
if ($scan_id) {
- $per_scan = _scan_keyword_velocity($scan_id, array($keyword), $hashtag, FALSE, 1);
- if (!$per_scan || !$per_scan['velocity']) {
- $per_scan = _scan_keyword_velocity($scan_id, array($keyword), $hashtag, FALSE, 2);
+ $scan = $data[$scan_id];
+ $scan['velocity'] = $scan['velocity.minutes'] ? $scan['velocity.minutes'] : $scan['velocity.hours'];
+ if ($scan['velocity'] > $return['velocity']) {
+ // Use per scan stats if it is better than global stats.
+ // Scans can be higher than global because searchapi mentions do not end up in global stats.
+ $return = $scan;
}
- if (!$return && $per_scan) {
- $return = $per_scan;
- }
}
- // if we have per scan stat as well then return the one that's higher
- if ($per_scan && $per_scan['velocity'] > $return['velocity']) {
- $return = $per_scan;
- }
- // if not found in DB
- if (!$return) {
- $return = array('velocity' => 0, 'trending' => 0);
- }
+
+ unset($return['velocity.minutes']);
+ unset($return['velocity.hours']);
+
print _scan_api_format($return, $format);
}
@@ -927,49 +894,14 @@
// $scan_id, $format
extract(_scan_get_args());
scan_api_set_active_shard('misc');
- $row_keywords = db_fetch_object(db_query("SELECT and_words, or_words FROM {scan} WHERE scan_id = %d", $scan_id));
+ $r = db_fetch_object(db_query("SELECT and_words, or_words FROM {scan} WHERE scan_id = %d", $scan_id));
scan_api_set_active_shard();
- $return = array();
- if ($row_keywords) {
- foreach (array('and_words', 'or_words') as $keyword_field) {
- // normalize keywords / hashtags
- $keywords = array_filter(array_map('trim', explode(',', $row_keywords->$keyword_field)));
- // separate keywords / hashtags
- $words['hashtag'] = array_filter($keywords, '_scan_api_filter_hashtags');
- $words['keyword'] = array_diff($keywords, $words['hashtag']);
- $result = array();
- // pour to one container both hashtag and keyword velocities
- foreach (array('keyword', 'hashtag') as $type) {
- if ($words[$type]) {
- $params = $words[$type];
- if ($type == 'hashtag') {
- // hashtag table doesnt contain starting #
- foreach ($params as $key => $value) {
- $params[$key] = substr($value, 1);
- }
- }
- $tmp = _scan_keyword_velocity(0, $params, $type == 'hashtag', TRUE, 1);
- // if twitter went to sleep we fall back to 6hr timeslice
- if (!$tmp) {
- $tmp = _scan_keyword_velocity(0, $params, $type == 'hashtag', TRUE, 2);
- }
- $result = array_merge($result, $tmp);
- }
- }
- // ordering
- arsort($result);
- // merge sorted results into return container
- foreach ($result as $word => $velocity) {
- $return[] = $word;
- }
- // ones that didnt get in...
- $remaining = array_udiff(array_merge($words['keyword'], $words['hashtag']), array_keys($result), 'strcasecmp');
- // ...gets ordered by name
- sort($remaining);
- // and gets into the end of their respective list
- $return = array_merge($return, $remaining);
- }
- }
+
+  // Intentionally NOT sorting by velocity here; see ticket #1765 and
+  // changeset [31715] for the history of that requirement.
+  // @@@ This callback is a candidate for removal entirely.
+ $return = array_filter(array_map('trim', explode(',', $r->and_words . ',' . $r->or_words)));
+
print _scan_api_format($return, $format);
}
@@ -983,22 +915,20 @@
'velocity' => 0,
'difference' => 0,
);
- $fields = array('minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
+ $fields = array('velocity.minutes_scan' => 1, 'increasing' => 1);
$query = array(
'scan_id' => intval($scan_id),
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id / none (single)
try {
$cursor = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
if ($cursor->hasNext()) {
$row = $cursor->getNext();
- $velocity = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? round(12 * $row['minutes']['prev_velocity']) : 0;
$return = array(
- 'velocity' => $velocity,
- 'difference' => $velocity - $prev_velocity,
+ 'velocity' => isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0,
+ 'difference' => $row['increasing'] ? 1 : -1,
);
}
}
@@ -1011,7 +941,7 @@
/**
* Determine a scan's number of uniq links
*/
-function scan_scan_uniq_links() {
+function scan_scan_uniq_links() { // @@@ V2 performance?
// $scan_id, $format
extract(_scan_get_args());
$return = array(
@@ -1025,7 +955,8 @@
SCAN_CATEGORY_PHOTOS => 'photo',
SCAN_CATEGORY_VIDEOS => 'video',
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id, category / none (count)
+ // @@@V2 performance Switch to single query grouped by category?
foreach ($categories as $category => $return_key) {
$query = array(
'scan_id' => intval($scan_id),
@@ -1104,7 +1035,7 @@
'category' => $category,
);
$range = $blocked_url_ids ? sizeof($blocked_url_ids) : 0;
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id[], category / count:-1 (query is paged!)
try {
$cursor = $cursor
->find($query, $fields)
@@ -1140,7 +1071,7 @@
'scan_id' => array('$in' => $scan_ids),
'category' => $category,
);
- if ($cursor = scan_api_get_mongo('urls', 'url')) {
+ if ($cursor = scan_api_get_mongo('url')) { // V2r15 / url / scan_id[], category, (url_id[]) / none (count)
try {
$count = $cursor
->find($query)
@@ -1329,6 +1260,7 @@
scan_get_links(SCAN_CATEGORY_VIDEOS);
}
+// @@@ V2 This has been broken for almost a year, it needs to either get fixed or removed.
function scan_top_users() {
// $scan_id,, $count, $max_age, $format
extract(_scan_get_args());
@@ -1383,7 +1315,7 @@
$interval_size_ = scan_api_interval_size($collection, $time_type);
$interval_count_ = scan_api_interval_count($collection, $time_type);
-
+
// This is a direct port of the C++ code.
// int i = when % (interval_size_ * interval_count_);
@@ -1447,6 +1379,69 @@
}
/**
+ * Get bucket data for rendering a sparkline, etc.
+ */
+function scan_get_bucket_data($collection, $time_type, $key = 'scan_id', $values, $query = array(), $when = FALSE) {
+ if (!$when) {
+ $when = time();
+ }
+ $interval_size_ = scan_api_interval_size($collection, $time_type);
+ $interval_count_ = scan_api_interval_count($collection, $time_type);
+ $bucket_cycle_time = $interval_size_ * $interval_count_;
+ $index = scan_api_bucket_index($collection, $time_type, $when);
+
+ if (is_array($values)) {
+ $query[$key] = array('$in' => $values);
+ }
+ else {
+ $query[$key] = $values;
+ $values = array($values);
+ }
+
+ // Initialize the output array.
+ $data = array();
+ foreach ($values as $v) {
+ $data[$v] = array(
+ 'size' => $interval_count_,
+ 'offset' => $index,
+// 'cutoff' => 0, // @@@ Point where we run out of data
+// 'range' => 0, // @@@ Hours in range
+ 'data' => array_fill(0, $interval_count_, 0),
+ );
+ }
+
+ $fields = array(
+ $key => 1,
+ 'created' => 1,
+ 'updated' => 1,
+ $time_type => 1,
+ );
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 / * / scan_id[] / none (sparkline data analyzer)
+ try {
+ $cursor = $cursor->find($query, $fields)
+ ->timeout(scan_api_get_mongo_timeout());
+ foreach ($cursor as $row) {
+ // If this scan hasn't been touched for more than a cycle, continue.
+ // We already initialized everything to 0 above.
+ if ($when > ($bucket_cycle_time + $row['updated'])) {
+ continue;
+ }
+
+ for ($i = 0; $i < $interval_count_; $i++) {
+ // OK, so we need to walk forwards on one array while walking backwards and wrapping around on another.
+ // Adding $interval_count_ to $index on the right is done because PHP's modulus handles negatives in the
+ // equally-correct-but-not-as-useful-as-the-other-way-around fashion.
+ $data[$row['scan_id']]['data'][$i] = $row[$time_type][($interval_count_ + $index - $i) % $interval_count_];
+ }
+ }
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
+ }
+ return $data;
+}
+
+/**
* Produce list of buckets in order from newest to oldest
* scan buckets do not maintain that order, bellow current undext we have new items, above we have old
* which makes it not easy to manage. here's API for that.
@@ -1490,42 +1485,22 @@
return $bucket;
}
function scan_stats_velocity() {
+ $when = time();
// $scan_id,, $count, $max_age, $format
extract(_scan_get_args());
+ $scan_id = intval($scan_id);
- $epoch = gmdate('c', 0);
- $return = array_fill(0, $count, array('count' => 0, 'start_time' => $epoch, 'last_occurence' => $epoch + 3600));
-
- if ($cursor = scan_api_get_mongo('statistics', 'scan')) {
- $fields = array('hours' => 1, 'updated' => 1);
- $query = array(
- 'scan_id' => intval($scan_id),
+ $data = scan_get_bucket_data('scan', 'hours', 'scan_id', $scan_id, array(), $when);
+ $return = array();
+ foreach ($data[$scan_id]['data'] as $k => $v) {
+ $return[$k] = array(
+ 'count' => $v,
+ //@@@V2 This is horribly inefficient, data transfer wise. Would be much better to
+ // pass a single time and have the JS code do offsets from that itself.
+ 'start_time' => gmdate('c', ($when - $when % 3600) - ($k * 3600)),
);
- try {
- $cursor = $cursor
- ->find($query)
- ->timeout(scan_api_get_mongo_timeout());
+ }
- if ($cursor->hasNext()) {
- $row = $cursor->getNext();
- $updated = explode(' ', (string)$row['updated']);
- $bucket = scan_api_reorder_scan_time_buckets($row['hours'], 'scan', 'hours', $updated[1]);
-
- $now = mktime(date('H'), 0, 0 );
- foreach($bucket as $index => $value ) {
- if ( $count == $index ) {
- break;
- }
- $return[$index] = array(
- 'count' => $value,
- 'start_time' => gmdate('c', $now - ($index * 60 * 60)),
- );
- }
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
print _scan_api_format($return, $format);
}
@@ -1565,17 +1540,17 @@
// fill up the stats
if ($return) {
$scan_ids = array_keys($return);
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'hours.velocity');
+ $fields = array('scan_id' => 1, 'velocity.minutes_scan' => 1);
$query = array(
'scan_id' => array('$in' => $scan_ids),
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
try {
$cursor = $cursor
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
- $return[$row['scan_id']]['velocity'] = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
+ $return[$row['scan_id']]['velocity'] = isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0;
}
}
catch (MongoCursorTimeoutException $e) {
@@ -1586,25 +1561,33 @@
}
else {
// get a basic data structure in ordered form
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1);
+ $fields = array(
+ 'scan_id' => 1,
+ 'velocity.minutes_scan' => 1,
+ 'velocity.hours_general' => 1, // @@@V2 was days.general.velocity
+ 'velocity.hours_photo' => 1, //@@@V2 was days.photo.velocity
+ 'velocity.hours_video' => 1, //@@@V2 was days.video.velocity
+ 'velocity.hours_urls' => 1, //@@@V2 was days.velocity
+ );
$query = array(
- 'scan.client_id' => intval($client_id), 'scan.status' => 1, 'scan.active' => 1,
+ 'scan.client_id' => intval($client_id), 'scan.status' => 1
);
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan.client_id, scan.status / velocity.minutes_scan:-1
try {
$cursor = $cursor
->find($query, $fields)
- ->sort(array('minutes.velocity' => -1))
+ ->sort(array('velocity.minutes_scan' => -1))
->limit($count)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
$return[$row['scan_id']] = array(
'scan_id' => $row['scan_id'],
- 'velocity' => isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0,
- 'general' => 0,
- 'photo' => 0,
- 'video' => 0,
- 'summary' => 0,
+ // @@@ x12 multiplier is less accurate than summing the minutes.scan array.
+ 'velocity' => isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0,
+ 'general' => isset($row['velocity']['hours_general']) ? round($row['velocity']['hours_general']) : 0,
+ 'photo' => isset($row['velocity']['hours_photo']) ? round($row['velocity']['hours_photo']) : 0,
+ 'video' => isset($row['velocity']['hours_video']) ? round($row['velocity']['hours_video']) : 0,
+ 'summary' => isset($row['velocity']['hours_urls']) ? round($row['velocity']['hours_urls']) : 0,
);
}
}
@@ -1630,34 +1613,9 @@
}
}
}
- // common query for both cases: if we have data we need to fill up url uniq stats
if ($return) {
- $fields = array(
- 'scan_id' => 1,
- 'days.velocity' => 1,
- 'days.general.velocity' => 1,
- 'days.photo.velocity' => 1,
- 'days.video.velocity' => 1,
- );
- $query = array(
- 'scan_id' => array('$in' => $scan_ids),
- );
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
- try {
- $cursor = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($cursor as $row) {
- $return[$row['scan_id']]['summary'] = round($row['days']['velocity'] * 30);
- $return[$row['scan_id']]['general'] = round($row['days']['general']['velocity'] * 30);
- $return[$row['scan_id']]['photo'] = round($row['days']['photo']['velocity'] * 30);
- $return[$row['scan_id']]['video'] = round($row['days']['video']['velocity'] * 30);
- }
- $return = array_filter($return, '_scan_api_filter_nid');
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
+ // Filter out scans not backed by nodes.
+ $return = array_filter($return, '_scan_api_filter_nid');
}
scan_api_set_active_shard();
print _scan_api_format(array_values($return), $format);
@@ -1752,33 +1710,15 @@
if ($matches) {
$scan_ids = array_keys($matches);
// fill up the scan stats part
- if ($cursor = scan_api_get_mongo('scan_stats', 'scan')) {
- $fields = array('scan_id' => 1, 'minutes.velocity' => 1, 'minutes.prev_velocity' => 1);
- $query = array(
- 'scan_id' => array('$in' => $scan_ids),
- );
- try {
- $cursor = $cursor
- ->find($query, $fields)
- ->timeout(scan_api_get_mongo_timeout());
- foreach ($cursor as $row) {
- $velocity = isset($row['minutes']['velocity']) ? round(12 * $row['minutes']['velocity']) : 0;
- $prev_velocity = isset($row['minutes']['prev_velocity']) ? round(12 * $row['minutes']['prev_velocity']) : 0;
- $data[$row['scan_id']]['velocity'] = $velocity;
- $data[$row['scan_id']]['difference'] = $velocity - $prev_velocity;
- }
- }
- catch (MongoCursorTimeoutException $e) {
- }
- }
- // fill up the url uniq stats part
- if ($cursor = scan_api_get_mongo('urls', 'scanurl')) {
+ if ($cursor = scan_api_get_mongo('scan')) { // V2r15 / scan / scan_id[] / none
$fields = array(
'scan_id' => 1,
- 'days.velocity' => 1,
- 'days.general.velocity' => 1,
- 'days.photo.velocity' => 1,
- 'days.video.velocity' => 1,
+ 'velocity.minutes_scan' => 1,
+ 'velocity.increasing' => 1,
+ 'velocity.hours_general' => 1, // @@@V2 was days.general.velocity
+ 'velocity.hours_photo' => 1, //@@@V2 was days.photo.velocity
+ 'velocity.hours_video' => 1, //@@@V2 was days.video.velocity
+ 'velocity.hours_urls' => 1, //@@@V2 was days.velocity
);
$query = array(
'scan_id' => array('$in' => $scan_ids),
@@ -1788,10 +1728,13 @@
->find($query, $fields)
->timeout(scan_api_get_mongo_timeout());
foreach ($cursor as $row) {
- $data[$row['scan_id']]['uniq_links_summary'] = round($row['days']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_general'] = round($row['days']['general']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_photo'] = round($row['days']['photo']['velocity'] * 30);
- $data[$row['scan_id']]['uniq_links_video'] = round($row['days']['video']['velocity'] * 30);
+ $data[$row['scan_id']]['velocity'] = isset($row['velocity']['minutes_scan']) ? round(12 * $row['velocity']['minutes_scan']) : 0;
+ //@@@V2 This is really a boolean now -- requires theme / js change to fix.
+ $data[$row['scan_id']]['difference'] = isset($row['velocity']['increasing']) ? $row['velocity']['increasing'] : 0;
+ $data[$row['scan_id']]['uniq_links_summary'] = round($row['velocity']['hours_urls']);
+ $data[$row['scan_id']]['uniq_links_general'] = round($row['velocity']['hours_general']);
+ $data[$row['scan_id']]['uniq_links_photo'] = round($row['velocity']['hours_photo']);
+ $data[$row['scan_id']]['uniq_links_video'] = round($row['velocity']['hours_video']);
}
}
catch (MongoCursorTimeoutException $e) {
@@ -1932,10 +1875,10 @@
* This is the sorting function
*/
function _loc_sort($a, $b) {
- if ($a['hours']['velocity'] == $b['hours']['velocity']) {
+ if ($a['velocity']['hours'] == $b['velocity']['hours']) {
return 0;
}
- return ($a['hours']['velocity'] > $b['hours']['velocity']) ? -1 : 1;
+ return ($a['velocity']['hours'] > $b['velocity']['hours']) ? -1 : 1;
}
/**
* Aditional explanation including draging to explain this logic is available
@@ -1974,10 +1917,10 @@
* This will be executed in 3 places and i didnt want to replicate the same query. Also by using function i'm hoping to show relation between uses.
*/
function _mongo_top_locations_get_query_result($query) {
- if ($cursor = scan_api_get_mongo('statistics', 'location')) {
+ if ($cursor = scan_api_get_mongo('location')) { // V2r15 / location / location.lon, location.lat, scan_id, ??? / velocity.hours
$fields = array(
'location_id' => 1,
- 'hours.velocity' => 1,
+ 'velocity.hours' => 1,
'location.lat' => 1,
'location.lon' => 1,
'location.name' => 1,
@@ -1985,7 +1928,7 @@
try {
return $cursor
->find($query, $fields)
- ->sort(array('hours.velocity' => -1))
+ ->sort(array('velocity.hours' => -1))
->limit(10)
->timeout(scan_api_get_mongo_timeout());
}
@@ -2017,7 +1960,7 @@
foreach($result as $top_loc) {
$locations[] = array(
'name' => $top_loc['location']['name'],
- 'count' => $top_loc['hours']['velocity'],
+ 'count' => $top_loc['velocity']['hours'],
'location_id' => $top_loc['location_id'],
'longitude' => $top_loc['location']['lon'],
'latitude' => $top_loc['location']['lat'],
@@ -2499,20 +2442,30 @@
* @return
* A mongoCollection.
*/
-function scan_api_get_mongo($db_name, $collection_name) {
+function scan_api_get_mongo($collection_name, $shard_key = FALSE) {
static $connections;
- if (!isset($connections[$db_name])) {
+ static $mongo_dbs;
+ if (!isset($mongo_dbs)) {
$mongo_dbs = variable_get('mongo_dbs', array());
- $mongo_db = $mongo_dbs[isset($mongo_dbs[$db_name]) ? $db_name : 'default'];
+ }
+ if (!$shard_key) {
+ $shard_key = $collection_name;
+ }
+ // Normalize shard key so we can reuse connections often.
+ if (!isset($mongo_dbs[$shard_key])) {
+ $shard_key = 'default';
+ }
+ if (!isset($connections[$shard_key])) {
+ $mongo_db = $mongo_dbs[$shard_key];
try {
$mongo = new mongo($mongo_db['host']);
- $connections[$db_name] = $mongo->selectDB($mongo_db['db']);
+ $connections[$shard_key] = $mongo->selectDB($mongo_db['db']);
}
catch (MongoConnectionException $e) {
return;
}
}
- return $connections[$db_name]->selectCollection($collection_name);
+ return $connections[$shard_key]->selectCollection($collection_name);
}
/**
@@ -2530,3 +2483,139 @@
return $timeout;
}
+/**
+ * One stop shop for getting a blob of stuff from mongo.
+ */
+function scan_api_mongo_doquery($args) {
+ //$collection
+ $key = FALSE;
+ //$query
+ $fields = array();
+ $sort = NULL;
+ $limit = 100;
+ $empty = array();
+ $emptykeys = array();
+ $flatten = FALSE;
+ $zeromap = array();
+ $remap = array();
+ $stripmongoid = TRUE;
+ extract($args);
+
+ if (!$key) {
+ // No sense in reading multiple values if this will be single-result.
+ $limit = 1;
+ }
+
+ $return = array();
+ if (!empty($emptykeys)) {
+ foreach ($emptykeys as $k) {
+ $return[$k] = array();
+ }
+ }
+
+ if ($cursor = scan_api_get_mongo($collection)) { // V2r15 metaquery
+ try {
+ $cursor = $cursor->find($query, $fields)
+ ->limit($limit);
+ if (isset($sort)) {
+ $cursor->sort($sort);
+ }
+ $query = $cursor->timeout(scan_api_get_mongo_timeout());
+ if (!$key) {
+ if ($query->hasNext()) {
+ // Switch to findOne() instead of find()?
+ $return[0] = $query->getNext();
+ }
+ else {
+ // No result.
+ return FALSE;
+ }
+ }
+ else if (strpos($key, '.') !== FALSE) {
+ $keyparts = explode('.', $key);
+ foreach ($query as $row) {
+ $r =& $row;
+ foreach ($keyparts as $part) {
+ $r =& $r[$part];
+ }
+ $rowkey = $r;
+ unset($r);
+ $return[$rowkey] = $row;
+ }
+ }
+ else {
+ foreach ($query as $row) {
+ // Assumes data from mongo is consistent.
+ $return[$row[$key]] = $row;
+ }
+ }
+ if ($stripmongoid) {
+ foreach ($return as $k => $v) {
+ unset($return[$k]['_id']);
+ }
+ }
+ if ($flatten) {
+ // Single level flattening. Doing it like this because recursion sucks.
+ // Not gonna bother with more than one dot for now.
+ foreach ($return as $k => $v) {
+ if (is_array($v)) {
+ foreach ($return[$k] as $kk => $vv) {
+ if (is_array($vv)) {
+ foreach ($return[$k][$kk] as $kkk => $vvv) {
+ $return[$k]["$kk.$kkk"] =& $return[$k][$kk][$kkk];
+ }
+ unset($return[$k][$kk]);
+ }
+ }
+ }
+ }
+ }
+ // Do a single level initialization of defaults.
+ if (!empty($empty)) {
+ foreach ($return as $k => $v) {
+ foreach ($empty as $kk => $vv) {
+ if (!isset($return[$k][$kk])) {
+ $return[$k][$kk] = $vv;
+ }
+ }
+ }
+ }
+ // Do zero mapping for fallbacks.
+ // Note: This only applies if every entry in that field is 0.
+ if (!empty($zeromap)) {
+ foreach ($zeromap as $src => $dst) {
+ $fallback = TRUE;
+ foreach ($return as $k => $v) {
+ if ($return[$k][$src]) {
+ $fallback = FALSE;
+ break;
+ }
+ }
+      foreach ($return as $k => $v) {
+ if ($fallback) {
+ $return[$k][$dst] =& $return[$k][$src];
+ }
+ // Always unset src, even if not falling back.
+ unset($return[$k][$src]);
+ }
+ }
+ }
+ // Perform output remapping to adapt the result array on behalf
+ // of the caller.
+ if (!empty($remap)) {
+ foreach ($remap as $src => $dst) {
+      foreach ($return as $k => $v) {
+        $return[$k][$dst] =& $return[$k][$src];
+        unset($return[$k][$src]);
+      }
+ }
+ }
+ if (!$key) {
+ return $return[0];
+ }
+ return $return;
+ }
+ catch (MongoCursorTimeoutException $e) {
+ }
+ }
+}