Alphred
  • Namespace
  • Class
  • Tree
  • Deprecated
  • Todo
  • Download

Namespaces

  • Alphred
  • None

Classes

  • Alphred
  1 <?php
  2 /**
  3  * Contains Filter class for Alphred
  4  *
  5  * This file contains the functions for filtering arrays of text
  6  *
  7  * This is almost a direct translation of the filter functionality of
  8  * Deanishe's Alfred Workflows (https://github.com/deanishe/alfred-workflow).
  9  * He and his collaborators get credit for this one.
 10  *
 11  * Also, if you are folding diacritics, then you can work only with
 12  * characters that can be transliterated to ASCII.
 13  *
 14  * PHP version 5
 15  *
 16  * @package      Alphred
 17  * @copyright  Shawn Patrick Rice 2014
 18  * @license    http://opensource.org/licenses/MIT  MIT
 19  * @version    1.0.0
 20  * @author     Shawn Patrick Rice <rice@shawnrice.org>
 21  * @link       http://www.github.com/shawnrice/alphred
 22  * @link       http://shawnrice.github.io/alphred
 23  * @since      File available since Release 1.0.0
 24  *
 25  */
 26 
 27 namespace Alphred;
 28 
 29 /**
 30  * Filters arrays to match a query
 31  */
 32 class Filter {
 33 
 34     /**
 35      * Filters an array based on a query
 36      *
 37      * Passing an empty query ($needle) to this method will simply return the initial array.
 38      * If you have `fold` on, then this will fail on characters that cannot be translitered
 39      * into regular ASCII, so most Asian languages.
 40      *
 41      * The options to be set are:
 42      *  * max_results  -- the maximum number of results to return (default: false)
 43      *  * min_score    -- the minimum score to return (0-100) (default: false)
 44      *  * return_score -- whether or not to return the score along with the results (default: false)
 45      *  * fold         -- whether or not to fold diacritical marks, thus making
 46      *                                      `über` into `uber`. (default: true)
 47      *  * match_type     -- the type of filters to run. (default: MATCH_ALL)
 48      *
 49      *  The match_type is defined as constants, and so you can call them by the flags or by
 50      *  the integer value. Options:
 51      *    Match items that start with the query
 52      *    1: MATCH_STARTSWITH
 53      *    Match items whose capital letters start with ``query``
 54      *    2: MATCH_CAPITALS
 55      *    Match items with a component "word" that matches ``query``
 56      *    4: MATCH_ATOM
 57      *    Match items whose initials (based on atoms) start with ``query``
 58      *    8: MATCH_INITIALS_STARTSWITH
 59      *    Match items whose initials (based on atoms) contain ``query``
 60      *    16: MATCH_INITIALS_CONTAIN
 61      *    Combination of MATCH_INITIALS_STARTSWITH and MATCH_INITIALS_CONTAIN
 62      *    24: MATCH_INITIALS
 63      *    Match items if ``query`` is a substring
 64      *    32: MATCH_SUBSTRING
 65      *    Match items if all characters in ``query`` appear in the item in order
 66      *    64: MATCH_ALLCHARS
 67      *    Combination of all other ``MATCH_*`` constants
 68      *    127: MATCH_ALL
 69      *
 70      * @param  array                $haystack the array of items to filter
 71      * @param  string               $needle   the search query to filter against
 72      * @param  string|boolean $key      the name of the key to filter on if array is associative
 73      * @param  array                    $options  a list of options to configure the filter
 74      * @return array          an array of filtered items
 75      */
 76     public function Filter( $haystack, $needle, $key = false, $options = [] ) {
 77         // Set the defaults if not already set
 78         $max             = ( isset( $options['max_results'] ) ) ? $options['max_results'] : false;
 79         $fold_diacritics = ( isset( $options['fold'] ) ) ? $options['fold'] : true;
 80         $match_type      = ( isset( $options['match_type'] ) ) ? $options['match_type'] : MATCH_ALL;
 81         $min             = ( isset( $options['min_score'] ) ) ? $options['min_score'] : false;
 82         $return_score    = ( isset( $options['return_score'] ) ) ? $options['return_score'] : false;
 83 
 84         // Here, we make the assumption that, if the $needle or search string is empty, then the filter was a misfire, so
 85         // we'll just return all of the results.
 86         if ( empty( trim( $needle ) ) ) {
 87             return $haystack;
 88         }
 89 
 90         // Initialize an empty results array
 91         $results = [];
 92 
 93         // Go through each item in the "haystack" array
 94         foreach ( $haystack as $row ) :
 95 
 96             // Start with a score of 0
 97             $score = 0;
 98 
 99             // Treat each word in "needle" as separate
100             $words = explode( ' ', $needle );
101             // trim the whitespace off the needles
102             array_walk( $words, 'trim' );
103 
104             // If a key was specified, use that; otherwise, just use the value of the row
105             if ( $key ) {
106                 $value = $row[ $key ];
107             } else {
108                 $value = $row;
109             }
110 
111             // If the value is empty, then don't bother searching. We got whitespace.
112             if ( empty( trim( $value ) ) )
113                 continue;
114 
115             // Foreach word, do a search
116             foreach( $words as $word ) :
117 
118                 // If the word is empty, then don't bother searching
119                 if ( empty( $word ) )
120                     continue;
121 
122                 // Perform the search
123                 $result = self::filter_item( $value, $word, $match_type, $fold_diacritics );
124                 // Check is a score was sent back that was not 0. If it was 0, then just
125                 // continue because it didn't matter
126                 if ( ! $result[0] ) {
127                     continue;
128                 }
129                 // It did matter! And so augment the score.
130                 $score += $result[0];
131 
132             endforeach;
133 
134             // If the score is greater than 0, then include it in the results
135             if ( $score > 0 ) {
136                 $results[] = [ $score, $row ];
137             }
138 
139         endforeach;
140 
141         // Sort the array by score
142         usort( $results, 'self::sort_by_score' );
143         // If we have a max result set, then take the top results
144         if ( $max && ( count( $results ) > $max ) ) {
145             $results = array_slice( $results, 0, $max );
146         }
147 
148         // If min_score is set, then unset any values that have
149         // a score less than min
150         if ( $min ) {
151             foreach ( $results as $key => $value ) :
152                 if ( $value[0] < $min ) {
153                     unset( $results[ $key ] );
154                 }
155             endforeach;
156         }
157 
158         // If they want the score, then return it
159         if ( $return_score ) {
160             return $results;
161         }
162 
163         // They don't want the score, so just remove them and simply the array
164         foreach ($results as $key => $value ) :
165             $results[ $key ] = $value[1];
166         endforeach;
167         // Return the sorted results
168         return $results;
169     }
170 
171     /**
172      * Callback function to help sort the results array
173      *
174      * @internal I made this `public` because there was an error using filter()
175      *           through the wrapper. This is a temporary measure and should
176      *           be remedied.
177      *
178      * @param  array $a an array
179      * @param  array $b an array
180      * @return bool
181      */
182     public function sort_by_score( $a, $b ) {
183         return $a[0] < $b[0];
184     }
185 
186     /**
187      * Removes all non-capital characters and non-digit characters frmo a string
188      *
189      * @param  string $string   a string to process
190      * @return string           the processed string
191      */
192     private function remove_all_non_caps( $string ) {
193         return strtolower( preg_replace( '/[^A-Z0-9]/', '', $string ) );
194     }
195 
196     /**
197      * Converts and transliterates a string to ascii
198      *
199      * @param  string $string a string to transliterate
200      * @return string         the transliterated string
201      */
202     private function convert( $string ) {
203         // I don't want to mess with encodings, so we'll just auto-detect
204         $encoding = mb_detect_encoding( $string );
205         // Note: if PHP will throw a notice if the string contains characters that cannot
206         // be transliterated. These will most likely be certain symbols and characters
207         // from many different Asian languages.
208         return iconv( $encoding, 'ASCII//TRANSLIT', $string );
209     }
210 
211     /**
212      * Runs the filter rules
213      *
214      * @todo Refactor this
215      *
216      * @param  string $value           the value string (haystack)
217      * @param  string $query           the query string (needle)
218      * @param  mixed  $match_on        the search flags, so constants or integers
219      * @param  bool   $fold_diacritics whether or not to transliterate to ascii
220      * @return array                   an array that is score and then the rule matched
221      */
222     private function filter_item( $value, $query, $match_on, $fold_diacritics ) {
223         $query = strtolower( $query );
224 
225         if ( $fold_diacritics ) {
226             $value = self::convert( $value );
227         }
228 
229     // Pre-filter anything that doesn't contain all of the characters
230         $arr = array_unique( str_split( $query ) );
231         $arr2 = array_unique( str_split( strtolower( $value ) ) );
232         if ( count( array_diff( $arr, $arr2 ) ) > 0 ) {
233             return [ 0, 'None' ];
234         }
235 
236     // Item starts with $query
237     if ( ( $match_on & MATCH_STARTSWITH ) && ( 0 === strpos( strtolower( $value ), $query ) ) ) {
238         $score = 100.0 - ( strlen( $value ) / strlen( $query ) );
239       return [ $score, MATCH_STARTSWITH ];
240     }
241 
242     // $query matches capitalised letters in item, e.g. of = OmniFocus
243     if ( $match_on & MATCH_CAPITALS ) {
244             $initials = self::remove_all_non_caps( $value );
245         if ( false !== strpos( $initials, $query ) ) {
246             $score = 100.0 - ( strlen( $initials ) / strlen( $query ) );
247             return [ $score, MATCH_CAPITALS ];
248         }
249     }
250 
251     // split the item into "atoms", i.e. words separated by spaces or other non-word characters
252     if ( $match_on & MATCH_ATOM || $match_on & MATCH_INITIALS_CONTAIN || $match_on & MATCH_INITIALS_STARTSWITH ) {
253         // Split into atoms, note: if you are not transliterating, then this will split on accented characters too
254         $atoms = preg_split('/[^a-zA-Z0-9]/', strtolower($value), -1, PREG_SPLIT_NO_EMPTY);
255 
256         // initials of the atoms
257         $initials = $atoms;
258         array_walk( $initials, function ( &$value, $key ) { $value = substr( $value, 0, 1 ); } );
259         $initials = implode( '', $initials );
260         }
261 
262     if ( $match_on & MATCH_ATOM ) {
263         // is `$query` one of the atoms in item? Similar to substring, but $scores more highly, as it's
264         // a word within the item
265         if ( in_array( $query, $atoms ) ) {
266             $score = 100.0 - ( strlen( $value ) / strlen( $query ) );
267             return [ $score, MATCH_ATOM ];
268         }
269         }
270     # `$query` matches start (or all) of the initials of the
271     # atoms, e.g. ``himym`` matches "How I Met Your Mother"
272     # *and* "how i met your mother" (the ``capitals`` rule only
273     # matches the former)
274     if ( ( $match_on & MATCH_INITIALS_STARTSWITH ) && ( 0 === strpos( $initials, $query ) ) ) {
275         $score = 100.0 - ( strlen( $initials ) / strlen( $query ) );
276         return [ $score, MATCH_INITIALS_STARTSWITH ];
277         } else if ($match_on & MATCH_INITIALS_CONTAIN && (false !== strpos( $initials, $query ) ) ) {
278     # `query` is a substring of initials, e.g. ``doh`` matches
279     # "The Dukes of Hazzard"
280         $score = 95.0 - (strlen($initials) / strlen($query));
281         return [$score, MATCH_INITIALS_CONTAIN];
282         }
283 
284     # `query` is a substring of item
285     if ( ( $match_on & MATCH_SUBSTRING ) && ( false !== strpos( strtolower( $value ), $query ) ) ) {
286         $score = 90.0 - ( strlen( $value ) / strlen( $query ) );
287         return [ $score, MATCH_SUBSTRING ];
288     }
289 
290     # finally, assign a $score based on how close together the
291     # characters in `query` are in item.
292     /**
293      * @todo Rework the scoring on this part
294      */
295     if ( $match_on & MATCH_ALLCHARS ) {
296         foreach( str_split( $query ) as $character ) :
297             $position[] = strpos( $value, $character );
298         endforeach;
299         $divisor = ( ( 1 + reset( $position ) ) * ( ( abs( end( $position ) - reset( $position ) ) ) ) );
300         // protect from divide by 0 warnings
301         if ( $divisor == 0 ) {
302             $divisor = 1;
303         }
304       $score = 100.0 / $divisor;
305       $score = $score * ( strlen($query) / strlen($value) );
306       return [$score, MATCH_ALLCHARS ];
307     }
308 
309     // Nothing matched, so return a score of 0
310     return [0, 'None'];
311     }
312 
313 }
314 
315 
Alphred API documentation generated by ApiGen