1 <?php
2 /**
3 * Contains Filter class for Alphred
4 *
5 * This file contains the functions for filtering arrays of text
6 *
7 * This is almost a direct translation of the filter functionality of
8 * Deanishe's Alfred Workflows (https://github.com/deanishe/alfred-workflow).
9 * He and his collaborators get credit for this one.
10 *
11 * Also, if you are folding diacritics, then you can work only with
12 * characters that can be transliterated to ASCII.
13 *
14 * PHP version 5
15 *
16 * @package Alphred
17 * @copyright Shawn Patrick Rice 2014
18 * @license http://opensource.org/licenses/MIT MIT
19 * @version 1.0.0
20 * @author Shawn Patrick Rice <rice@shawnrice.org>
21 * @link http://www.github.com/shawnrice/alphred
22 * @link http://shawnrice.github.io/alphred
23 * @since File available since Release 1.0.0
24 *
25 */
26
27 namespace Alphred;
28
29 /**
30 * Filters arrays to match a query
31 */
32 class Filter {
33
34 /**
35 * Filters an array based on a query
36 *
37 * Passing an empty query ($needle) to this method will simply return the initial array.
38 * If you have `fold` on, then this will fail on characters that cannot be translitered
39 * into regular ASCII, so most Asian languages.
40 *
41 * The options to be set are:
42 * * max_results -- the maximum number of results to return (default: false)
43 * * min_score -- the minimum score to return (0-100) (default: false)
44 * * return_score -- whether or not to return the score along with the results (default: false)
45 * * fold -- whether or not to fold diacritical marks, thus making
46 * `über` into `uber`. (default: true)
47 * * match_type -- the type of filters to run. (default: MATCH_ALL)
48 *
49 * The match_type is defined as constants, and so you can call them by the flags or by
50 * the integer value. Options:
51 * Match items that start with the query
52 * 1: MATCH_STARTSWITH
53 * Match items whose capital letters start with ``query``
54 * 2: MATCH_CAPITALS
55 * Match items with a component "word" that matches ``query``
56 * 4: MATCH_ATOM
57 * Match items whose initials (based on atoms) start with ``query``
58 * 8: MATCH_INITIALS_STARTSWITH
59 * Match items whose initials (based on atoms) contain ``query``
60 * 16: MATCH_INITIALS_CONTAIN
61 * Combination of MATCH_INITIALS_STARTSWITH and MATCH_INITIALS_CONTAIN
62 * 24: MATCH_INITIALS
63 * Match items if ``query`` is a substring
64 * 32: MATCH_SUBSTRING
65 * Match items if all characters in ``query`` appear in the item in order
66 * 64: MATCH_ALLCHARS
67 * Combination of all other ``MATCH_*`` constants
68 * 127: MATCH_ALL
69 *
70 * @param array $haystack the array of items to filter
71 * @param string $needle the search query to filter against
72 * @param string|boolean $key the name of the key to filter on if array is associative
73 * @param array $options a list of options to configure the filter
74 * @return array an array of filtered items
75 */
76 public function Filter( $haystack, $needle, $key = false, $options = [] ) {
77 // Set the defaults if not already set
78 $max = ( isset( $options['max_results'] ) ) ? $options['max_results'] : false;
79 $fold_diacritics = ( isset( $options['fold'] ) ) ? $options['fold'] : true;
80 $match_type = ( isset( $options['match_type'] ) ) ? $options['match_type'] : MATCH_ALL;
81 $min = ( isset( $options['min_score'] ) ) ? $options['min_score'] : false;
82 $return_score = ( isset( $options['return_score'] ) ) ? $options['return_score'] : false;
83
84 // Here, we make the assumption that, if the $needle or search string is empty, then the filter was a misfire, so
85 // we'll just return all of the results.
86 if ( empty( trim( $needle ) ) ) {
87 return $haystack;
88 }
89
90 // Initialize an empty results array
91 $results = [];
92
93 // Go through each item in the "haystack" array
94 foreach ( $haystack as $row ) :
95
96 // Start with a score of 0
97 $score = 0;
98
99 // Treat each word in "needle" as separate
100 $words = explode( ' ', $needle );
101 // trim the whitespace off the needles
102 array_walk( $words, 'trim' );
103
104 // If a key was specified, use that; otherwise, just use the value of the row
105 if ( $key ) {
106 $value = $row[ $key ];
107 } else {
108 $value = $row;
109 }
110
111 // If the value is empty, then don't bother searching. We got whitespace.
112 if ( empty( trim( $value ) ) )
113 continue;
114
115 // Foreach word, do a search
116 foreach( $words as $word ) :
117
118 // If the word is empty, then don't bother searching
119 if ( empty( $word ) )
120 continue;
121
122 // Perform the search
123 $result = self::filter_item( $value, $word, $match_type, $fold_diacritics );
124 // Check is a score was sent back that was not 0. If it was 0, then just
125 // continue because it didn't matter
126 if ( ! $result[0] ) {
127 continue;
128 }
129 // It did matter! And so augment the score.
130 $score += $result[0];
131
132 endforeach;
133
134 // If the score is greater than 0, then include it in the results
135 if ( $score > 0 ) {
136 $results[] = [ $score, $row ];
137 }
138
139 endforeach;
140
141 // Sort the array by score
142 usort( $results, 'self::sort_by_score' );
143 // If we have a max result set, then take the top results
144 if ( $max && ( count( $results ) > $max ) ) {
145 $results = array_slice( $results, 0, $max );
146 }
147
148 // If min_score is set, then unset any values that have
149 // a score less than min
150 if ( $min ) {
151 foreach ( $results as $key => $value ) :
152 if ( $value[0] < $min ) {
153 unset( $results[ $key ] );
154 }
155 endforeach;
156 }
157
158 // If they want the score, then return it
159 if ( $return_score ) {
160 return $results;
161 }
162
163 // They don't want the score, so just remove them and simply the array
164 foreach ($results as $key => $value ) :
165 $results[ $key ] = $value[1];
166 endforeach;
167 // Return the sorted results
168 return $results;
169 }
170
171 /**
172 * Callback function to help sort the results array
173 *
174 * @internal I made this `public` because there was an error using filter()
175 * through the wrapper. This is a temporary measure and should
176 * be remedied.
177 *
178 * @param array $a an array
179 * @param array $b an array
180 * @return bool
181 */
182 public function sort_by_score( $a, $b ) {
183 return $a[0] < $b[0];
184 }
185
186 /**
187 * Removes all non-capital characters and non-digit characters frmo a string
188 *
189 * @param string $string a string to process
190 * @return string the processed string
191 */
192 private function remove_all_non_caps( $string ) {
193 return strtolower( preg_replace( '/[^A-Z0-9]/', '', $string ) );
194 }
195
196 /**
197 * Converts and transliterates a string to ascii
198 *
199 * @param string $string a string to transliterate
200 * @return string the transliterated string
201 */
202 private function convert( $string ) {
203 // I don't want to mess with encodings, so we'll just auto-detect
204 $encoding = mb_detect_encoding( $string );
205 // Note: if PHP will throw a notice if the string contains characters that cannot
206 // be transliterated. These will most likely be certain symbols and characters
207 // from many different Asian languages.
208 return iconv( $encoding, 'ASCII//TRANSLIT', $string );
209 }
210
211 /**
212 * Runs the filter rules
213 *
214 * @todo Refactor this
215 *
216 * @param string $value the value string (haystack)
217 * @param string $query the query string (needle)
218 * @param mixed $match_on the search flags, so constants or integers
219 * @param bool $fold_diacritics whether or not to transliterate to ascii
220 * @return array an array that is score and then the rule matched
221 */
222 private function filter_item( $value, $query, $match_on, $fold_diacritics ) {
223 $query = strtolower( $query );
224
225 if ( $fold_diacritics ) {
226 $value = self::convert( $value );
227 }
228
229 // Pre-filter anything that doesn't contain all of the characters
230 $arr = array_unique( str_split( $query ) );
231 $arr2 = array_unique( str_split( strtolower( $value ) ) );
232 if ( count( array_diff( $arr, $arr2 ) ) > 0 ) {
233 return [ 0, 'None' ];
234 }
235
236 // Item starts with $query
237 if ( ( $match_on & MATCH_STARTSWITH ) && ( 0 === strpos( strtolower( $value ), $query ) ) ) {
238 $score = 100.0 - ( strlen( $value ) / strlen( $query ) );
239 return [ $score, MATCH_STARTSWITH ];
240 }
241
242 // $query matches capitalised letters in item, e.g. of = OmniFocus
243 if ( $match_on & MATCH_CAPITALS ) {
244 $initials = self::remove_all_non_caps( $value );
245 if ( false !== strpos( $initials, $query ) ) {
246 $score = 100.0 - ( strlen( $initials ) / strlen( $query ) );
247 return [ $score, MATCH_CAPITALS ];
248 }
249 }
250
251 // split the item into "atoms", i.e. words separated by spaces or other non-word characters
252 if ( $match_on & MATCH_ATOM || $match_on & MATCH_INITIALS_CONTAIN || $match_on & MATCH_INITIALS_STARTSWITH ) {
253 // Split into atoms, note: if you are not transliterating, then this will split on accented characters too
254 $atoms = preg_split('/[^a-zA-Z0-9]/', strtolower($value), -1, PREG_SPLIT_NO_EMPTY);
255
256 // initials of the atoms
257 $initials = $atoms;
258 array_walk( $initials, function ( &$value, $key ) { $value = substr( $value, 0, 1 ); } );
259 $initials = implode( '', $initials );
260 }
261
262 if ( $match_on & MATCH_ATOM ) {
263 // is `$query` one of the atoms in item? Similar to substring, but $scores more highly, as it's
264 // a word within the item
265 if ( in_array( $query, $atoms ) ) {
266 $score = 100.0 - ( strlen( $value ) / strlen( $query ) );
267 return [ $score, MATCH_ATOM ];
268 }
269 }
270 # `$query` matches start (or all) of the initials of the
271 # atoms, e.g. ``himym`` matches "How I Met Your Mother"
272 # *and* "how i met your mother" (the ``capitals`` rule only
273 # matches the former)
274 if ( ( $match_on & MATCH_INITIALS_STARTSWITH ) && ( 0 === strpos( $initials, $query ) ) ) {
275 $score = 100.0 - ( strlen( $initials ) / strlen( $query ) );
276 return [ $score, MATCH_INITIALS_STARTSWITH ];
277 } else if ($match_on & MATCH_INITIALS_CONTAIN && (false !== strpos( $initials, $query ) ) ) {
278 # `query` is a substring of initials, e.g. ``doh`` matches
279 # "The Dukes of Hazzard"
280 $score = 95.0 - (strlen($initials) / strlen($query));
281 return [$score, MATCH_INITIALS_CONTAIN];
282 }
283
284 # `query` is a substring of item
285 if ( ( $match_on & MATCH_SUBSTRING ) && ( false !== strpos( strtolower( $value ), $query ) ) ) {
286 $score = 90.0 - ( strlen( $value ) / strlen( $query ) );
287 return [ $score, MATCH_SUBSTRING ];
288 }
289
290 # finally, assign a $score based on how close together the
291 # characters in `query` are in item.
292 /**
293 * @todo Rework the scoring on this part
294 */
295 if ( $match_on & MATCH_ALLCHARS ) {
296 foreach( str_split( $query ) as $character ) :
297 $position[] = strpos( $value, $character );
298 endforeach;
299 $divisor = ( ( 1 + reset( $position ) ) * ( ( abs( end( $position ) - reset( $position ) ) ) ) );
300 // protect from divide by 0 warnings
301 if ( $divisor == 0 ) {
302 $divisor = 1;
303 }
304 $score = 100.0 / $divisor;
305 $score = $score * ( strlen($query) / strlen($value) );
306 return [$score, MATCH_ALLCHARS ];
307 }
308
309 // Nothing matched, so return a score of 0
310 return [0, 'None'];
311 }
312
313 }
314
315