/**
 * Remove known tracking parameters (utm_*, fbclid, gclid, ...) from a URL.
 *
 * Only the query string is edited. Everything else (scheme, credentials,
 * host, port, path, fragment) and every query parameter that is kept are
 * copied from the input byte-for-byte, so repeated keys, array-style keys
 * and the original percent-encoding survive. If no tracking parameter is
 * present, or the URL cannot be parsed, the input is returned unchanged.
 *
 * @param string $url Absolute, scheme-relative or relative URL.
 * @return string The URL without tracking parameters.
 */
function sanitize_url($url) {
    // Entries ending in '*' match by prefix; all others must match the
    // parameter name exactly. Matching is case-sensitive.
    $strip = [
        'fbclid', 'fb_*', // Facebook
        'ga_*', 'gclid', 'gclsrc', 'gs_l', // google ads
        'ref',
        'mc_eid',
        'igshid',
        'twclid',
        'msclkid',
        'trk', 'trkCampaign', // amazon
        'utm_*', 'nr_email_referer', // utm
        'itm_*', // itm
        'mc_*',
        'yclid', '_openstat', // yandex
        'sc_campaign', 'sc_channel', 'sc_content', 'sc_medium', 'sc_outcome', 'sc_geo', 'sc_country', // Campaign tracking (sc)
    ];

    // parse_url() is used only as a validity / "has a query" guard; the URL
    // itself is edited as a string so that no component gets lost or rewritten.
    $parsed = parse_url($url);
    if ($parsed === false || empty($parsed['query'])) {
        return $url;
    }

    // The fragment starts at the first '#'; the query starts at the first '?'
    // that precedes it.
    $hashPos = strpos($url, '#');
    $fragment = $hashPos === false ? '' : substr($url, $hashPos);
    $base = $hashPos === false ? $url : substr($url, 0, $hashPos);
    $queryPos = strpos($base, '?');
    if ($queryPos === false) {
        return $url;
    }

    $kept = [];
    $removed = false;
    foreach (explode('&', substr($base, $queryPos + 1)) as $pair) {
        if ($pair === '') {
            // Empty segment ("a=1&&b=2" or a trailing '&'); carries no parameter.
            continue;
        }

        // Derive the name the way parse_str() would see it, so matching stays
        // compatible: percent-decode, cut at the first '[' (array syntax),
        // drop leading spaces, and turn '.' / ' ' into '_'.
        $name = urldecode(explode('=', $pair, 2)[0]);
        $bracketPos = strpos($name, '[');
        if ($bracketPos !== false) {
            $name = substr($name, 0, $bracketPos);
        }
        $name = strtr(ltrim($name, ' '), '. ', '__');

        $isTracking = false;
        foreach ($strip as $rule) {
            $matches = str_ends_with($rule, '*')
                ? str_starts_with($name, substr($rule, 0, -1))
                : $name === $rule;
            if ($matches) {
                $isTracking = true;
                break;
            }
        }

        if ($isTracking) {
            $removed = true;
            continue;
        }
        $kept[] = $pair;
    }

    // Nothing to strip: hand back the caller's URL untouched.
    if (!$removed) {
        return $url;
    }

    // Rebuild: original prefix + surviving raw segments + original fragment.
    $result = substr($base, 0, $queryPos);
    if ($kept !== []) {
        $result .= '?' . implode('&', $kept);
    }

    return $result . $fragment;
}