WordPress Google Analytics Content Filter

March 17, 2011

I’ve been working on a site recently that has a lot of PDFs that the content owners link to, and I’ve been wanting to track the number of views that each PDF gets. To accomplish that, I’ve been using a feature of Google Analytics that requires you to add some extra info to links so you can track how many clicks a PDF gets.

Up until now I’ve been adding that extra information by hand because there haven’t been a lot of links, but I figured there was a ripe opportunity to do some simple automation. I’ve created the following content filter that will automatically add the Google Analytics bit to each link in the content; you just have to put it in your theme’s functions.php file. I also added a bit that tracks outbound links, just because that might be handy info to have as well.

// Hook the Google Analytics link tagger into WordPress' "the_content" filter
add_filter('the_content', 'google_external_link_filter');

/**
 * Content filter that adds Google Analytics click tracking to links.
 *
 * Every <a href> in the content is inspected and, when it qualifies, given an
 * onclick handler that pushes a virtual page view onto the _gaq queue:
 *   - links whose URL path ends in a file extension -> /file/<file name>
 *   - links to a different host than this site      -> /external/<url without scheme>
 * Relative links, same-host links without an extension, in-page anchors and
 * non-HTTP schemes (mailto:, tel:, javascript:, ...) are left untouched.
 * An existing onclick attribute on a tracked link is overwritten.
 *
 * @param string $content HTML fragment to filter (assumed to be UTF-8).
 * @return string The fragment with tracking added, or the original content
 *                unchanged when nothing needed tagging or it could not be parsed.
 */
function google_external_link_filter($content)
{
  // no links means nothing to do, so skip the DOM round trip entirely
  if (!is_string($content) || stripos($content, '<a') === false)
  {
    return $content;
  }

  // the site host never changes, so resolve it once instead of once per link
  $siteHost = (string) parse_url(home_url(), PHP_URL_HOST);

  // collect parser complaints internally instead of emitting PHP warnings
  // for every bit of imperfect or HTML5 markup in the content
  $previousErrorMode = libxml_use_internal_errors(true);

  // wrap the fragment in a document that declares its charset; without the
  // declaration libxml does not treat the input as UTF-8
  $dom = new DOMDocument();
  $loaded = $dom->loadHTML(
    '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
    . $content
    . '</body></html>'
  );

  libxml_clear_errors();
  libxml_use_internal_errors($previousErrorMode);

  if (!$loaded)
  {
    return $content;
  }

  $xpath = new DOMXPath($dom);
  $changed = false;

  // loop through each link and check if we need to add the google stuff
  foreach ($xpath->query('//a[@href]') as $tag)
  {
    $linkURL = trim($tag->getAttribute('href'));
    $parts = ($linkURL === '') ? false : parse_url($linkURL);

    // empty or unparseable href
    if ($parts === false)
    {
      continue;
    }

    // only web links are tracked; mailto:, tel:, javascript: etc. are skipped
    if (isset($parts['scheme']) && !in_array(strtolower($parts['scheme']), array('http', 'https'), true))
    {
      continue;
    }

    // look at the path only, so the host's TLD or a query string is never
    // mistaken for a file extension
    $linkPath = isset($parts['path']) ? $parts['path'] : '';
    $linkHost = isset($parts['host']) ? $parts['host'] : '';
    $linkExtension = pathinfo($linkPath, PATHINFO_EXTENSION);

    if ($linkExtension !== '')
    {
      // the link goes to a file
      $trackedPath = '/file/' . pathinfo($linkPath, PATHINFO_BASENAME);
    }
    elseif ($linkHost !== '' && strcasecmp($linkHost, $siteHost) !== 0)
    {
      // the link goes to another site: strip http://, https:// or a leading //
      $trackedPath = '/external/' . preg_replace('#^(?:https?:)?//#i', '', $linkURL);
    }
    else
    {
      continue;
    }

    // escape for the single-quoted JavaScript string; the DOM takes care of
    // the HTML attribute escaping when the markup is serialized
    $escapedPath = addcslashes($trackedPath, "\\'\r\n");

    // setAttribute replaces any existing onclick value
    $tag->setAttribute('onclick', "javascript: _gaq.push(['_trackPageview', '" . $escapedPath . "']);");
    $changed = true;
  }

  // hand back the untouched markup when no link was tagged
  if (!$changed)
  {
    return $content;
  }

  $body = $dom->getElementsByTagName('body')->item(0);
  if ($body === null)
  {
    return $content;
  }

  // serialize only the children of <body> so the doctype/html/body wrapper
  // never ends up in the content
  $filtered = '';
  foreach ($body->childNodes as $child)
  {
    $filtered .= $dom->saveHTML($child);
  }

  return $filtered;
}

If you use it and find a bug or a better way of doing something, let me know, and I’ll update it.