Editorial Workflow — SearchLink

Run Python Script ?
Source Code
#!/usr/bin/env python

import re
import urllib
import requests
import random
import operator
import editor
import workflow

# Version of this script; not referenced anywhere else in the code shown here.
_VERSION_ = 1.6

class SearchLink(object):
    """
    Expands SearchLink-style Markdown links into real links.

    Based on Brett Terpstra's SearchLink (https://gist.github.com/ttscoff/3900158),
    ported over to Python. The reason to port it over to Python is to be used with
    Editorial for iPad (http://omz-software.com/editorial/), which can run Python.

    A SearchLink is written as ``[link text](!type "optional search terms")``.
    ``type`` is either a key of ``search_type_func`` or a bare host name / URL,
    in which case a site-restricted Google search is performed. When the quoted
    search terms are omitted, the link text is used as the search terms.

    Links that are not in SearchLink syntax, that use an unknown type, or whose
    search yields no result are left exactly as they were written.

    Parameters:
        inline         -- emit ``[text](url)`` instead of ``[text][id]`` plus a
                          reference list appended to the text.
        include_titles -- add the result's title after the URL (``url "title"``).
        prefix_random  -- prefix reference ids with a random four digit number.
    """

    # Any inline Markdown link: [text](info).
    _LINK_RE = re.compile(r"\[(.*?)\]\((.+?)\)")

    # The "info" part of a SearchLink: !type, optionally followed by "terms".
    _INFO_RE = re.compile(r'^!([^ ]+?)( "(.*?)")?$')

    # my attempt at trying to match URLs (http://stackoverflow.com/a/7160778)
    _URL_RE = re.compile(r"^((http|ftp)s?://)?"
                         r"(([A-Za-z0-9]([A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+"
                         r"([A-Za-z]{2,6}\.?|[A-Za-z0-9-]{2,}\.?)|"
                         r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
                         r"(/?|[/?]\S+)$")

    def __init__(self,
                 inline=False,
                 include_titles=False,
                 prefix_random=True):
        super(SearchLink, self).__init__()

        self.inline = inline
        self.include_titles = include_titles
        self.prefix_random = prefix_random

        # Maps the reference text (URL, optionally with title) to its "[id]".
        self.references = {}

        prefix = "{0:04}-".format(random.randint(0, 9999)) if self.prefix_random else ''
        self.reference_id = self._iter_reference_ids(prefix)

        # Dispatch table: search type -> callable(term) returning (url, title).
        self.search_type_func = {"a": lambda term: self._google("site:amazon.com " + term),
                                 "g": self._google,
                                 "wiki": self._wiki,
                                 "def": lambda term: self._google("define " + term, True),
                                 "mas": lambda term: self._itunes("macSoftware", term),
                                 "masd": lambda term: self._itunes("macSoftware", term, True),
                                 "itu": lambda term: self._itunes('iPadSoftware', term),
                                 "itud": lambda term: self._itunes('iPadSoftware', term, True),
                                 "s": lambda term: self._google("(software OR app OR mac) " + term),
                                 "isong": lambda term: self._itunes("song", term),
                                 "iart": lambda term: self._itunes("musicArtist", term),
                                 "ialb": lambda term: self._itunes("album", term),
                                 "lsong": lambda term: self._lastfm("track", term),
                                 "lart": lambda term: self._lastfm("artist", term)}

    @staticmethod
    def _iter_reference_ids(prefix):
        """
        Yields reference ids: the prefix followed by a counter starting at 1
        that is zero-padded to at least three digits.
        """
        number = 1
        while True:
            yield prefix + "{0:03}".format(number)
            number += 1

    @staticmethod
    def _reference_sort_key(item):
        """
        Sort key for (reference text, "[id]") pairs that keeps ids in the order
        they were handed out. Ids share one prefix, so ordering by length first
        keeps "1000" after "999", which a plain string comparison would not.
        """
        reference_id = item[1]
        return len(reference_id), reference_id

    def replace(self, text):
        """
        Returns the text with every SearchLink replaced by a real link.

        In reference mode (``inline`` is false) the collected reference
        definitions are appended after a newline. Nothing is appended when no
        reference has been collected. References accumulate on the instance
        across calls.
        """
        text = self._LINK_RE.sub(self._replace_search_links, text)

        if self.inline or not self.references:
            return text

        ordered = sorted(self.references.items(), key=self._reference_sort_key)
        ref_text = '\n'.join(ref_id + ": " + link for link, ref_id in ordered)

        return text + '\n' + ref_text

    def _replace_search_links(self, search_link):
        """
        ``re.sub`` callback: returns the replacement for one matched Markdown
        link, or the matched text itself when the link cannot be resolved.
        """
        original = search_link.group(0)
        link_text = search_link.group(1)
        link_info = search_link.group(2)

        results = self._get_search_link_info(link_text, link_info)

        # skip over links that are not in SearchLinks syntax
        if results is None:
            return original

        search_type, search_term = results

        search = self.search_type_func.get(search_type)
        if search is None:
            # not a known type and not a URL so just return it
            if not self._URL_RE.search(search_type):
                return original

            search = self._google
            search_term = "site:{0} {1}".format(search_type, search_term)

        try:
            url, title = search(search_term)
        except (KeyError, IndexError):
            # The service answered with an unexpected shape; keep the link as
            # written instead of aborting the whole document.
            return original

        # No hit: leave the SearchLink in place rather than linking to "None".
        if not url:
            return original

        # if link_text is empty, then replace it with the title of the link
        if link_text == '' and title:
            link_text = title

        # Concatenate rather than str.format so that a non-ASCII link text or
        # title is not forced through an ASCII encode on Python 2.
        return "[" + link_text + "]" + self._create_link_reference(url, title)

    def _get_search_link_info(self, link_text, link_info):
        """
        Parses a SearchLink's information to figure out what type
        and search terms should be used.

        Returns a ``(search_type, search_term)`` tuple, or None when link_info
        is not in SearchLink syntax. The link text is the search term when no
        quoted terms are given.
        """
        matches = self._INFO_RE.search(link_info)
        if matches is None:
            return None

        search_type = matches.group(1)
        search_term = matches.group(3)

        if search_term is None:
            search_term = link_text
        return search_type, search_term

    def _create_link_reference(self, url, title):
        """
        Builds the part of the link that follows ``[text]``.

        In inline mode this is ``(url)`` (or ``(url "title")``). Otherwise it is
        a ``[id]`` reference; the same URL/title combination always maps to the
        same id. The title is only included when ``include_titles`` is set and
        a title is actually available.
        """
        if self.include_titles and title:
            link_reference = u'{url} "{title}"'.format(url=url, title=title)
        else:
            link_reference = u'{url}'.format(url=url)

        if self.inline:
            return '({0})'.format(link_reference)

        if link_reference not in self.references:
            self.references[link_reference] = '[{0}]'.format(self._get_reference_id())
        return self.references[link_reference]

    def _get_reference_id(self):
        """
        Returns the next unused reference id.
        """
        return next(self.reference_id)

    def _get_json(self, url, params):
        """
        Issues a GET request and returns the decoded JSON body. The query
        string is built by requests from params, which takes care of escaping
        the search terms.
        """
        return requests.request('GET', url, params=params).json()

    def _wiki(self, terms):
        """
        Looks up a Wikipedia page by title.

        Returns ``(url, title)``, or ``(None, None)`` when the API reports no
        page or flags the page as missing/invalid.
        """
        response = self._get_json("http://en.wikipedia.org/w/api.php",
                                  {"action": "query",
                                   "format": "json",
                                   "prop": "info",
                                   "inprop": "url",
                                   "titles": terms})

        pages = response.get('query', {}).get('pages')
        if not pages:
            return None, None

        page = next(iter(pages.values()))
        if 'missing' in page or 'invalid' in page:
            return None, None

        return page.get('fullurl'), page.get('title')

    def _itunes(self, type, terms, dev=False):
        """
        Searches the iTunes Search API for the given entity type.

        For software entities, ``dev`` selects the seller's URL when the result
        has one; otherwise the store URL is used. Returns ``(url, title)``, or
        ``(None, None)`` when there is no usable result.

        NOTE: the parameter name ``type`` shadows the builtin; it is kept for
        backward compatibility with existing callers.
        """
        response = self._get_json("http://itunes.apple.com/search",
                                  {"term": terms, "entity": type})

        if not response.get('resultCount'):
            return None, None

        result = response['results'][0]

        if type in ('macSoftware', 'iPadSoftware'):
            seller_url = result.get('sellerUrl') if dev else None
            return seller_url or result['trackViewUrl'], result['trackName']

        if type in ('musicArtist', 'song', 'album'):
            result_type = result['wrapperType']

            if result_type == 'track':
                return (result['trackViewUrl'],
                        result['trackName'] + " by " + result['artistName'])
            if result_type == 'collection':
                return (result['collectionViewUrl'],
                        result['collectionName'] + " by " + result['artistName'])
            if result_type == 'artist':
                return result['artistLinkUrl'], result['artistName']

        # Unknown entity or wrapper type: nothing to link to.
        return None, None

    def _lastfm(self, type, terms):
        """
        Searches Last.fm for a track or an artist (``type`` is "track" or
        "artist"). Returns ``(url, title)``, or ``(None, None)`` when nothing
        matches or the type is not supported.

        NOTE: the parameter name ``type`` shadows the builtin; it is kept for
        backward compatibility with existing callers.
        """
        if type not in ('track', 'artist'):
            return None, None

        # NOTE(review): the API key is committed in source; consider moving it
        # into configuration.
        response = self._get_json("http://ws.audioscrobbler.com/2.0/",
                                  {"method": type + ".search",
                                   type: terms,
                                   "api_key": "2f3407ec29601f97ca8a18ff580477de",
                                   "format": "json"})

        results = response.get('results')
        container = results.get(type + 'matches') if isinstance(results, dict) else None
        matches = container.get(type) if isinstance(container, dict) else None

        # Defensive: accept a lone match delivered as an object instead of a
        # one-element list.
        if isinstance(matches, dict):
            matches = [matches]
        if not isinstance(matches, list) or not matches:
            return None, None

        result = matches[0]
        if type == 'track':
            return result['url'], result['name'] + " by " + result['artist']
        return result['url'], result['name']

    def _google(self, terms, define=False):
        """
        Returns ``(url, title)`` of the first Google web search result, or
        ``(None, None)`` when there is none.

        With ``define`` set, a result whose URL contains "dictionary" uses the
        result's content snippet (with tags stripped) as the title.

        NOTE(review): the Google AJAX Search API behind this URL has been
        deprecated by Google; confirm the endpoint still answers before
        relying on the search types built on it.
        """
        response = self._get_json("http://ajax.googleapis.com/ajax/services/search/web",
                                  {"v": "1.0",
                                   "filter": "1",
                                   "rsz": "small",
                                   "q": terms})

        data = response.get('responseData')
        results = data.get('results') if data else None
        if not results:
            return None, None

        result = results[0]
        if not result:
            return None, None

        output_url = result['unescapedUrl']

        if define and "dictionary" in output_url:
            title = re.sub("</?.*?>", "", result['content'])
        else:
            title = result['titleNoFormatting']

        return output_url, title


# Workflow entry point: expand the SearchLinks of the document being edited.
# Each option is enabled only when its workflow variable is the literal
# string "True"; any other value disables it.
document_text = editor.get_text()

search_link = SearchLink(include_titles=workflow.get_variable("include_titles") == "True",
                         inline=workflow.get_variable("inline") == "True",
                         prefix_random=workflow.get_variable("prefix_random") == "True")
converted_text = search_link.replace(document_text)

# Overwrite the whole document (offsets 0..len of the text that was read).
editor.replace_text(0, len(document_text), converted_text)
Editorial Workflows