tools/get_search_engines.py

   1 #!/usr/bin/python2.4
   2 #
   3 # Copyright (C) 2010 The Android Open Source Project
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #      http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17 """
  18 Creates the list of search engines
  19
  20 The created list is placed in the res/values-<locale> directory. Also updates
  21 res/values/all_search_engines.xml if required with new data.
  22
  23 Usage: get_search_engines.py
  24
  25 Copyright (C) 2010 The Android Open Source Project
  26 """
  27
  28 import os
  29 import re
  30 import sys
  31 import urllib
  32 from xml.dom import minidom
  33
  34 # Locales to generate search engine lists for
  35 locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
  36     "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE", "fr-FR",
  37     "it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL", "pl-PL", "pt-PT",
  38     "pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN", "zh-HK", "zh-MO", "zh-TW"]
  39
  40 google_data = ["google", "Google", "google.com",
  41   "http://www.google.com/favicon.ico",
  42   "http://www.google.com/m?hl={language}&amp;ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
  43   "UTF-8",
  44   "http://www.google.com/complete/search?hl={language}&amp;json=true&amp;q={searchTerms}"]
  45
  46 class SearchEngineManager(object):
  47   """Manages list of search engines and creates locale specific lists.
  48
  49   The main method useful for the caller is generateListForLocale(), which
  50   creates a locale specific search_engines.xml file suitable for use by the
  51   Android WebSearchProvider implementation.
  52   """
  53
  54   def __init__(self):
  55     """Inits SearchEngineManager with relevant search engine data.
  56
  57     The search engine data is downloaded from the Chrome source repository.
  58     """
  59     self.chrome_data = urllib.urlopen(
  60         'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
  61         'browser/search_engines/template_url_prepopulate_data.cc').read()
  62     if self.chrome_data.lower().find('repository not found') != -1:
  63       print 'Unable to get Chrome source data for search engine list.\nExiting.'
  64       sys.exit(2)
  65
  66     self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))
  67
  68     self.all_engines = set()
  69
  70   def getXmlString(self, str):
  71     """Returns an XML-safe string for the given string.
  72
  73     Given a string from the search engine data structure, convert it to a
  74     string suitable to write to our XML data file by stripping away NULLs,
  75     unwanted quotes, wide-string declarations (L"") and replacing C-style
  76     unicode characters with XML equivalents.
  77     """
  78     str = str.strip()
  79     if str.upper() == 'NULL':
  80       return ''
  81
  82     if str.startswith('L"'):
  83       str = str[2:]
  84     if str.startswith('@') or str.startswith('?'):
  85       str = '\\' + str
  86
  87     str = str.strip('"')
  88     str = str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
  89     str = str.replace('"', '&quot;').replace('\'', '&apos;')
  90     str = re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', str)
  91
  92     return str
  93
  94   def getEngineData(self, name):
  95     """Returns an array of strings describing the specified search engine.
  96
  97     The returned strings are in the same order as in the Chrome source data file
  98     except that the internal name of the search engine is inserted at the
  99     beginning of the list.
 100     """
 101
 102     if name == "google":
 103       return google_data
 104
 105     # Find the first occurance of this search engine name in the form
 106     # " <name> =" in the chrome data file.
 107     re_exp = '\s' + name + '\s*='
 108     search_obj = re.search(re_exp, self.chrome_data)
 109     if not search_obj:
 110       print ('Unable to find data for search engine ' + name +
 111              '. Please check the chrome data file for format changes.')
 112       return None
 113
 114     # Extract the struct declaration between the curly braces.
 115     start_pos = self.chrome_data.find('{', search_obj.start()) + 1;
 116     end_pos = self.chrome_data.find('};', start_pos);
 117     engine_data_str = self.chrome_data[start_pos:end_pos]
 118
 119     # Remove c++ style '//' comments at the ends of each line
 120     engine_data_lines = engine_data_str.split('\n')
 121     engine_data_str = ""
 122     for line in engine_data_lines:
 123         start_pos = line.find(' // ')
 124         if start_pos != -1:
 125             line = line[:start_pos]
 126         engine_data_str = engine_data_str + line + '\n'
 127
 128     # Join multiple line strings into a single string.
 129     engine_data_str = re.sub('\"\s+\"', '', engine_data_str)
 130     engine_data_str = re.sub('\"\s+L\"', '', engine_data_str)
 131     engine_data_str = engine_data_str.replace('"L"', '')
 132
 133     engine_data = engine_data_str.split(',')
 134     for i in range(len(engine_data)):
 135       engine_data[i] = self.getXmlString(engine_data[i])
 136
 137     # If the last element was an empty string (due to an extra comma at the
 138     # end), ignore it.
 139     if not engine_data[len(engine_data) - 1]:
 140       engine_data.pop()
 141
 142     engine_data.insert(0, name)
 143
 144     return engine_data
 145
 146   def getSearchEnginesForCountry(self, country):
 147     """Returns the list of search engine names for the given country.
 148
 149     The data comes from the Chrome data file.
 150     """
 151     # The Chrome data file has an array defined with the name 'engines_XX'
 152     # where XX = country.
 153     pos = self.chrome_data.find('engines_' + country)
 154     if pos == -1:
 155       print ('Unable to find search engine data for country ' + country + '.')
 156       return
 157
 158     # Extract the text between the curly braces for this array declaration
 159     engines_start = self.chrome_data.find('{', pos) + 1;
 160     engines_end = self.chrome_data.find('}', engines_start);
 161     engines_str = self.chrome_data[engines_start:engines_end]
 162
 163     # Remove embedded /**/ style comments, white spaces, address-of operators
 164     # and the trailing comma if any.
 165     engines_str = re.sub('\/\*.+\*\/', '', engines_str)
 166     engines_str = re.sub('\s+', '', engines_str)
 167     engines_str = engines_str.replace('&','')
 168     engines_str = engines_str.rstrip(',')
 169
 170     # Split the array into it's elements
 171     engines = engines_str.split(',')
 172
 173     return engines
 174
 175   def writeAllEngines(self):
 176     """Writes all search engines to the all_search_engines.xml file.
 177     """
 178
 179     all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')
 180
 181     text = []
 182
 183     for engine_name in self.all_engines:
 184       engine_data = self.getEngineData(engine_name)
 185       text.append('  <string-array name="%s" translatable="false">\n' % (engine_data[0]))
 186       for i in range(1, 7):
 187         text.append('    <item>%s</item>\n' % (engine_data[i]))
 188       text.append('  </string-array>\n')
 189       print engine_data[1] + " added to all_search_engines.xml"
 190
 191     self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
 192         all_search_engines_path, text)
 193
 194   def generateDefaultList(self):
 195     self.writeEngineList(os.path.join(self.resdir, 'values'), "default")
 196
 197   def generateListForLocale(self, locale):
 198     """Creates a new locale specific search_engines.xml file.
 199
 200     The new file contains search engines specific to that country. If required
 201     this function updates all_search_engines.xml file with any new search
 202     engine data necessary.
 203     """
 204     separator_pos = locale.find('-')
 205     if separator_pos == -1:
 206       print ('Locale must be of format <language>-<country>. For e.g.'
 207              ' "es-US" or "en-GB"')
 208       return
 209
 210     language = locale[0:separator_pos]
 211     country = locale[separator_pos + 1:].upper()
 212     dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)
 213
 214     self.writeEngineList(dir_path, country)
 215
 216   def writeEngineList(self, dir_path, country):
 217     if os.path.exists(dir_path) and not os.path.isdir(dir_path):
 218       print "File exists in output directory path " + dir_path + ". Please remove it and try again."
 219       return
 220
 221     engines = self.getSearchEnginesForCountry(country)
 222     if not engines:
 223       return
 224     for engine in engines:
 225       self.all_engines.add(engine)
 226
 227     # Create the locale specific search_engines.xml file. Each
 228     # search_engines.xml file has a hardcoded list of 7 items. If there are less
 229     # than 7 search engines for this country, the remaining items are marked as
 230     # enabled=false.
 231     text = []
 232     text.append('  <string-array name="search_engines" translatable="false">\n');
 233     for engine in engines:
 234       engine_data = self.getEngineData(engine)
 235       name = engine_data[0]
 236       text.append('    <item>%s</item>\n' % (name))
 237     text.append('  </string-array>\n');
 238
 239     self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
 240         os.path.join(dir_path, 'search_engines.xml'),
 241         text)
 242
 243   def generateXmlFromTemplate(self, template_path, out_path, text):
 244     # Load the template file and insert the new contents before the last line.
 245     template_text = open(template_path).read()
 246     pos = template_text.rfind('\n', 0, -2) + 1
 247     contents = template_text[0:pos] + ''.join(text) + template_text[pos:]
 248
 249     # Make sure what we have created is valid XML :) No need to check for errors
 250     # as the script will terminate with an exception if the XML was malformed.
 251     engines_dom = minidom.parseString(contents)
 252
 253     dir_path = os.path.dirname(out_path)
 254     if not os.path.exists(dir_path):
 255       os.makedirs(dir_path)
 256       print 'Created directory ' + dir_path
 257     file = open(out_path, 'w')
 258     file.write(contents)
 259     file.close()
 260     print 'Wrote ' + out_path
 261
 262 if __name__ == "__main__":
 263   manager = SearchEngineManager()
 264   manager.generateDefaultList()
 265   for locale in locales:
 266     manager.generateListForLocale(locale)
 267   manager.writeAllEngines()
 268