3 # Copyright (C) 2010 The Android Open Source Project
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
18 Creates the list of search engines
20 The created list is placed in the res/values-<locale> directory. Also updates
21 res/values/all_search_engines.xml if required with new data.
23 Usage: get_search_engines.py
25 Copyright (C) 2010 The Android Open Source Project
32 from xml.dom import minidom
# Locales for which a locale-specific search engine list is generated.
# Grouped by language for readability; the order is significant only in that
# it is the order in which the lists are produced.
locales = [
    "cs-CZ",
    "da-DK",
    "de-AT", "de-CH", "de-DE",
    "el-GR",
    "en-AU", "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA",
    "es-ES",
    "fr-BE", "fr-FR",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "nb-NO",
    "nl-BE", "nl-NL",
    "pl-PL",
    "pt-PT", "pt-BR",
    "ru-RU",
    "sv-SE",
    "tr-TR",
    "zh-CN", "zh-HK", "zh-MO", "zh-TW",
]
# Hand-written engine record for Google, starting with the internal engine
# name like the records produced by SearchEngineManager.getEngineData().
# The strings are emitted verbatim as XML text, so '&' must already be written
# as '&amp;' here.
google_data = [
    "google",
    "Google",
    "google.com",
    "http://www.google.com/favicon.ico",
    "http://www.google.com/m?hl={language}&amp;ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
    # NOTE(review): writeAllEngines() reads items 1..6 of every record, so a
    # seventh entry is required; presumably this is the input encoding that
    # sits between the search and suggest URLs -- confirm against the
    # repository copy of this file.
    "UTF-8",
    "http://www.google.com/complete/search?hl={language}&amp;json=true&amp;q={searchTerms}",
]
class SearchEngineManager(object):
    """Manages list of search engines and creates locale specific lists.

    The main method useful for the caller is generateListForLocale(), which
    creates a locale specific search_engines.xml file suitable for use by the
    Android WebSearchProvider implementation.

    Every engine referenced by a generated list is remembered in
    ``self.all_engines`` so that writeAllEngines() can emit the full data for
    each of them afterwards.
    """

    # Location of the Chrome source file holding the search engine data.
    _CHROME_DATA_URL = (
        'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
        'browser/search_engines/template_url_prepopulate_data.cc')

    # Replacements applied by getXmlString(). '&' must come first so that the
    # ampersands introduced by the later entities are not escaped again.
    _XML_ENTITIES = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ('\'', '&apos;'),
    )

    def __init__(self):
        """Inits SearchEngineManager with relevant search engine data.

        The search engine data is downloaded from the Chrome source
        repository. The process exits with status 2 if the repository reports
        that the file cannot be found.
        """
        # Resolved locally so the script runs on both Python 2 and Python 3.
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2

        response = urlopen(self._CHROME_DATA_URL)
        try:
            data = response.read()
        finally:
            response.close()
        # Python 3 returns bytes; all later processing works on text.
        if not isinstance(data, str):
            data = data.decode('utf-8', 'replace')
        self.chrome_data = data

        if 'repository not found' in self.chrome_data.lower():
            print('Unable to get Chrome source data for search engine list.\nExiting.')
            sys.exit(2)

        # Resource directory, located relative to the directory of this script.
        self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

        # Names of all engines used by any list generated so far.
        self.all_engines = set()

    def getXmlString(self, str):
        """Returns an XML-safe string for the given string.

        Given a string from the search engine data structure, convert it to a
        string suitable to write to our XML data file by stripping away NULLs,
        unwanted quotes, wide-string declarations (L"") and replacing C-style
        unicode characters with XML equivalents.

        Args:
          str: One raw field of a C++ struct initializer. (The name shadows
            the builtin; it is kept because it is part of the signature.)

        Returns:
          The escaped text, or '' when the field is NULL.
        """
        value = str.strip()
        if value.upper() == 'NULL':
            return ''

        # Drop the wide-string prefix; the closing quote is stripped below.
        if value.startswith('L"'):
            value = value[2:]
        # A leading '@' or '?' is backslash-escaped -- presumably because the
        # Android resource compiler treats them as references; confirm.
        if value.startswith(('@', '?')):
            value = '\\' + value

        value = value.strip('"')
        for raw, entity in self._XML_ENTITIES:
            value = value.replace(raw, entity)
        # C-style "\x0e2a" escapes become XML character references. This runs
        # after the entity pass so the generated '&' is left alone.
        return re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', value)

    def getEngineData(self, name):
        """Returns an array of strings describing the specified search engine.

        The returned strings are in the same order as in the Chrome source data
        file except that the internal name of the search engine is inserted at
        the beginning of the list.

        Args:
          name: Internal (C++ identifier) name of the search engine.

        Returns:
          A list of XML-safe strings, or None (after printing a message) when
          the engine cannot be found in the Chrome data.
        """
        # NOTE(review): google_data is the module-level hand-written record
        # and has no other use in this file, so it is presumably meant to
        # take precedence over the Chrome data here -- confirm. A copy is
        # returned so callers cannot modify the shared constant.
        if name == "google":
            return list(google_data)

        # Find the first occurrence of this search engine name in the form
        # " <name> =" in the chrome data file.
        search_obj = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
        if not search_obj:
            print('Unable to find data for search engine ' + name +
                  '. Please check the chrome data file for format changes.')
            return None

        # Extract the struct declaration between the curly braces.
        start_pos = self.chrome_data.find('{', search_obj.start()) + 1
        end_pos = self.chrome_data.find('};', start_pos)
        struct_text = self.chrome_data[start_pos:end_pos]

        # Remove c++ style '//' comments at the ends of each line.
        stripped_lines = []
        for line in struct_text.split('\n'):
            comment_pos = line.find(' // ')
            if comment_pos != -1:
                line = line[:comment_pos]
            stripped_lines.append(line)
        struct_text = '\n'.join(stripped_lines) + '\n'

        # Join multiple line strings into a single string.
        struct_text = re.sub(r'"\s+"', '', struct_text)
        struct_text = re.sub(r'"\s+L"', '', struct_text)
        struct_text = struct_text.replace('"L"', '')

        engine_data = [self.getXmlString(field) for field in struct_text.split(',')]

        # If the last element was an empty string (due to an extra comma at
        # the end of the initializer), ignore it.
        if not engine_data[-1]:
            engine_data.pop()

        engine_data.insert(0, name)
        return engine_data

    def getSearchEnginesForCountry(self, country):
        """Returns the list of search engine names for the given country.

        The data comes from the Chrome data file.

        Args:
          country: Suffix of the 'engines_XX' array to look up.

        Returns:
          A list of engine names, or None (after printing a message) when the
          Chrome data has no array for that country.
        """
        # The Chrome data file has an array defined with the name 'engines_XX'
        # where XX = country.
        pos = self.chrome_data.find('engines_' + country)
        if pos == -1:
            print('Unable to find search engine data for country ' + country + '.')
            return None

        # Extract the text between the curly braces for this array declaration.
        engines_start = self.chrome_data.find('{', pos) + 1
        engines_end = self.chrome_data.find('}', engines_start)
        engines_str = self.chrome_data[engines_start:engines_end]

        # Remove embedded /**/ style comments, white spaces, address-of
        # operators and the trailing comma if any. The comment match is
        # non-greedy so that entries between two comments on one line survive.
        engines_str = re.sub(r'/\*.*?\*/', '', engines_str)
        engines_str = re.sub(r'\s+', '', engines_str)
        engines_str = engines_str.replace('&', '')
        engines_str = engines_str.rstrip(',')

        # Split the array into its elements, ignoring empty entries.
        return [engine for engine in engines_str.split(',') if engine]

    def writeAllEngines(self):
        """Writes all search engines to the all_search_engines.xml file.

        One <string-array> holding six items is emitted for every engine in
        self.all_engines, in sorted order so the output is reproducible.
        """
        all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')

        text = []
        for engine_name in sorted(self.all_engines):
            engine_data = self.getEngineData(engine_name)
            if engine_data is None:
                # getEngineData() has already reported the problem.
                continue
            text.append(' <string-array name="%s" translatable="false">\n' % (engine_data[0]))
            for i in range(1, 7):
                text.append(' <item>%s</item>\n' % (engine_data[i]))
            text.append(' </string-array>\n')
            print(engine_data[1] + " added to all_search_engines.xml")

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'all_search_engines.template.xml'),
            all_search_engines_path, text)

    def generateDefaultList(self):
        """Creates res/values/search_engines.xml from the 'default' engine array."""
        self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

    def generateListForLocale(self, locale):
        """Creates a new locale specific search_engines.xml file.

        The new file contains search engines specific to that country. The
        engines used are recorded so that a later writeAllEngines() call
        includes their data in all_search_engines.xml.

        Args:
          locale: String of the form <language>-<country>, e.g. "en-GB". The
            process exits with status 1 if there is no '-' separator.
        """
        separator_pos = locale.find('-')
        if separator_pos == -1:
            print('Locale must be of format <language>-<country>. For e.g.'
                  ' "es-US" or "en-GB"')
            sys.exit(1)

        language = locale[:separator_pos]
        country = locale[separator_pos + 1:].upper()
        dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

        self.writeEngineList(dir_path, country)

    def writeEngineList(self, dir_path, country):
        """Writes search_engines.xml for one country into dir_path.

        Nothing is written when no engines are known for the country. The
        process exits with status 1 if dir_path exists but is not a directory.

        Args:
          dir_path: Output directory for search_engines.xml.
          country: Suffix of the 'engines_XX' array in the Chrome data.
        """
        if os.path.exists(dir_path) and not os.path.isdir(dir_path):
            print("File exists in output directory path " + dir_path + ". Please remove it and try again.")
            sys.exit(1)

        engines = self.getSearchEnginesForCountry(country)
        if not engines:
            return
        self.all_engines.update(engines)

        # Create the locale specific search_engines.xml file: a single
        # string-array with one item (the internal name) per engine.
        text = [' <string-array name="search_engines" translatable="false">\n']
        for engine in engines:
            engine_data = self.getEngineData(engine)
            if engine_data is None:
                # getEngineData() has already reported the problem.
                continue
            text.append(' <item>%s</item>\n' % (engine_data[0]))
        text.append(' </string-array>\n')

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'search_engines.template.xml'),
            os.path.join(dir_path, 'search_engines.xml'),
            text)

    def generateXmlFromTemplate(self, template_path, out_path, text):
        """Writes out_path as the template with `text` inserted before its last line.

        Args:
          template_path: Path of the XML template file.
          out_path: Path of the file to create; missing directories are made.
          text: List of strings to insert.

        Raises:
          An XML parsing error from minidom if the combined document is not
          well-formed; nothing is written in that case.
        """
        # Load the template file and insert the new contents before the last line.
        with open(template_path) as template_file:
            template_text = template_file.read()
        pos = template_text.rfind('\n', 0, -2) + 1
        contents = template_text[:pos] + ''.join(text) + template_text[pos:]

        # Make sure what we have created is valid XML. No need to check for
        # errors as the script will terminate with an exception if the XML was
        # malformed.
        minidom.parseString(contents)

        dir_path = os.path.dirname(out_path)
        # dir_path is empty when out_path has no directory component.
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path)
            print('Created directory ' + dir_path)
        with open(out_path, 'w') as out_file:
            out_file.write(contents)
        print('Wrote ' + out_path)
if __name__ == "__main__":
    # Generate the default list first, then one list per locale. Each of
    # those calls records the engines it used, so the combined
    # all_search_engines.xml has to be written last.
    engine_manager = SearchEngineManager()
    engine_manager.generateDefaultList()
    for locale_code in locales:
        engine_manager.generateListForLocale(locale_code)
    engine_manager.writeAllEngines()