Blame
Date:
Mon Oct 29 20:33:23 2012 UTC
Message:
add salix 14.0 x86_64 json DB example
001
2012-10-21
fredg
#!/usr/bin/env python
002
2012-10-21
fredg
# -*- coding: utf-8 -*-
003
2012-10-21
fredg
#
004
2012-10-21
fredg
# pkgtxt2db.py
005
2012-10-21
fredg
#
006
2012-10-21
fredg
# Copyright 2012 Frédéric Galusik <fredg~at~salixos~dot~org>
007
2012-10-21
fredg
#
008
2012-10-21
fredg
#
009
2012-10-21
fredg
# License: BSD Revised
010
2012-10-21
fredg
#
011
2012-10-21
fredg
# Convert the Slackware/Salix PACKAGES.TXT file to various database
012
2012-10-21
fredg
# formats : CSV, json, xml
013
2012-10-21
fredg
#
014
2012-10-21
fredg
015
2012-10-21
fredg
016
2012-10-21
fredg
import os
017
2012-10-29
fredg
import sys
018
2012-10-21
fredg
import urllib2
019
2012-10-21
fredg
import gzip
020
2012-10-21
fredg
import re
021
2012-10-29
fredg
import argparse
022
2012-10-21
fredg
023
2012-10-29
fredg
024
2012-10-21
fredg
# Program information: name, version and home page of this tool.
my_name = 'pkgtxt2db'
my_version = '0.0'
my_url = 'http://www.salixos.org/wiki/index.php/Pkgtxt2db'
028
2012-10-21
fredg
029
2012-10-21
fredg
# Ordered keys of a package record. new_pkgdct() initialises the pkg
# dictionary with an empty value for each key, and tocsv() writes the
# columns in this order.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated into a single key.
fields = ['name',
          'version',
          'arch',
          'release',
          'location',
          'deps',
          'sizec',
          'sizeu',
          'slackdesc']
039
2012-10-21
fredg
040
2012-10-29
fredg
# Parse the CLI options
# The epilog example only uses options that are declared below: the arch repo
# is selected with --repo and the database format with -c/--convert
# (-o/--output is the base name of the generated file, not its format).
parser = argparse.ArgumentParser(
    description='Convert PACKAGES.TXT to DB',
    epilog="e.g. pkgtxt2db -t salix --repo x86_64 -r 14.0 -c json")
parser.add_argument('-u', '--update', action="store_true",
                    default=False,
                    help='Download/update the PACKAGES.TXT file')
parser.add_argument('-t', '--target', action="store",
                    dest='target', default='salix',
                    help='Choose the O.S.: slackware or salix (default) ')
parser.add_argument('--repo', action="store",
                    dest='repo', default='i486',
                    help='Choose the arch repo: x86_64 or i486 (default)')
parser.add_argument('-e', '--expa', action="store",
                    dest='expa', default='/',
                    help='Choose the slackware extra/patches')
parser.add_argument('-r', '--release', action="store",
                    dest='release', default='14.0',
                    help='Choose the release: 13.0 to 14.0 (default)')
# No default here: args.convert is None when -c is not given.
parser.add_argument('-c', '--convert', action="store",
                    dest='convert',
                    help='Choose the DB format: xml, json, csv')
parser.add_argument('-o', '--output', action="store",
                    dest='output', default='packages',
                    help='Choose the name of your DB file')
args = parser.parse_args()
066
2012-10-21
fredg
067
2012-10-29
fredg
# vars
mirror = 'http://salix.enialis.net/'
pkgtxtz = 'PACKAGES.TXT.gz'
pkgtxt = 'PACKAGES.TXT'
# Module-level copies of the parsed CLI options, read by the functions below.
update = args.update
target = args.target
repo = args.repo
release = args.release
expa = args.expa
convert = args.convert
output = args.output
# -c/--convert has no default, so convert may be None. Joining None would
# raise TypeError right here, before main() gets the chance to report the
# missing database format; fall back to the bare output name in that case.
outputfile = '.'.join([output, convert]) if convert else output
079
2012-10-29
fredg
080
2012-10-29
fredg
081
2012-10-21
fredg
def new_pkgdct():
    """Return a fresh package record: every key of fields mapped to ''."""
    return dict((key, '') for key in fields)
083
2012-10-21
fredg
084
2012-10-21
fredg
085
2012-10-21
fredg
# Fetch PACKAGES.TXT
def pkgtxturl(repo='32', target='salix', release='14.0', expa='/'):
    """
    Download the slackware/salix PACKAGES.TXT.gz from a built URL and unzip it.

    pkgtxturl(repo, target, release, |extra|patches)

    The URL is built as
        mirror + repo + '/' + prefix + release + expa + PACKAGES.TXT.gz
    where prefix is 'slackware-' for the slackware target and empty for
    salix; expa 'extra' / 'patches' become '/extra/' / '/patches/', any
    other value is used as given.

    Existing PACKAGES.TXT.gz and PACKAGES.TXT files in the current directory
    are removed before the download starts.

    Returns True when the file was fetched and unzipped, False when the
    download failed with an HTTP or URL error. Exits the program when
    target is neither 'slackware' nor 'salix'.
    """
    # NOTE(review): the default repo '32' differs from the CLI default
    # 'i486' - confirm which value the mirror layout expects.
    if target == 'slackware':
        prefix = 'slackware-'
    elif target == 'salix':
        prefix = ''
    else:
        sys.exit('Choose a valid target, aborting.')

    if expa == 'extra':
        expa = '/extra/'
    elif expa == 'patches':
        expa = '/patches/'

    url = mirror + repo + '/' + prefix + release + expa + pkgtxtz

    # remove old files
    for stale in (pkgtxtz, pkgtxt):
        if os.path.isfile(stale):
            os.remove(stale)
            print(' '.join(["Remove old ", stale]))

    try:
        response = urllib2.urlopen(url)
        try:
            print(' '.join(["Fetching ", url]))
            print("")
            # Always save under the name that is unzipped below, whatever
            # the last path component of the URL turns out to be.
            with open(pkgtxtz, "wb") as local_file:
                local_file.write(response.read())
        finally:
            # Release the connection even when writing the file fails.
            response.close()
    except urllib2.HTTPError as e:
        print(' '.join(["HTTP Error:", str(e.code), url]))
        return False
    except urllib2.URLError as e:
        print(' '.join(["URL Error:", str(e.reason), url]))
        return False

    # unzip it; binary mode keeps the decompressed bytes untouched and both
    # files are closed even if decompression fails half-way.
    with gzip.open(pkgtxtz, 'rb') as zipped:
        with open(pkgtxt, 'wb') as fout:
            for line in zipped:
                fout.write(line)
    return True
132
2012-10-21
fredg
133
2012-10-21
fredg
134
2012-10-21
fredg
# to CSV DB
def tocsv(pkgDct, sep=";"):
    """
    Append one package record to the CSV database file.

    The values are taken in the order of the fields list (a missing key
    gives an empty column) and joined with sep, which can be chosen by
    the caller; default is ;
    """
    with open(outputfile, 'a') as csvf:
        columns = [pkgDct.get(key, '') for key in fields]
        csvf.write(sep.join(columns) + '\n')
143
2012-10-21
fredg
144
2012-10-21
fredg
145
2012-10-21
fredg
# to JSON DB
def tojson(pkgDct):
    """
    Append one package record, as a JSON object, to the pre.json staging file.

    Every object is followed by a comma; the values are written as they
    are, without any escaping.
    """
    keys = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    last = keys[-1]
    with open("pre.json", 'a') as j:
        j.write(' {\n')
        for key in keys[:-1]:
            j.write(' "' + key + '": "' + pkgDct.get(key) + '",\n')
        # the last member of the object takes no trailing comma
        j.write(' "' + last + '": "' + pkgDct.get(last) + '"\n')
        j.write(' },\n')
162
2012-10-21
fredg
163
2012-10-21
fredg
164
2012-10-21
fredg
# to XML DB
def toxml(pkgDct):
    """
    Append one package record, as a <package> element, to the XML database.

    Each value is written inside an element named after its key; the
    values are written as they are, without any escaping.
    """
    tags = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    with open(outputfile, 'a') as xmlf:
        xmlf.write('\t<package>\n')
        for tag in tags:
            xmlf.write(
                '\t\t<' + tag + '>' + pkgDct.get(tag) + '</' + tag + '>\n')
        xmlf.write('\t</package>\n')
181
2012-10-21
fredg
182
2012-10-21
fredg
183
2012-10-21
fredg
# parser
def mkdadb(towhat):
    """
    Parse PACKAGES.TXT to get the values we need and export every package.

    Choose the export format by passing the matching exporter as towhat:
        - CSV : tocsv
        - JSON : tojson
        - XML : toxml

    An existing output file is removed first. For JSON the records are
    staged in pre.json and copied to the output file at the end, with the
    enclosing object written around them; for XML the declaration and the
    <packages> root element are written around the records.

    A record is exported when an empty line follows it, and also at the end
    of the file, so the last package is kept even without a trailing empty
    line. A "PACKAGE NAME" line whose file name cannot be split into
    name-version-arch-release is reported on stderr and skipped.
    """
    if towhat in (tocsv, tojson, toxml) and os.path.isfile(outputfile):
        os.remove(outputfile)
        print(' '.join(['Updating ', outputfile]))
    if towhat == tojson:
        with open("pre.json", 'w') as j:
            j.write('{\n')
            j.write('"packages": [\n')
    if towhat == toxml:
        with open(outputfile, 'w') as xmlf:
            xmlf.write('<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n')
            xmlf.write('<packages>\n')

    # The line patterns do not change, compile them once for the whole file.
    blank_re = re.compile(r'^$')
    name_re = re.compile(r'(PACKAGE NAME:\s\s)(.*)')
    filename_re = re.compile(r'(.*)-([^-]*)-([^-]*)-([^-]*).t[glx]z$')
    value_res = (
        ("deps", re.compile(r'(PACKAGE REQUIRED:\s\s)(.*)')),
        ("location", re.compile(r'(PACKAGE LOCATION:\s\s\.)(.*)')),
        ("sizec", re.compile(r'(PACKAGE\sSIZE\s\(compressed\):\s\s)(.*)')),
        ("sizeu", re.compile(r'(PACKAGE\sSIZE\s\(uncompressed\):\s\s)(.*)')),
    )

    def emit(record):
        # Description lines are accumulated with a leading space each.
        record["slackdesc"] = record["slackdesc"].strip()
        towhat(record)

    pkg = new_pkgdct()
    # Pattern of the "<name>: text" description lines of the current
    # package; None as long as no package name is known.
    desc_re = None
    with open(pkgtxt) as f:
        for line in f:
            if blank_re.match(line):
                # An empty line closes the current record.
                if pkg.get("name"):
                    emit(pkg)
                pkg = new_pkgdct()
                desc_re = None
                continue

            named = name_re.match(line)
            if named:
                parts = filename_re.match(named.group(2))
                if parts is None:
                    sys.stderr.write(
                        'Skipping unparsable package name: %s\n'
                        % named.group(2))
                    pkg = new_pkgdct()
                    desc_re = None
                    continue
                (pkg["name"], pkg["version"],
                 pkg["arch"], pkg["release"]) = parts.groups()
                # re.escape protects every regex metacharacter of the name
                # ('+', '.', ...), not only '+'.
                if pkg["name"]:
                    desc_re = re.compile(
                        r'(%s:\s)(.*)' % re.escape(pkg["name"]))
                else:
                    desc_re = None
                continue

            for key, pattern in value_res:
                found = pattern.match(line)
                if found:
                    pkg[key] = found.group(2)

            described = desc_re.match(line) if desc_re else None
            if described:
                # Drop the characters that would break the hand-written
                # JSON and XML output.
                pkg["slackdesc"] += " " + described.group(2).\
                    replace('"', '\'').\
                    replace('&', 'and').\
                    replace('>', '').\
                    replace('<', '')

    # Flush the last record when the file does not end with an empty line.
    if pkg.get("name"):
        emit(pkg)

    if towhat == tojson:
        with open("pre.json", 'r') as j, open(outputfile, "w") as jsonf:
            alllines = j.readlines()
            # The last record must not be followed by a comma.
            alllines[-1] = alllines[-1].replace('},', '}')
            jsonf.writelines(alllines)
            jsonf.write(']\n')
            jsonf.write('}\n')
        os.remove("pre.json")
    if towhat == toxml:
        with open(outputfile, 'a') as xmlf:
            xmlf.write('</packages>\n')
264
2012-10-21
fredg
265
2012-10-21
fredg
266
2012-10-21
fredg
def main():
    """
    Command line entry point: fetch PACKAGES.TXT if requested, then convert.

    PACKAGES.TXT is downloaded only when -u/--update was given; otherwise a
    local PACKAGES.TXT must already exist. The program exits with a message
    when the download fails, when no local file is available, or when the
    requested database format is not csv, json or xml.
    """
    if update:
        # pkgtxturl() returns False when the download failed.
        if pkgtxturl(repo, target, release, expa) is False:
            sys.exit('Unable to fetch PACKAGES.TXT, aborting.')
    elif not os.path.isfile(pkgtxt):
        sys.exit('No PACKAGES.TXT found, you should fetch one, aborting.')

    exporters = {'csv': tocsv, 'json': tojson, 'xml': toxml}
    if convert not in exporters:
        sys.exit('You have to choose a valid database format, aborting.')
    mkdadb(exporters[convert])


if __name__ == '__main__':
    main()
Frédéric Galusik