Blame
Date:
Sun Oct 21 18:29:50 2012 UTC
Message:
take care of indent
001
2012-10-21
fredg
#!/usr/bin/env python
002
2012-10-21
fredg
# -*- coding: utf-8 -*-
003
2012-10-21
fredg
#
004
2012-10-21
fredg
# pkgtxt2db.py
005
2012-10-21
fredg
#
006
2012-10-21
fredg
# Copyright 2012 Frédéric Galusik <fredg~at~salixos~dot~org>
007
2012-10-21
fredg
#
008
2012-10-21
fredg
#
009
2012-10-21
fredg
# License: BSD Revised
010
2012-10-21
fredg
#
011
2012-10-21
fredg
# Convert the Slackware/Salix PACKAGES.TXT file to some various database
012
2012-10-21
fredg
# formats : CSV, json, xml
013
2012-10-21
fredg
#
014
2012-10-21
fredg
015
2012-10-21
fredg
016
2012-10-21
fredg
import os
017
2012-10-21
fredg
import urllib2
018
2012-10-21
fredg
import gzip
019
2012-10-21
fredg
import re
020
2012-10-21
fredg
021
2012-10-21
fredg
# Program information
my_url = 'http://www.salixos.org/wiki/index.php/Pkgtxt2db'
my_name = 'pkgtxt2db'
my_version = '0.0'

# Building blocks of the download URL. pkgtxturl() picks one entry from each
# list by index and concatenates them in this order:
#   mirror + arch + rep + release + expa + pkgtxtz
mirror = 'http://salix.enialis.net/'
arch = ['i486/', 'x86_64/']
rep = ['', 'slackware-']
release = ['current', '14.0', '13.37', '13.1', '13.0']
expa = ['/', '/extra/', '/patches/']
pkgtxtz = 'PACKAGES.TXT.gz'
pkgtxt = 'PACKAGES.TXT'

# Keys of a package dictionary, in export (CSV column) order.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated by Python into a single, bogus field name.
fields = ['name',
          'version',
          'arch',
          'release',
          'location',
          'deps',
          'sizec',
          'sizeu',
          'slackdesc']
045
2012-10-21
fredg
046
2012-10-21
fredg
047
2012-10-21
fredg
def new_pkgdct():
    """Return a brand-new package dictionary: every known field set to ''."""
    return dict.fromkeys(fields, '')
049
2012-10-21
fredg
050
2012-10-21
fredg
051
2012-10-21
fredg
# Fetch PACKAGES.TXT
052
2012-10-21
fredg
def pkgtxturl(a=0, re=0, rl=1, ep=0):
    """
    Download the slackware/salix PACKAGES.TXT.gz from a built URL and unzip it

    pkgtxturl(a, re, rl, ep)
    pkgtxturl(arch, os, release, extra|patches)
        -arch i486               : a=0
        -arch x86_64             : a=1
        -repository salix        : re=0
        -repository slackware    : re=1
        -release current         : rl=0
        -release 14.0            : rl=1
        -release 13.37           : rl=2
        -release 13.1            : rl=3
        -release 13.0            : rl=4
        -standard repo           : ep=0
        -extra slackware repo    : ep=1
        -patches slackware repo  : ep=2

    examples:
        -salix i486 14.0     : pkgtxturl(0, 0, 1, 0) (default)
        -      x86_64        : pkgtxturl(1, 0, 1, 0)
        -slackware i486 14.0 : pkgtxturl(0, 1, 1, 0)
        -          extra     : pkgtxturl(0, 1, 1, 1)
        -          patches   : pkgtxturl(0, 1, 1, 2)

    Any PACKAGES.TXT.gz or PACKAGES.TXT already present in the current
    directory is removed before downloading.

    Returns True once PACKAGES.TXT has been written, and False when the
    download failed with an HTTP or URL error (the error is printed).
    """
    import contextlib
    import shutil

    # NOTE: the ``re`` parameter shadows the ``re`` module inside this
    # function; it is only ever used as an index into ``rep``.
    url = mirror + arch[a] + rep[re] + release[rl] + expa[ep] + pkgtxtz
    # remove old files
    for stale in (pkgtxtz, pkgtxt):
        if os.path.isfile(stale):
            os.remove(stale)
            print("Remove old  %s" % stale)
    try:
        # closing() guarantees the HTTP response is released even on error
        with contextlib.closing(urllib2.urlopen(url)) as remote:
            print("Fetching  %s" % url)
            print("")
            # Open local_file for writing; basename(url) is pkgtxtz
            with open(os.path.basename(url), "wb") as local_file:
                shutil.copyfileobj(remote, local_file)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first
        print("HTTP Error: %s %s" % (e.code, url))
        return False
    except urllib2.URLError as e:
        print("URL Error: %s %s" % (e.reason, url))
        return False
    # unzip it: copy the decompressed bytes verbatim, both files being closed
    # by their context managers whatever happens
    with gzip.open(pkgtxtz, 'rb') as packed:
        with open(pkgtxt, 'wb') as fout:
            shutil.copyfileobj(packed, fout)
    return True
106
2012-10-21
fredg
107
2012-10-21
fredg
108
2012-10-21
fredg
# to CSV DB
109
2012-10-21
fredg
def tocsv(pkgDct, sep=";"):
    """
    Append one package record to packages.csv as a single line.

    The values are written in the order given by ``fields``; a field that is
    absent from pkgDct is written as an empty string.
    The separator string can be chosen with the sep argument, default is ;
    """
    with open("packages.csv", 'a') as csvf:
        columns = [pkgDct.get(key, '') for key in fields]
        csvf.write(sep.join(columns) + '\n')
117
2012-10-21
fredg
118
2012-10-21
fredg
119
2012-10-21
fredg
# to JSON DB
120
2012-10-21
fredg
def tojson(pkgDct):
    """
    Export PACKAGES.TXT to a JSON database format

    Append one package record to packages.json as a complete JSON object.
    Keys missing from pkgDct are written as empty strings, and every value is
    escaped by the json module.

    When the file already ends with a previous record, a comma is written
    first, so a sequence of calls wrapped in '[' ... ']' is a valid JSON
    array. The record is deliberately written without a trailing newline so
    that this check only needs to look at the end of the file.
    """
    import json

    keys = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    members = []
    for key in keys:
        value = pkgDct.get(key, '')
        # Python 2 byte strings: decode leniently so that a stray non-UTF-8
        # byte in PACKAGES.TXT cannot make json.dumps() raise
        if isinstance(value, bytes):
            value = value.decode('utf-8', 'replace')
        members.append('        %s: %s' % (json.dumps(key), json.dumps(value)))

    # A previous record leaves '}' as the last non-blank character
    needs_comma = False
    if os.path.isfile("packages.json"):
        with open("packages.json", 'rb') as existing:
            existing.seek(0, os.SEEK_END)
            existing.seek(max(0, existing.tell() - 64))
            needs_comma = existing.read().rstrip().endswith(b'}')

    with open("packages.json", 'a') as jsonf:
        if needs_comma:
            jsonf.write(',\n')
        jsonf.write('    {\n' + ',\n'.join(members) + '\n    }')
135
2012-10-21
fredg
136
2012-10-21
fredg
137
2012-10-21
fredg
# to XML DB
138
2012-10-21
fredg
def toxml(pkgDct):
    """
    Export PACKAGES.TXT to a XML database format.

    Append one <package> element to packages.xml, with one child element per
    package field. Keys missing from pkgDct produce empty elements. The
    characters &, < and > are escaped in every value so that the element
    stays well-formed whatever the package data contains.
    """
    from xml.sax.saxutils import escape

    tags = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    lines = ['\t<package>\n']
    for tag in tags:
        lines.append(
            '\t\t<%s>%s</%s>\n' % (tag, escape(pkgDct.get(tag, '')), tag))
    lines.append('\t</package>\n')
    with open("packages.xml", 'a') as xmlf:
        xmlf.writelines(lines)
154
2012-10-21
fredg
155
2012-10-21
fredg
156
2012-10-21
fredg
# parser
157
2012-10-21
fredg
def mkdadb(towhat):
    """
    Parse PACKAGES.TXT to get the values we need.
    Choose the export format:
        - CSV  : tocsv
        - JSON : tojson
        - XML  : toxml

    towhat is the export function itself; it is called once per package with
    the package dictionary. PACKAGES.TXT is read from the current directory.
    A package record ends at a blank line or at the end of the file.
    A package whose file name does not look like
    name-version-arch-release.t?z is skipped.
    """
    # Start from a fresh output file and write the format's header, if any
    if towhat == tocsv:
        if os.path.isfile("packages.csv"):
            os.remove("packages.csv")
            print("Updating packages.csv")
    if towhat == tojson:
        if os.path.isfile("packages.json"):
            os.remove("packages.json")
            print("Updating packages.json")
        with open("packages.json", 'w') as jsonf:
            jsonf.write('[\n')
    if towhat == toxml:
        if os.path.isfile("packages.xml"):
            os.remove("packages.xml")
            print("Updating packages.xml")
        with open("packages.xml", 'w') as xmlf:
            xmlf.write('<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n')
            xmlf.write('<packages>\n')

    # Line patterns, compiled once instead of once per input line
    blank_re = re.compile(r'^$')
    name_re = re.compile(r'PACKAGE NAME:\s\s(.*)')
    location_re = re.compile(r'PACKAGE LOCATION:\s\s\.(.*)')
    deps_re = re.compile(r'PACKAGE REQUIRED:\s\s(.*)')
    sizec_re = re.compile(r'PACKAGE\sSIZE\s\(compressed\):\s\s(.*)')
    sizeu_re = re.compile(r'PACKAGE\sSIZE\s\(uncompressed\):\s\s(.*)')
    filename_re = re.compile(r'(.*)-([^-]*)-([^-]*)-([^-]*)\.t[glx]z$')
    simple_fields = (("location", location_re),
                     ("deps", deps_re),
                     ("sizec", sizec_re),
                     ("sizeu", sizeu_re))

    pkg = new_pkgdct()
    # Matches the "<name>: text" description lines of the current package;
    # None until a package name has been parsed
    desc_re = None
    with open('PACKAGES.TXT') as f:
        for line in f:
            if blank_re.match(line):
                # End of a record: export it, then always start a clean one
                # so that fields never leak into the next package
                if pkg.get("name"):
                    towhat(pkg)
                pkg = new_pkgdct()
                desc_re = None
                continue

            found = name_re.match(line)
            if found:
                parts = filename_re.match(found.group(1))
                if parts:
                    pkg["name"] = parts.group(1)
                    pkg["version"] = parts.group(2)
                    pkg["arch"] = parts.group(3)
                    pkg["release"] = parts.group(4)
                    # re.escape: names may hold regex metacharacters (+, .)
                    desc_re = re.compile(
                        r'%s:\s(.*)' % re.escape(pkg["name"]))
                continue

            for key, pattern in simple_fields:
                found = pattern.match(line)
                if found:
                    pkg[key] = found.group(1)
                    break
            else:
                found = desc_re.match(line) if desc_re else None
                if found:
                    text = found.group(1)
                    # Keep the description harmless for the JSON/XML writers
                    for bad, good in (('"', '\''), ('&', 'and'),
                                      ('>', ''), ('<', '')):
                        text = text.replace(bad, good)
                    pkg["slackdesc"] += " " + text

    # The file may not end with a blank line: export the pending record too
    if pkg.get("name"):
        towhat(pkg)

    # Write the format's footer, if any
    if towhat == tojson:
        with open("packages.json", 'a') as jsonf:
            jsonf.write(']\n')
    if towhat == toxml:
        with open("packages.xml", 'a') as xmlf:
            xmlf.write('</packages>\n')
230
2012-10-21
fredg
231
2012-10-21
fredg
232
2012-10-21
fredg
def main():
    """Fetch the default PACKAGES.TXT and export it to packages.json."""
    # pkgtxturl() returns False when the download failed; there is then no
    # PACKAGES.TXT to parse, so stop instead of crashing in mkdadb()
    if pkgtxturl() is False:
        return
    mkdadb(tojson)


if __name__ == '__main__':
    main()
Frédéric Galusik