Blame
Date:
Sun Oct 21 17:34:09 2012 UTC
Message:
remove old perl usage
001
2012-10-21
fredg
#!/usr/bin/env python
002
2012-10-21
fredg
# -*- coding: utf-8 -*-
003
2012-10-21
fredg
#
004
2012-10-21
fredg
# pkgtxt2db.py
005
2012-10-21
fredg
#
006
2012-10-21
fredg
# Copyright 2012 Frédéric Galusik <fredg~at~salixos~dot~org>
007
2012-10-21
fredg
#
008
2012-10-21
fredg
#
009
2012-10-21
fredg
# License: BSD Revised
010
2012-10-21
fredg
#
011
2012-10-21
fredg
# Convert the Slackware/Salix PACKAGES.TXT file to various database
012
2012-10-21
fredg
# formats: CSV, JSON, XML
013
2012-10-21
fredg
#
014
2012-10-21
fredg
015
2012-10-21
fredg
016
2012-10-21
fredg
import os
017
2012-10-21
fredg
import urllib2
018
2012-10-21
fredg
import gzip
019
2012-10-21
fredg
import re
020
2012-10-21
fredg
021
2012-10-21
fredg
# Program information
my_url = 'http://www.salixos.org/wiki/index.php/Pkgtxt2db'
my_name = 'pkgtxt2db'
my_version = '0.0'

# Building blocks of the download URL.  pkgtxturl() selects one entry of each
# list by index and concatenates them as:
#     mirror + arch + rep + release + expa + pkgtxtz
mirror = 'http://salix.enialis.net/'
arch = [
    'i486/',
    'x86_64/',
]
rep = [
    '',            # salix
    'slackware-',  # slackware
]
release = [
    'current',
    '14.0',
    '13.37',
    '13.1',
    '13.0',
]
expa = [
    '/',           # standard repo
    '/extra/',
    '/patches/',
]

# Local file names: the downloaded archive and its uncompressed copy.
pkgtxtz = 'PACKAGES.TXT.gz'
pkgtxt = 'PACKAGES.TXT'
034
2012-10-21
fredg
035
2012-10-21
fredg
# Names of the values recorded for every package, in export (column) order.
# NOTE: each entry needs its own trailing comma -- two adjacent string
# literals are silently concatenated ('release' 'location' would become the
# single bogus field 'releaselocation').
fields = ['name',
          'version',
          'arch',
          'release',
          'location',
          'deps',
          'sizec',
          'sizeu',
          'slackdesc']
045
2012-10-21
fredg
046
2012-10-21
fredg
047
2012-10-21
fredg
def new_pkgdct():
    """
    Return a new package dictionary: every name in fields mapped to ''.
    """
    return dict.fromkeys(fields, '')
049
2012-10-21
fredg
050
2012-10-21
fredg
051
2012-10-21
fredg
# Fetch PACKAGES.TXT
052
2012-10-21
fredg
def pkgtxturl(a=0, re=0, rl=1, ep=0):
    """
    Download the slackware/salix PACKAGES.TXT.gz from a built URL and unzip it

    pkgtxturl(a, re, rl, ep)
    pkgtxturl(arch, os, release, extra|patches)

    Every argument is an index into one of the module-level lists:
        -arch i486                  : a=0
        -arch x86_64                : a=1
        -repository salix           : re=0
        -repository slackware       : re=1
        -release current            : rl=0
        -release 14.0               : rl=1
        -release 13.37              : rl=2
        -release 13.1               : rl=3
        -release 13.0               : rl=4
        -standard repo              : ep=0
        -extra slackware repo       : ep=1
        -patches slackware repo     : ep=2

    examples:
        -salix i486 14.0     : pkgtxturl(0, 0, 1, 0) (default)
        -      x86_64        : pkgtxturl(1, 0, 1, 0)
        -slackware i486 14.0 : pkgtxturl(0, 1, 1, 0)
        -          extra     : pkgtxturl(0, 1, 1, 1)
        -          patches   : pkgtxturl(0, 1, 1, 2)

    Any previous PACKAGES.TXT.gz / PACKAGES.TXT in the current directory is
    removed first.  An index outside its list raises IndexError.

    Returns True once PACKAGES.TXT has been written, False (after printing
    the error) when the download fails with an HTTP or URL error.
    """
    # NOTE: the parameter named `re` hides the `re` module inside this
    # function; it is part of the public signature, so it keeps its name.
    url = mirror + arch[a] + rep[re] + release[rl] + expa[ep] + pkgtxtz
    # remove old files
    for stale in (pkgtxtz, pkgtxt):
        if os.path.isfile(stale):
            os.remove(stale)
            print("Remove old  %s" % stale)
    try:
        response = urllib2.urlopen(url)
        try:
            print("Fetching  %s" % url)
            print("")
            # Copy the archive to disk in chunks instead of holding the
            # whole download in memory.
            with open(os.path.basename(url), "wb") as local_file:
                for chunk in iter(lambda: response.read(64 * 1024), b''):
                    local_file.write(chunk)
        finally:
            # Always release the connection, even if writing fails.
            response.close()
    except urllib2.HTTPError as e:
        print("HTTP Error: %s %s" % (e.code, url))
        return False
    except urllib2.URLError as e:
        print("URL Error: %s %s" % (e.reason, url))
        return False
    # unzip it; gzip yields bytes, so the target is opened in binary mode and
    # both handles are closed even when decompression fails.
    with gzip.open(pkgtxtz, 'rb') as packed:
        with open(pkgtxt, 'wb') as fout:
            for line in packed:
                fout.write(line)
    return True
105
2012-10-21
fredg
106
2012-10-21
fredg
107
2012-10-21
fredg
# to CSV DB
108
2012-10-21
fredg
def tocsv(pkgDct, sep=";"):
    """
    Append one package as a line of packages.csv.

    The values are taken from pkgDct in the order given by fields (a missing
    key yields an empty column) and joined with sep, which defaults to ;
    """
    with open("packages.csv", 'a') as out:
        columns = [pkgDct.get(column, '') for column in fields]
        out.write(sep.join(columns) + '\n')
116
2012-10-21
fredg
117
2012-10-21
fredg
118
2012-10-21
fredg
# to JSON DB
119
2012-10-21
fredg
def tojson(pkgDct):
    """
    Append one package as a JSON object to packages.json.

    The object holds the keys name, version, arch, release, location, deps,
    sizec, sizeu and slackdesc, in that order; a key missing from pkgDct is
    written as an empty string.  Keys and values are encoded with json.dumps
    so quotes, backslashes and control characters cannot break the document.

    The function only writes array elements: the caller is expected to write
    the opening '[' before the first call and the closing ']' after the last
    one (mkdadb does).  When the file already ends with an object, a ',' is
    written first, so the elements form a valid JSON array.
    """
    # Local import: json is not among this file's module-level imports.
    import json

    keys = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    members = [' %s: %s' % (json.dumps(key), json.dumps(pkgDct.get(key, '')))
               for key in keys]
    record = ' {\n' + ',\n'.join(members) + '\n }'

    # Peek at the tail of the existing file: a closing brace means a previous
    # object is already there and this one must be preceded by a comma.
    needs_comma = False
    if os.path.isfile("packages.json"):
        with open("packages.json", 'rb') as existing:
            existing.seek(0, os.SEEK_END)
            existing.seek(max(0, existing.tell() - 16))
            needs_comma = existing.read().rstrip().endswith(b'}')

    with open("packages.json", 'a') as jsonf:
        if needs_comma:
            jsonf.write(',\n')
        jsonf.write(record)
134
2012-10-21
fredg
135
2012-10-21
fredg
136
2012-10-21
fredg
# to XML DB
137
2012-10-21
fredg
def toxml(pkgDct):
    """
    Append one package as a <package> element to packages.xml.

    The element has the children name, version, arch, release, location,
    deps, sizec, sizeu and slackdesc, in that order; a key missing from
    pkgDct gives an empty child.  The values are XML-escaped, so '&', '<'
    and '>' (e.g. a versioned dependency such as "foo >= 1.0") cannot break
    the document.

    Only the element itself is written; the XML declaration and the
    enclosing <packages> root are the caller's job (mkdadb writes them).
    """
    # Local import: this helper is not among the file's module-level imports.
    from xml.sax.saxutils import escape

    tags = ("name", "version", "arch", "release", "location",
            "deps", "sizec", "sizeu", "slackdesc")
    lines = ['\t<package>\n']
    for tag in tags:
        lines.append(
            '\t\t<%s>%s</%s>\n' % (tag, escape(pkgDct.get(tag, '')), tag))
    lines.append('\t</package>\n')
    with open("packages.xml", 'a') as xmlf:
        xmlf.writelines(lines)
153
2012-10-21
fredg
154
2012-10-21
fredg
155
2012-10-21
fredg
# parser
156
2012-10-21
fredg
def mkdadb(towhat):
    """
    Parse PACKAGES.TXT to get the values we need and export them.

    towhat is the exporter function; it is called once per package with the
    dictionary describing that package.  Choose the export format:
        - CSV : tocsv
        - JSON : tojson
        - XML : toxml

    An existing output file is replaced.  For JSON and XML the wrapper around
    the exported records ('[' ... ']' and the XML declaration plus
    '<packages>' ... '</packages>') is written here.

    A record is exported when the blank line following it is read, and once
    more at end of file so that a last record without a trailing blank line
    is not lost.  A "PACKAGE NAME:" line whose file name does not look like
    <name>-<version>-<arch>-<release>.t[bglx]z is reported and skipped.
    """
    # The exporters only append, so always start from a fresh output file.
    if towhat == tocsv:
        if os.path.isfile("packages.csv"):
            os.remove("packages.csv")
            print("Updating packages.csv")
    if towhat == tojson:
        if os.path.isfile("packages.json"):
            os.remove("packages.json")
            print("Updating packages.json")
        with open("packages.json", 'w') as jsonf:
            jsonf.write('[\n')
    if towhat == toxml:
        if os.path.isfile("packages.xml"):
            os.remove("packages.xml")
            print("Updating packages.xml")
        with open("packages.xml", 'w') as xmlf:
            xmlf.write('<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n')
            xmlf.write('<packages>\n')

    # Line patterns, compiled once instead of on every line.
    name_re = re.compile(r'(PACKAGE NAME:\s\s)(.*)')
    fname_re = re.compile(r'(.*)-([^-]*)-([^-]*)-([^-]*)\.t[bglx]z$')
    blank_re = re.compile(r'^$')
    # Lines whose value is copied verbatim into one key of the record.
    simple_fields = (
        ("deps", re.compile(r'(PACKAGE REQUIRED:\s\s)(.*)')),
        ("location", re.compile(r'(PACKAGE LOCATION:\s\s\.)(.*)')),
        ("sizec", re.compile(r'(PACKAGE\sSIZE\s\(compressed\):\s\s)(.*)')),
        ("sizeu", re.compile(r'(PACKAGE\sSIZE\s\(uncompressed\):\s\s)(.*)')),
    )

    pkg = new_pkgdct()
    # Pattern of the "<name>: text" description lines of the current package;
    # None until a package name has been read.
    desc_re = None
    with open('PACKAGES.TXT') as f:
        for line in f:
            # Matched against the package known *before* this line is
            # processed, i.e. before a PACKAGE NAME line changes it.
            descline = desc_re.match(line) if desc_re is not None else None

            pkgline = name_re.match(line)
            if pkgline:
                pname = fname_re.match(pkgline.group(2))
                if pname is None:
                    print("Skipping unparsable package name: %s"
                          % pkgline.group(2))
                    pkg = new_pkgdct()
                    desc_re = None
                    continue
                pkg["name"] = pname.group(1)
                pkg["version"] = pname.group(2)
                pkg["arch"] = pname.group(3)
                pkg["release"] = pname.group(4)
                # re.escape: names such as "gtk+2" or "libsigc++" contain
                # regex metacharacters.
                desc_re = re.compile(
                    r'(%s:\s)(.*)' % re.escape(pkg["name"]))

            for key, pattern in simple_fields:
                found = pattern.match(line)
                if found:
                    pkg[key] = found.group(2)

            if descline:
                # Drop the characters that would need escaping in the
                # JSON / XML output.
                pkg["slackdesc"] += " " + descline.group(2).\
                    replace('"', '\'').\
                    replace('&', 'and').\
                    replace('>', '').\
                    replace('<', '')

            if blank_re.match(line) and pkg.get("name"):
                towhat(pkg)
                pkg = new_pkgdct()
                desc_re = None

    # Flush a final record that was not followed by a blank line.
    if pkg.get("name"):
        towhat(pkg)

    if towhat == tojson:
        with open("packages.json", 'a') as jsonf:
            jsonf.write(']\n')
    if towhat == toxml:
        with open("packages.xml", 'a') as xmlf:
            xmlf.write('</packages>\n')
229
2012-10-21
fredg
230
2012-10-21
fredg
231
2012-10-21
fredg
def main():
    """
    Fetch PACKAGES.TXT with the default pkgtxturl() arguments and export it
    to packages.csv.
    """
    # pkgtxturl() returns False (after printing the error) when the download
    # fails; there is then no PACKAGES.TXT to parse, so stop here instead of
    # crashing in mkdadb().
    if pkgtxturl() is False:
        return
    mkdadb(tocsv)


if __name__ == '__main__':
    main()
Frédéric Galusik