Skip to content
Snippets Groups Projects
Commit 197c4b16 authored by Marko Kuder
Browse files

fixed eurovoc tag loader for arbitrary URLs

parent 9d0bb837
No related branches found
No related tags found
No related merge requests found
......@@ -4,6 +4,8 @@ import zipfile
import urllib2
import xml.etree.ElementTree as eTree
EUROVOC_FILENAME = "eurovoc_xml.zip"
GEMET_FILENAME = "gemet-definitions.rdf"
class TagLoader:
def __init__(self, name, url):
......@@ -14,13 +16,15 @@ class TagLoader:
def loader(self):
response = urllib2.urlopen(self._url)
data = response.read()
filename = self._url.split('/')[-1]
file_ = open(filename, 'w')
if self._name == 'eurovoc':
file_ = open(EUROVOC_FILENAME, 'w')
else:
file_ = open(GEMET_FILENAME, 'w')
file_.write(data)
file_.close()
if self._name == 'eurovoc':
with zipfile.ZipFile("eurovoc_xml.zip", "r") as zip_ref:
with zipfile.ZipFile(EUROVOC_FILENAME, "r") as zip_ref:
zip_ref.extractall("eurovoc_xml")
def parse_xml(self, file_name, to_find):
......@@ -50,7 +54,7 @@ class TagLoader:
def clean_up(self):
if self._name == 'eurovoc':
os.remove('eurovoc_xml.zip')
os.remove(EUROVOC_FILENAME)
shutil.rmtree('eurovoc_xml')
elif self._name == 'gemet':
os.remove('gemet-definitions.rdf')
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment