Skip to content
Snippets Groups Projects
Commit 9c84d0bd authored by Nadia Arslan
Browse files

main

parent d1a15ec4
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,8 @@ Created on Fri Dec 13 10:48:25 2019
import json
from bs4 import BeautifulSoup
import requests
import tika
tika.initVM()
from tika import parser as pdf_parser
import re
import html_extractor
......@@ -219,7 +221,12 @@ def iterate(data):
de_en = open_json(DE_EN)
for i, el in enumerate(data.items()):
if el[0] in de_en:
print(el[0], ' there')
continue
#elif el[0] == '1613752':
#print('passed 1613752')
#continue
print(el[0])
extracted = extractor(el)
if extracted == False:
with open(FAILS,'r') as f:
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment