Solved

running Go program on Windows 7

Posted on 2014-04-19
1
345 Views
Last Modified: 2014-04-23
i'm new to Go programming.

I am running a Windows 7 desktop pc
I setup my environment variables and path and was able to run the hello world program just fine.

Now, I'm trying the example located at this link:

https://gist.github.com/fastest963/10978299

So this is the code for iocrawler.go:
//before running make sure you setup a GOPATH env variable and ran: "go get code.google.com/p/go.net/html"
 
//to run: go run ioCrawler.go -url="http://developers.google.com/"
//also try http://developer.android.com/index.html
//output goo.gl links to try and redeem will be sent to foundLinks.txt
 
//by the way there's an artificial "rate limit" in func crawler, you can lower that or raise it as you wish
//You can also comment out the onlyGoogleRegex code if you don't want to limit to google.com/youtube
//if you're getting I/O timeout errors, then you might need to increase the timeouts on line 231
 
package main
 
import (
"code.google.com/p/go.net/html"
"errors"
"flag"
"fmt"
"io/ioutil"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"regexp"
"strings"
"time"
"sync"
"math/rand"
)
 
//prevent from downloading files we can't even read
var extToIgnore = regexp.MustCompile(`(?i)\.(gz|tar|tgz|bz2|jpg|png|jpeg|gif|psd|mp3|webm|ogg|mp4|mpeg|flv|avi|wmv|fla|mov|bin|wav|rpm|dmg|exe|zip|rar|7z|doc|ppt|pdf|iso|torrent)$`)
 
//if you want to index more domains feel free to update this or remove it entirely (update validateUri too to remove the check at the bottom)
var onlyGoogleRegex = regexp.MustCompile(`(?i)(google|youtube|android|golang|opers.blogspot|oogle.blogspot|youtu|angularjs|angulardart|dartlang|html5rocks)\.(com|org|net|io|be)`)
 
var googlRegex = regexp.MustCompile(`(?im)((?:goo\.gl/[a-zA-Z0-9]{3,9})|(?:[a-zA-Z0-9]{3,9}(?:/|\\)lg\.oog))`)
 
//if you're getting a lot of I/O (pun not intended...) timeouts then raise this
var defaultConnectTimeout = time.Duration(2*time.Second)
var defaultRWTimeout = time.Duration(5*time.Second)
 
var guessTimeout = time.Duration(1000*time.Millisecond)
var rateLimitedGoog bool
 
var queuedLock = sync.RWMutex{}
var knownLock = sync.RWMutex{}
var knownGooglLock = sync.RWMutex{}
var queuedGooglLock = sync.RWMutex{}
 
var queuedUris = map[string]bool{}
var knownUris = map[string]bool{}
var queuedGoogl = map[string]bool{}
var knownGoogl = map[string]bool{
"goo.gl/ZV63Cr": true,
"goo.gl/1nToQ4": true,
"goo.gl/q7wKp8": true,
"goo.gl/dDZIsO": true,
"goo.gl/R1jVTj": true,
"goo.gl/B92rvj": true,
"goo.gl/Au49Um": true,
"goo.gl/uQvdDQ": true,
"goo.gl/KBXDW9": true,
"goo.gl/m3uwvU": true,
"goo.gl/bhLDq0": true,
"goo.gl/gdEknO": true,
"goo.gl/8AUTRP": true,
"goo.gl/5nz6cg": true,
"goo.gl/T3mzjk": true,
"goo.gl/5FwCJy": true,
"goo.gl/3QGGK0": true,
"goo.gl/K6E7l8": true,
"goo.gl/xZM9jt": true,
"goo.gl/rMFYNX": true,
"goo.gl/F3noUV": true,
"goo.gl/JYSlLQ": true,
"goo.gl/hmgbrW": true,
"goo.gl/0gNnSE": true,
"goo.gl/Tw6kP9": true,
"goo.gl/RB2zhx": true,
"goo.gl/kJPqT3": true,
"goo.gl/qllLsG": true,
"goo.gl/zj9bO0": true,
"goo.gl/bjAPLg": true,
"goo.gl/7tKTyd": true,
"goo.gl/eXeVao": true,
"goo.gl/Dpoyqi": true,
"goo.gl/iztHMd": true,
"goo.gl/HACUad": true,
"goo.gl/oJDnYs": true,
"goo.gl/ia7jLd": true,
"goo.gl/AJRPcI": true,
"goo.gl/bS7lhD": true,
"goo.gl/lyBKa0": true,
"goo.gl/OmM6DN": true,
"goo.gl/puUsyX": true,
"goo.gl/Gc8Dsd": true,
"goo.gl/P3cEUu": true,
"goo.gl/9lT4ZV": true,
"goo.gl/HNA9YG": true,
"goo.gl/xjP4Ke": true,
"goo.gl/ykmzUC": true,
"goo.gl/EOiis9": true,
"goo.gl/wgzU7N": true,
"goo.gl/5ixjt4": true,
"goo.gl/L2N5sY": true,
"goo.gl/SWNo61": true,
"goo.gl/06Hov3": true,
"goo.gl/JOzJ9j": true,
"goo.gl/8JoXn0": true,
"goo.gl/y1y9Lh": true,
"goo.gl/FDTdEx": true,
"goo.gl/LgqaL0": true,
"goo.gl/OQwgds": true,
"goo.gl/hZWUau": true,
"goo.gl/wfkUr7": true,
"goo.gl/sK6kAA": true,
"goo.gl/DpYUwO": true,
"goo.gl/vLppsl": true,
"goo.gl/KVDTVy": true,
"goo.gl/dGrMwY": true,
"goo.gl/YOTeO1": true,
"goo.gl/PltckZ": true,
"goo.gl/sUJ1r7": true,
"goo.gl/okXi4W": true,
"goo.gl/ENWVvN": true,
"goo.gl/BcLOM9": true,
"goo.gl/R9MgF3": true,
"goo.gl/egnhgc": true,
"goo.gl/Jca6KP": true,
"goo.gl/1AiXEk": true,
"goo.gl/iMmkIj": true,
"goo.gl/E1vTJk": true,
"goo.gl/pikTTi": true,
"goo.gl/9KK0k0": true,
"goo.gl/Oof8mG": true,
"goo.gl/X1R54k": true,
"goo.gl/ykH2OP": true,
"goo.gl/cuSgmK": true,
"goo.gl/I6Lpbz": true,
"goo.gl/JFKDGw": true,
"goo.gl/o0YxL2": true,
"goo.gl/cCpQrU": true,
"goo.gl/sWHuKQ": true,
"goo.gl/FhGPJW": true,
"goo.gl/v6jFEL": true,
"goo.gl/hBKyB5": true,
"goo.gl/mH9rXs": true,
"goo.gl/3QXZ9r": true,
"goo.gl/6M31VD": true,
"goo.gl/ZaQQyr": true,
"goo.gl/FbPhW1": true,
"goo.gl/EePxH9": true,
"goo.gl/LLy4UO": true,
"goo.gl/yGQsaV": true,
"goo.gl/MtXamT": true,
"goo.gl/SaKrk1": true,
"goo.gl/Jtc6vr": true,
}
var uriQueue = make(chan *url.URL, 16384)
var googlQueue = make(chan *url.URL, 4096)
 
var googlOutputFile *os.File
var shouldGuess bool
 
func main() {
startURL := flag.String("url", "", "the url to start crawling")
guess := flag.Bool("guess", false, "guess goo.gl links")
flag.Parse()
shouldGuess = *guess
startUri, err := url.Parse(*startURL)
if err != nil || *startURL == "" {
fmt.Printf("Failed to parse starting URL: %s. Response: %s\n", *startURL, err)
return
}
 
googlOutputFile, err = os.OpenFile("./foundLinks.txt", os.O_APPEND | os.O_CREATE | os.O_WRONLY, 0644)
if err != nil {
fmt.Printf("Failed to open foundLinks.txt. Error: %s\n", err)
return
}
defer func() {
if err := googlOutputFile.Close(); err != nil {
fmt.Printf("Failed to close output file: %s\n", err)
}
}()
 
for i := 0; i < 48; i++ {
go crawler()
}
for i := 0; i < 8; i++ {
go googlChecker()
}
for i := 0; i < 1; i++ {
go guesser()
}
//fakeGoogl, _ := url.Parse("http://goo.gl/ZV63Cr")
//googlQueue <- fakeGoogl
uriQueue <- startUri
waitForSignal()
}
 
func crawler() {
var nextUri *url.URL
for {
<-time.After(500*time.Millisecond)
select {
case nextUri = <-uriQueue:
crawl(nextUri)
}
}
}
 
func googlChecker() {
var nextUri *url.URL
for {
if rateLimitedGoog {
fmt.Println("RATE LIMITED!! Waiting...")
<-time.After(120*time.Second)
}
select {
case nextUri = <-googlQueue:
verifyGoogl(nextUri)
}
}
}
 
func normalizeUri(uri *url.URL) string {
host := uri.Host
//remove www from the host
if len(host) > 4 && host[0:4] == "www." {
host = host[4:]
}
path := uri.Path
if path == "" || path == "/" {
return host
}
pathLen := len(path)
//remove any trailing slashes
if path[pathLen-1] == '/' {
path = path[:pathLen-1]
}
//unfortunately including RawQuery in here to fix youtube links not working anymore
return strings.Join([]string{host, path, uri.RawQuery}, "")
}
 
 
func validateUri(uri *url.URL) (bool, string) {
if uri.Scheme != "http" && uri.Scheme != "https" {
return false, ""
}
normalUri := normalizeUri(uri)
knownLock.RLock()
if _, ok := knownUris[normalUri]; ok {
knownLock.RUnlock()
//already checked this
return false, normalUri
}
knownLock.RUnlock()
if uri.Host == "" {
return false, normalUri
}
if extToIgnore.MatchString(uri.Path) {
return false, normalUri
}
if !onlyGoogleRegex.MatchString(uri.Host) {
return false, normalUri
}
return true, normalUri
}
 
//from http://stackoverflow.com/questions/16895294/how-to-set-timeout-for-http-get-requests-in-golang
func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(net, addr string) (c net.Conn, err error) {
return func(netw, addr string) (net.Conn, error) {
conn, err := net.DialTimeout(netw, addr, cTimeout)
if err != nil {
return nil, err
}
conn.SetDeadline(time.Now().Add(rwTimeout))
return conn, nil
}
}
func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration, baseNormalUri string) *http.Client {
return &http.Client{
Transport: &http.Transport{
Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
},
CheckRedirect: func(req *http.Request, via []*http.Request) error {
valid, newNormalUri := validateUri(req.URL)
//allow redirects to the *same* url since we might already have this url in knownUris because we're processing it right now
if !valid && baseNormalUri != newNormalUri {
return errors.New("Redirect:Invalid")
}
return nil
},
}
}
 
func crawl(uri *url.URL) {
valid, normalUri := validateUri(uri)
if !valid || uri == nil {
queuedLock.Lock()
delete(queuedUris, normalUri)
queuedLock.Unlock()
return
}
knownLock.Lock()
knownUris[normalUri] = true
knownLock.Unlock()
 
queuedLock.Lock()
delete(queuedUris, normalUri)
queuedLock.Unlock()
 
findYoutubeAnnotations(uri)
 
urlStr := uri.String()
fmt.Printf("Fetching: %s\n", urlStr)
client := NewTimeoutClient(defaultConnectTimeout, defaultRWTimeout, normalUri)
resp, err := client.Get(urlStr)
if err != nil {
var errMsg string
urlErr, ok := err.(*url.Error)
if !ok {
errMsg = err.Error()
} else {
errMsg = urlErr.Err.Error()
}
 
if errMsg != "Redirect:Invalid" {
fmt.Printf("Failed to Get URL: %s. Response: %s\n", urlStr, errMsg)
}
knownLock.Lock()
delete(knownUris, normalUri)
knownLock.Unlock()
return
}
htmlBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Printf("Failed to read from server to get full HTML for %s. Error: %s\n", urlStr, err)
knownLock.Lock()
delete(knownUris, normalUri)
knownLock.Unlock()
return
}
resp.Body.Close()
htmlStr := string(htmlBytes)
locateAndSaveGoogl(htmlStr)
 
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil {
fmt.Printf("Failed to parse HTML for %s. Error: %s\n", urlStr, err)
return
}
findAnchors(doc, uri)
}
 
func findAnchors(node *html.Node, baseUri *url.URL) {
if node.Type == html.ElementNode && (node.Data == "a" || node.Data == "url") {
for _, attr := range node.Attr {
if (node.Data == "a" && attr.Key == "href") || (node.Data == "url" && attr.Key == "value") {
newUri, err := url.Parse(attr.Val)
if err != nil || newUri == nil {
fmt.Printf("Invalid url detected: %s. Error %s\n", attr.Val, err)
break
}
if !newUri.IsAbs() {
newUri = baseUri.ResolveReference(newUri)
}
valid, newNorm := validateUri(newUri)
if valid {
//fmt.Printf("Found new URL! %s\n", newUri.String())
queuedLock.Lock()
if _, ok := queuedUris[newNorm]; !ok {
queuedUris[newNorm] = true
queuedLock.Unlock()
 
select {
case uriQueue <- newUri:
case <-time.After(time.Second * 1):
//fmt.Println("Queue is full!!")
return
}
} else {
queuedLock.Unlock()
}
}
break
}
}
} else if node.Type == html.ElementNode && node.Data == "head" {
//skip looping through head
node = node.NextSibling
if node == nil {
return
}
}
for nextNode := node.FirstChild; nextNode != nil; nextNode = nextNode.NextSibling {
findAnchors(nextNode, baseUri)
}
}
 
func findYoutubeAnnotations(uri *url.URL) {
if (uri.Host == "youtube.com" || uri.Host == "www.youtube.com") {
if len(uri.Path) < 6 || uri.Path[0:6] != "/watch" {
return
}
queryVals := uri.Query();
if queryVals == nil {
return
}
videoIDs, ok := queryVals["v"]
if !ok || len(videoIDs) < 1 {
videoIDsCommas, ok := queryVals["video_ids"]
if !ok || len(videoIDsCommas) < 1 {
return
}
videoIDs = strings.Split(videoIDsCommas[0], ",")
}
for _, videoID := range videoIDs {
addYoutubeAnnotations(videoID)
}
} else if (uri.Host == "youtu.be") {
addYoutubeAnnotations(uri.Path);
} else {
return
}
}
 
//thanks https://plus.google.com/+MarcLesterTan
func addYoutubeAnnotations(videoID string) {
if len(videoID) < 5 {
return
}
newUri, err := url.Parse("https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=" + videoID)
if err != nil || newUri == nil {
fmt.Printf("Invalid youtube annotations url generated. Error %s\n", err)
return
}
valid, newNorm := validateUri(newUri)
if valid {
//fmt.Printf("Added new youtube annotations URL! %s\n", newUri.String())
queuedLock.Lock()
if _, ok := queuedUris[newNorm]; !ok {
queuedUris[newNorm] = true
queuedLock.Unlock()
 
select {
case uriQueue <- newUri:
case <-time.After(time.Second * 1):
//fmt.Println("Queue is full!!")
return
}
} else {
queuedLock.Unlock()
}
}
}
 
func ReverseString(str string) string {
i := len(str)
final := make([]rune, i)
for _, r := range str {
i--
final[i] = r
}
return string(final)
}
 
func locateAndSaveGoogl(htmlStr string) {
matches := googlRegex.FindAllString(htmlStr, -1)
if matches == nil {
return
}
for _, match := range matches {
//detect backwards goo.gl links
if len(match) > 6 && match[len(match)-6:] == "lg.oog" {
match = ReverseString(match)
}
//fmt.Printf("Found goo.gl link!: %s\n", match)
//prevent backwards slashes from breaking this
uri, err := url.Parse("http://" + strings.Replace(match, `\`, "/", -1))
if err != nil {
fmt.Printf("Failed to parse goo.gl link: %s. Error: %s\n", match, err)
continue
}
normalUri := normalizeUri(uri)
knownGooglLock.RLock()
if _, ok := knownGoogl[normalUri]; ok {
knownGooglLock.RUnlock()
continue
}
knownGooglLock.RUnlock()
 
queuedGooglLock.Lock()
if _, ok := queuedGoogl[normalUri]; ok {
queuedGooglLock.Unlock()
continue
}
queuedGoogl[normalUri] = true
queuedGooglLock.Unlock()
 
googlQueue <- uri
}
}
 
var validGooglChars = "avE1k4yiQhAzl9B6DpIRdCqnF8tbSr7J2NKGT3VmoMPuc5UHeLjOZWxfwXgsY0"
 
func guesser() { //s float64
if !shouldGuess {
return
}
 
var i int
n := make([]uint8, 6)
for {
if rateLimitedGoog {
<-time.After(180*time.Second)
continue
}
 
i = 0
for i < 6 {
n[i] = validGooglChars[int(rand.Float64() * 62)]
i++
}
normal := "goo.gl/" + string(n)
knownGooglLock.RLock()
if _, ok := knownGoogl[normal]; ok {
knownGooglLock.RUnlock()
continue
}
knownGooglLock.RUnlock()
 
queuedGooglLock.Lock()
if _, ok := queuedGoogl[normal]; ok {
queuedGooglLock.Unlock()
continue
}
queuedGoogl[normal] = true
queuedGooglLock.Unlock()
 
uri, err := url.Parse("http://" + normal)
if err != nil {
fmt.Printf("Failed to generate valid goo.gl link: %s. Error: %s\n", normal, err)
continue
}
if guessTimeout > 0 {
<-time.After(guessTimeout)
}
googlQueue <- uri
}
}
 
var googlClient = &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
// /events/io/2014/redeem
//yes... this is ghetto how it works
if req.URL.Host == "developers.google.com" && len(req.URL.Path) > 22 && req.URL.Path[0:22] == "/events/io/2014/redeem" {
return errors.New("Redirect:Valid")
}
return errors.New("Redirect:Invalid")
},
}
 
func verifyGoogl(uri *url.URL) {
normalUri := normalizeUri(uri)
 
queuedGooglLock.Lock()
delete(queuedGoogl, normalUri)
queuedGooglLock.Unlock()
 
knownGooglLock.Lock()
if _, ok := knownGoogl[normalUri]; ok {
knownGooglLock.Unlock()
return
}
knownGoogl[normalUri] = true
knownGooglLock.Unlock()
 
urlStr := uri.String()
fmt.Println("Fetching goo.gl link: " + urlStr)
resp, err := googlClient.Head(urlStr)
if err != nil {
var errMsg string
urlErr, ok := err.(*url.Error)
if !ok {
errMsg = err.Error()
} else {
errMsg = urlErr.Err.Error()
}
 
if errMsg == "Redirect:Valid" {
rateLimitedGoog = false
fmt.Printf("Found a valid goo.gl redirect!! %s\n", urlStr)
 
if _, err := googlOutputFile.WriteString(urlStr + "\n"); err != nil {
fmt.Printf("ERROR! Failed to write goo.gl redirect to output file! Error: %s/n", err)
}
} else if errMsg == "Redirect:Invalid" {
rateLimitedGoog = false
//do nothing
} else {
fmt.Printf("Failed to Head URL: %s. Response: %s\n", urlStr, errMsg)
}
}
if resp.StatusCode == 403 {
rateLimitedGoog = true
} else {
rateLimitedGoog = false
}
}
 
func waitForSignal() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
<-c
fmt.Println("Received SIGINT!")
}

Open in new window


So when i enter the command at my dos command prompt:  go build
I get this message.

error message on build
Anyone know why I'm getting this message or what am i doing wrong to run this program?
0
Comment
Question by:maqskywalker
1 Comment
 
LVL 14

Accepted Solution

by:
brendanmeyer earned 500 total points
ID: 40011809
have you run the "go get code.google.com/p/go.net/html" command to get the go.net/html package?
0

Featured Post

How to improve team productivity

Quip adds documents, spreadsheets, and tasklists to your Slack experience
- Elevate ideas to Quip docs
- Share Quip docs in Slack
- Get notified of changes to your docs
- Available on iOS/Android/Desktop/Web
- Online/Offline

Join & Write a Comment

Suggested Solutions

Title # Comments Views Activity
Specific format 21 142
word0 challenge 3 58
recursion example 16 70
Currency Conversion? 1 37
If you haven’t already, I encourage you to read the first article (http://www.experts-exchange.com/articles/18680/An-Introduction-to-R-Programming-and-R-Studio.html) in my series to gain a basic foundation of R and R Studio.  You will also find the …
Does the idea of dealing with bits scare or confuse you? Does it seem like a waste of time in an age where we all have terabytes of storage? If so, you're missing out on one of the core tools in every professional programmer's toolbox. Learn how to …
This theoretical tutorial explains exceptions, reasons for exceptions, different categories of exception and exception hierarchy.
The viewer will learn how to use the return statement in functions in C++. The video will also teach the user how to pass data to a function and have the function return data back for further processing.

760 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question

Need Help in Real-Time?

Connect with top rated Experts

18 Experts available now in Live!

Get 1:1 Help Now